From 5a0fbe0fb9b5609d4babba80862d64d726b5d75e Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Fri, 30 Apr 2021 14:05:29 -0700
Subject: [PATCH 01/25] Update pre-commit configuration

---
 .pre-commit-config.yaml | 50 ++++++++++++++++++-----------------
 setup.cfg               | 17 ++++++++++++++
 2 files changed, 39 insertions(+), 28 deletions(-)
 create mode 100644 setup.cfg

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 621da1cf0..1c3d2e332 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,37 +1,31 @@
 repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v3.1.0
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.4.0
     hooks:
-  - id: trailing-whitespace
-  - id: end-of-file-fixer
-  - id: check-docstring-first
-  - id: check-json
-  - id: check-yaml
-  - id: pretty-format-json
-    args: ["--autofix", "--indent=2", "--no-sort-keys"]
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-docstring-first
+      - id: check-json
+      - id: check-yaml
+      - id: pretty-format-json
+        args: ["--autofix", "--indent=2", "--no-sort-keys"]

-- repo: https://github.com/ambv/black
-  rev: 19.10b0
+  - repo: https://github.com/psf/black
+    rev: 20.8b1
     hooks:
-  - id: black
-    args: ["--line-length", "100"]
+      - id: black
+        args: ["--line-length", "100"]

-- repo: https://gitlab.com/pycqa/flake8
+  - repo: https://gitlab.com/pycqa/flake8
     rev: 3.8.3
     hooks:
-  - id: flake8
+      - id: flake8
+        args:
+          [
+            "--tee",
+          ]

-- repo: https://github.com/asottile/seed-isort-config
-  rev: v2.2.0
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.8.0
     hooks:
-  - id: seed-isort-config
-
-- repo: https://github.com/pre-commit/mirrors-isort
-  rev: v5.2.0
-  hooks:
-  - id: isort
-
-- repo: https://github.com/myint/rstcheck
-  rev: master
-  hooks:
-  - id: rstcheck
\ No newline at end of file
+      - id: isort
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 000000000..91b55bc5e
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,17 @@
+[flake8]
+exclude = setup.py,docs
+ignore = E999,N802,N806,W503,W504
+max-line-length = 100
+output-file = flake8_log.txt
+
+[isort]
+known_first_party = echopype
+known_third_party = _echopype_version,setuptools
+multi_line_output = 3
+include_trailing_comma = True
+force_grid_wrap = 0
+combine_as_imports = True
+line_length = 100
+skip =
+    echopype/__init__.py
+    setup.py

From fa37344e5018cc974c5e9626b40a2076f2db75db Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Fri, 30 Apr 2021 14:16:12 -0700
Subject: [PATCH 02/25] Ignore tests and test_data for flake8

---
 setup.cfg | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 91b55bc5e..4515a8388 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [flake8]
-exclude = setup.py,docs
+exclude = setup.py,docs,echopype/test_data,echopype/tests
 ignore = E999,N802,N806,W503,W504
 max-line-length = 100
 output-file = flake8_log.txt
@@ -14,4 +14,6 @@ combine_as_imports = True
 line_length = 100
 skip =
     echopype/__init__.py
+    echopype/tests
+    echopype/test_data
     setup.py

From 211d5d61621b71da9c05f97cbf20bfab0f6517aa Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Fri, 30 Apr 2021 14:21:57 -0700
Subject: [PATCH 03/25] Isort ignore docs

---
 setup.cfg | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.cfg b/setup.cfg
index 4515a8388..92c434b7f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -16,4 +16,5 @@ skip =
     echopype/__init__.py
     echopype/tests
     echopype/test_data
+    docs
     setup.py

From b144859874839435dadc3d54445e18d061d76997 Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Fri, 30 Apr 2021 14:26:02 -0700
Subject: [PATCH 04/25] Black and isort setup.py

---
 setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index be5c25ade..82565084a 100644
--- a/setup.py
+++ b/setup.py
@@ -2,11 +2,11 @@ from setuptools import setup, find_packages

 # Long description will go up on the pypi page
-with open('README.md') as file:
+with open("README.md") as file:
     LONG_DESCRIPTION = file.read()

 # Dependencies.
-with open('requirements.txt') as f:
+with open("requirements.txt") as f:
     requirements = f.readlines()
 INSTALL_REQUIRES = [t.strip() for t in requirements]

@@ -42,5 +42,5 @@ setup_requires=["setuptools>=45", "wheel", "setuptools_scm"],)

-if __name__ == '__main__':
+if __name__ == "__main__":
     setup(**opts)

From 2901c149a57f8bb0dde620c3447eb1b9d3612e16 Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Fri, 30 Apr 2021 14:56:31 -0700
Subject: [PATCH 05/25] Move settings to setup.cfg

---
 .pre-commit-config.yaml |  5 -----
 setup.cfg               | 17 ++++++++++++++---
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1c3d2e332..6ef439c81 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,16 +14,11 @@ repos:
     rev: 20.8b1
     hooks:
       - id: black
-        args: ["--line-length", "100"]

   - repo: https://gitlab.com/pycqa/flake8
     rev: 3.8.3
     hooks:
       - id: flake8
-        args:
-          [
-            "--tee",
-          ]

   - repo: https://github.com/PyCQA/isort
     rev: 5.8.0
     hooks:
       - id: isort
diff --git a/setup.cfg b/setup.cfg
index 92c434b7f..d2069e6ac 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,8 +1,20 @@
+[black]
+line-length = 100
+exclude =
+    echopype/tests
+    echopype/test_data
+    docs
+    setup.py
+
 [flake8]
-exclude = setup.py,docs,echopype/test_data,echopype/tests
-ignore = E999,N802,N806,W503,W504
 max-line-length = 100
 output-file = flake8_log.txt
+tee = True
+exclude =
+    echopype/tests
+    echopype/test_data
+    docs
+    setup.py

 [isort]
 known_first_party = echopype
@@ -13,7 +25,6 @@ force_grid_wrap = 0
 combine_as_imports = True
 line_length = 100
 skip =
-    echopype/__init__.py
     echopype/tests
     echopype/test_data
     docs
     setup.py
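(Local usage note: the hooks pinned in the patches above can be exercised outside CI as well. Assuming the pre-commit package is installed in the development environment, the standard invocation would be:

    pre-commit install          # run the configured hooks on every commit
    pre-commit run --all-files  # run all hooks once against the whole tree

with black, flake8, and isort now reading their shared settings from setup.cfg.)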
From 875b494874839435dadc3d54445e18d061d76997 Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Fri, 30 Apr 2021 17:11:57 -0700
Subject: [PATCH 06/25] Moving some metadata to setup cfg

---
 setup.cfg | 20 ++++++++++++++++++++
 setup.py  | 46 ++++++++++++++++------------------------------
 2 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index d2069e6ac..503a71e36 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,3 +1,23 @@
+[metadata]
+name = echopype
+maintainer = Wu-Jung Lee
+maintainer_email = leewujung@gmail.com
+description = Enhancing the interoperability and scalability in analyzing ocean sonar data
+long_description_content_type = text/markdown
+url = https://github.com/OSOceanAcoustics/echopype
+license = Apache License, Version 2.0
+classifiers =
+    Development Status :: 3 - Alpha
+    Environment :: Console
+    Intended Audience :: Science/Research
+    License :: OSI Approved :: Apache Software License
+    Operating System :: OS Independent
+    Programming Language :: Python
+    Topic :: Scientific/Engineering
+author = Wu-Jung Lee
+author_email = leewujung@gmail.com
+platforms = OS Independent
+
 [black]
 line-length = 100
diff --git a/setup.py b/setup.py
index 82565084a..b99d6852b 100644
--- a/setup.py
+++ b/setup.py
@@ -10,36 +10,22 @@
 requirements = f.readlines()
 INSTALL_REQUIRES = [t.strip() for t in requirements]

-opts = dict(name='echopype',
-            maintainer='Wu-Jung Lee',
-            maintainer_email='leewujung@gmail.com',
-            description='Enhancing the interoperability and scalability in analyzing ocean sonar data',
-            long_description=LONG_DESCRIPTION,
-            long_description_content_type='text/markdown',
-            url='https://github.com/OSOceanAcoustics/echopype',
-            download_url='',
-            license='Apache License, Version 2.0',
-            classifiers=['Development Status :: 3 - Alpha',
-                         'Environment :: Console',
-                         'Intended Audience :: Science/Research',
-                         'License :: OSI Approved :: Apache Software License',
-                         'Operating System :: OS Independent',
-                         'Programming Language :: Python',
-                         'Topic :: Scientific/Engineering'],
-            author='Wu-Jung Lee',
-            author_email='leewujung@gmail.com',
-            platforms='OS Independent',
-            packages=find_packages(),
-            include_package_data=True,
-            install_requires=INSTALL_REQUIRES,
-            py_modules=["_echopype_version"],
-            use_scm_version={
-                "fallback_version": "unknown",
-                "local_scheme": "node-and-date",
-                "write_to": "_echopype_version.py",
-                "write_to_template": 'version = "{version}"\n',
-            },
-            setup_requires=["setuptools>=45", "wheel", "setuptools_scm"],)
+opts = dict(
+    long_description=LONG_DESCRIPTION,
+    packages=find_packages(),
+    include_package_data=True,
+    package_dir={"": "."},
+    install_requires=INSTALL_REQUIRES,
+    tests_require=["tox", "pandas"],
+    py_modules=["_echopype_version"],
+    use_scm_version={
+        "fallback_version": "unknown",
+        "local_scheme": "node-and-date",
+        "write_to": "_echopype_version.py",
+        "write_to_template": 'version = "{version}"\n',
+    },
+    setup_requires=["setuptools>=30.3.0", "wheel", "setuptools_scm"],
+)

 if __name__ == "__main__":
     setup(**opts)

From de614a91b8ee740c53126d21e4be85973bfbef38 Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Fri, 30 Apr 2021 17:29:50 -0700
Subject: [PATCH 07/25] Fix formatting in build system files

---
 .ci_helpers/docker/docker-compose.yaml      |  1 -
 .ci_helpers/docker/http.dockerfile          |  2 +-
 .ci_helpers/docker/minioci.dockerfile       |  2 +-
 .ci_helpers/py3.7.yaml                      |  2 +-
 .ci_helpers/py3.8.yaml                      |  2 +-
 .ci_helpers/py3.9.yaml                      |  2 +-
 .github/actions/gdrive-rclone/action.yaml   |  2 +-
 .github/actions/gdrive-rclone/entrypoint.sh |  3 +--
 .github/workflows/docker.yaml               |  2 +-
 .github/workflows/lint.yaml                 |  2 +-
 .github/workflows/pypi.yaml                 |  2 +-
 notebooks/environment.yml                   | 21 +++++++++++++++++++++
 pytest.ini                                  |  2 +-
 13 files changed, 32 insertions(+), 13 deletions(-)
 create mode 100644 notebooks/environment.yml

diff --git a/.ci_helpers/docker/docker-compose.yaml b/.ci_helpers/docker/docker-compose.yaml
index a340a2fe2..d92cf5f32 100644
--- a/.ci_helpers/docker/docker-compose.yaml
+++ b/.ci_helpers/docker/docker-compose.yaml
@@ -20,4 +20,3 @@ services:
       - echopypenetwork
 networks:
   echopypenetwork:
-
\ No newline at end of file
diff --git a/.ci_helpers/docker/http.dockerfile b/.ci_helpers/docker/http.dockerfile
index aee563845..ff390da72 100644
--- a/.ci_helpers/docker/http.dockerfile
+++ b/.ci_helpers/docker/http.dockerfile
@@ -1,3 +1,3 @@
 FROM httpd:2.4

-COPY echopype/test_data /usr/local/apache2/htdocs/data
\ No newline at end of file
+COPY echopype/test_data /usr/local/apache2/htdocs/data
diff --git a/.ci_helpers/docker/minioci.dockerfile b/.ci_helpers/docker/minioci.dockerfile
index 65987dfe6..277b251ca 100644
--- a/.ci_helpers/docker/minioci.dockerfile
+++ b/.ci_helpers/docker/minioci.dockerfile
@@ -3,4 +3,4 @@ FROM minio/minio
 # Install git
 RUN microdnf install git

-CMD ["minio", "server", "/data"]
\ No newline at end of file
+CMD ["minio", "server", "/data"]
diff --git a/.ci_helpers/py3.7.yaml b/.ci_helpers/py3.7.yaml
index 4d5464921..26adce72f 100644
--- a/.ci_helpers/py3.7.yaml
+++ b/.ci_helpers/py3.7.yaml
@@ -16,4 +16,4 @@ dependencies:
   - requests
   - aiohttp
   - s3fs==0.5.2
-  - mamba
\ No newline at end of file
+  - mamba
diff --git a/.ci_helpers/py3.8.yaml b/.ci_helpers/py3.8.yaml
index 3165c72ab..4a1d753dc 100644
--- a/.ci_helpers/py3.8.yaml
+++ b/.ci_helpers/py3.8.yaml
@@ -16,4 +16,4 @@ dependencies:
   - requests
   - aiohttp
   - s3fs==0.5.2
-  - mamba
\ No newline at end of file
+  - mamba
diff --git a/.ci_helpers/py3.9.yaml b/.ci_helpers/py3.9.yaml
index 6153f6b5c..1b364ca5d 100644
--- a/.ci_helpers/py3.9.yaml
+++ b/.ci_helpers/py3.9.yaml
@@ -16,4 +16,4 @@ dependencies:
   - requests
   - aiohttp
   - s3fs==0.5.2
-  - mamba
\ No newline at end of file
+  - mamba
diff --git a/.github/actions/gdrive-rclone/action.yaml b/.github/actions/gdrive-rclone/action.yaml
index 101e97bb6..8c4e1fde8 100644
--- a/.github/actions/gdrive-rclone/action.yaml
+++ b/.github/actions/gdrive-rclone/action.yaml
@@ -2,4 +2,4 @@ name: 'GDrive Rclone'
 description: 'Performs RClone from a google drive folder'
 runs:
   using: 'docker'
-  image: 'Dockerfile'
\ No newline at end of file
+  image: 'Dockerfile'
diff --git a/.github/actions/gdrive-rclone/entrypoint.sh b/.github/actions/gdrive-rclone/entrypoint.sh
index d1c451908..4fb607d5f 100755
--- a/.github/actions/gdrive-rclone/entrypoint.sh
+++ b/.github/actions/gdrive-rclone/entrypoint.sh
@@ -27,10 +27,9 @@ then
     echo "Copying new test data from google drive"
     rclone copy gdrive: $TEST_DATA_FOLDER
     echo "Done"
-    
+
     chmod -R ugoa+w $TEST_DATA_FOLDER
     ls -lah $TEST_DATA_FOLDER
 else
     echo "${TEST_DATA_FOLDER} not found"
 fi
-
diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 8b2b3fc7d..d73525057 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -36,7 +36,7 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v1
       - name: Login to DockerHub
-        uses: docker/login-action@v1 
+        uses: docker/login-action@v1
         with:
           username: ${{ secrets.DOCKER_USERNAME }}
           password: ${{ secrets.DOCKER_TOKEN }}
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index 174c1633e..43ae4db96 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -16,4 +16,4 @@ jobs:
       - uses: actions/setup-python@v2
       - uses: pre-commit/action@v2.0.0
         # allow to error for now
-        continue-on-error: true
\ No newline at end of file
+        continue-on-error: true
diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml
index 482cd05d3..dd361ac47 100644
--- a/.github/workflows/pypi.yaml
+++ b/.github/workflows/pypi.yaml
@@ -39,7 +39,7 @@ jobs:
           # local part of the version isn't included, making the version string
           # compatible with PyPI.
           sed --in-place "s/node-and-date/no-local-version/g" setup.py
-          
+
       - name: Build source and wheel distributions
         run: |
           python setup.py sdist bdist_wheel
diff --git a/notebooks/environment.yml b/notebooks/environment.yml
new file mode 100644
index 000000000..a19286ba3
--- /dev/null
+++ b/notebooks/environment.yml
@@ -0,0 +1,21 @@
+name: echopype
+channels:
+  - conda-forge
+dependencies:
+  - python=3.8
+  - matplotlib
+  - netCDF4
+  - numpy
+  - pynmea2
+  - pytz
+  - scipy
+  - xarray
+  - zarr
+  # extras
+  - beautifulsoup4
+  - click
+  - dask
+  - ipykernel
+  - pandas
+  - requests
+  - tox
diff --git a/pytest.ini b/pytest.ini
index c03c8d6f0..7a3a8bfd2 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -2,4 +2,4 @@
 [pytest]
 testpaths = echopype/tests
-cache_dir = .cache
\ No newline at end of file
+cache_dir = .cache
From 31c832386004115e3555c889adbcfea392727876 Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Fri, 30 Apr 2021 17:49:50 -0700
Subject: [PATCH 08/25] Pre-commit ignore docs, run pre-commit-hooks only

---
 .pre-commit-config.yaml                      | 1 +
 echopype/calibrate/api.py                    | 1 -
 echopype/calibrate/calibrate_ek.py           | 1 -
 echopype/convert/add_ancillary.py            | 2 +-
 echopype/convert/convert.py                  | 1 -
 echopype/convert/utils/ek_date_conversion.py | 2 --
 echopype/convert/utils/ek_raw_io.py          | 2 +-
 echopype/preprocess/api.py                   | 2 --
 echopype/preprocess/noise_est.py             | 3 ---
 echopype/visualize/echogram.py               | 2 +-
 10 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6ef439c81..3aee6ec9b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,4 @@
+exclude: ^docs/
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v3.4.0
diff --git a/echopype/calibrate/api.py b/echopype/calibrate/api.py
index afadf148f..8036d74b6 100644
--- a/echopype/calibrate/api.py
+++ b/echopype/calibrate/api.py
@@ -47,4 +47,3 @@ def compute_Sp(echodata: EchoData, **kwargs):
     """Compute point backscattering strength (Sp) from raw data.
     """
     return _compute_cal(cal_type='Sp', echodata=echodata, **kwargs)
-
diff --git a/echopype/calibrate/calibrate_ek.py b/echopype/calibrate/calibrate_ek.py
index a9e1397cf..b0a8410ac 100644
--- a/echopype/calibrate/calibrate_ek.py
+++ b/echopype/calibrate/calibrate_ek.py
@@ -687,4 +687,3 @@ def compute_Sp(self, waveform_mode='BB', encode_mode='complex'):
         and the corresponding range (``range``) in units meter.
         """
         return self._compute_cal(cal_type='Sp', waveform_mode=waveform_mode, encode_mode=encode_mode)
-
diff --git a/echopype/convert/add_ancillary.py b/echopype/convert/add_ancillary.py
index a0c7f04f5..eed7999d8 100644
--- a/echopype/convert/add_ancillary.py
+++ b/echopype/convert/add_ancillary.py
@@ -116,4 +116,4 @@ def mapping_get_multiple(mapping, keys, default=None):
     # https://github.com/zarr-developers/zarr-python/issues/65
     old_dataset = zarr.open_group(f, mode="a")
     del old_dataset["Platform"]
-    ds_platform.to_zarr(f, mode="a", group="Platform")
\ No newline at end of file
+    ds_platform.to_zarr(f, mode="a", group="Platform")
diff --git a/echopype/convert/convert.py b/echopype/convert/convert.py
index ebd00fc23..17ee15c30 100644
--- a/echopype/convert/convert.py
+++ b/echopype/convert/convert.py
@@ -67,4 +67,3 @@ def __new__(cls, file=None, xml_path=None, model=None, storage_options=None):
             storage_options=storage_options,
         )
         return cls._instance
-
diff --git a/echopype/convert/utils/ek_date_conversion.py b/echopype/convert/utils/ek_date_conversion.py
index 61147d9db..6f7058756 100644
--- a/echopype/convert/utils/ek_date_conversion.py
+++ b/echopype/convert/utils/ek_date_conversion.py
@@ -160,5 +160,3 @@ def datetime_to_unix(datetime_obj):
     timestamp = (datetime_obj - UTC_UNIX_EPOCH).total_seconds()

     return timestamp
-
-
diff --git a/echopype/convert/utils/ek_raw_io.py b/echopype/convert/utils/ek_raw_io.py
index 514b2b67f..a77878abc 100644
--- a/echopype/convert/utils/ek_raw_io.py
+++ b/echopype/convert/utils/ek_raw_io.py
@@ -85,7 +85,7 @@ class RawSimradFile(BufferedReader):
     }

-    def __init__(self, name, mode='rb', closefd=True, 
+    def __init__(self, name, mode='rb', closefd=True,
                  return_raw=False, buffer_size=1024*1024, storage_options={}):
diff --git a/echopype/preprocess/api.py b/echopype/preprocess/api.py
index 4b6d9ef6b..0793a3ec9 100644
--- a/echopype/preprocess/api.py
+++ b/echopype/preprocess/api.py
@@ -164,5 +164,3 @@ def remove_noise(ds_Sv, ping_num, range_bin_num, noise_max=None, SNR_threshold=3
 def regrid():
     return 1
-
-
diff --git a/echopype/preprocess/noise_est.py b/echopype/preprocess/noise_est.py
index 1cbc104de..cf9fff72c 100644
--- a/echopype/preprocess/noise_est.py
+++ b/echopype/preprocess/noise_est.py
@@ -113,6 +113,3 @@ def remove_noise(self, noise_max=None, SNR_threshold=3):
                 'noise_max': noise_max
             }
         )
-
-
-
diff --git a/echopype/visualize/echogram.py b/echopype/visualize/echogram.py
index 3e58f7435..6c092b1a7 100644
--- a/echopype/visualize/echogram.py
+++ b/echopype/visualize/echogram.py
@@ -175,4 +175,4 @@ def plot(self, form, frequency=None, plot_ping_number=False, infer_burst=False,
 # tmp_echo.calibrate(save=False)
 # tmp_plot = EchoGram(tmp_echo)
 # tmp_plot.plot('Sv', frequency=38000, infer_burst=True, robust=True, cmap='jet')
-# plt.show()
\ No newline at end of file
+# plt.show()

From af3975692f3259e643cef7f267707177e17d6d77 Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Fri, 30 Apr 2021 17:57:56 -0700
Subject: [PATCH 09/25] Black and isort CI scripts

---
 .ci_helpers/docker/setup-services.py | 16 +++++++++-------
 .ci_helpers/run-test.py              |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.ci_helpers/docker/setup-services.py b/.ci_helpers/docker/setup-services.py
index 2c593e73e..4172383f9 100755
--- a/.ci_helpers/docker/setup-services.py
+++ b/.ci_helpers/docker/setup-services.py
@@ -3,26 +3,26 @@
 Script to help bring up docker services for testing.

 """
+import argparse
 import os
 import shutil
 import sys
 from pathlib import Path
-import argparse

-HERE = Path('.').absolute()
+HERE = Path(".").absolute()
 BASE = Path(__file__).parent.absolute()
 COMPOSE_FILE = BASE / "docker-compose.yaml"
 TEST_DATA_PATH = HERE / "echopype" / "test_data"


 def parse_args():
-    parser = argparse.ArgumentParser(description='Setup services for testing')
+    parser = argparse.ArgumentParser(description="Setup services for testing")
     parser.add_argument(
-        '--deploy', action='store_true', help="Flag to setup docker services"
+        "--deploy", action="store_true", help="Flag to setup docker services"
     )
     parser.add_argument(
-        '--tear-down',
-        action='store_true',
+        "--tear-down",
+        action="store_true",
         help="Flag to tear down docker services",
     )

@@ -48,7 +48,9 @@ def parse_args():
         os.system(f"docker-compose -f {COMPOSE_FILE} pull")

         print("3) Bringing up services.")
-        os.system(f"docker-compose -f {COMPOSE_FILE} up -d --remove-orphans --force-recreate")
+        os.system(
+            f"docker-compose -f {COMPOSE_FILE} up -d --remove-orphans --force-recreate"
+        )

         print(f"4) Deleting old test folder at {TEST_DATA_PATH}")
         if TEST_DATA_PATH.exists():
diff --git a/.ci_helpers/run-test.py b/.ci_helpers/run-test.py
index d13074f15..965529dd2 100644
--- a/.ci_helpers/run-test.py
+++ b/.ci_helpers/run-test.py
@@ -30,7 +30,7 @@
         type=str,
         nargs="?",
         default="",
-        help="Comma separated list of changed files."
+        help="Comma separated list of changed files.",
     )
     parser.add_argument(
         "--pytest-args", type=str, help="Optional pytest args", default=""
""" +import argparse import os import shutil import sys from pathlib import Path -import argparse -HERE = Path('.').absolute() +HERE = Path(".").absolute() BASE = Path(__file__).parent.absolute() COMPOSE_FILE = BASE / "docker-compose.yaml" TEST_DATA_PATH = HERE / "echopype" / "test_data" def parse_args(): - parser = argparse.ArgumentParser(description='Setup services for testing') + parser = argparse.ArgumentParser(description="Setup services for testing") parser.add_argument( - '--deploy', action='store_true', help="Flag to setup docker services" + "--deploy", action="store_true", help="Flag to setup docker services" ) parser.add_argument( - '--tear-down', - action='store_true', + "--tear-down", + action="store_true", help="Flag to tear down docker services", ) @@ -48,7 +48,9 @@ def parse_args(): os.system(f"docker-compose -f {COMPOSE_FILE} pull") print("3) Bringing up services.") - os.system(f"docker-compose -f {COMPOSE_FILE} up -d --remove-orphans --force-recreate") + os.system( + f"docker-compose -f {COMPOSE_FILE} up -d --remove-orphans --force-recreate" + ) print(f"4) Deleting old test folder at {TEST_DATA_PATH}") if TEST_DATA_PATH.exists(): diff --git a/.ci_helpers/run-test.py b/.ci_helpers/run-test.py index d13074f15..965529dd2 100644 --- a/.ci_helpers/run-test.py +++ b/.ci_helpers/run-test.py @@ -30,7 +30,7 @@ type=str, nargs="?", default="", - help="Comma separated list of changed files." + help="Comma separated list of changed files.", ) parser.add_argument( "--pytest-args", type=str, help="Optional pytest args", default="" From 415584b2f1045bd6e41e6c3f81d4a02d8d6379a8 Mon Sep 17 00:00:00 2001 From: lsetiawan Date: Mon, 24 May 2021 13:06:30 -0700 Subject: [PATCH 10/25] Allow label option to run all tests on PR --- .github/workflows/pr.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 05e3ec30b..2690645ed 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -85,7 +85,13 @@ jobs: - name: Print Changed files shell: bash -l {0} run: echo "${{ steps.files.outputs.added_modified }}" + - name: Running all Tests + if: contains(github.event.pull_request.labels.*.name, 'Needs Complete Testing') + shell: bash -l {0} + run: | + python .ci_helpers/run-test.py --local --pytest-args="--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log - name: Running Tests + if: ${{ skipped() }} shell: bash -l {0} run: | python .ci_helpers/run-test.py --pytest-args="--log-cli-level=WARNING,-vv,--disable-warnings" ${{ steps.files.outputs.added_modified }} |& tee ci_test_log.log From 362fd89232def3c5de934da8e9dcbc3d490a2f9f Mon Sep 17 00:00:00 2001 From: lsetiawan Date: Mon, 24 May 2021 13:10:32 -0700 Subject: [PATCH 11/25] Fix pr workflow yaml --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2690645ed..408a259b3 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -91,7 +91,7 @@ jobs: run: | python .ci_helpers/run-test.py --local --pytest-args="--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log - name: Running Tests - if: ${{ skipped() }} + if: "!contains(github.event.pull_request.labels.*.name, 'Needs Complete Testing')" shell: bash -l {0} run: | python .ci_helpers/run-test.py --pytest-args="--log-cli-level=WARNING,-vv,--disable-warnings" ${{ steps.files.outputs.added_modified }} |& tee ci_test_log.log From 9f0b2206306919f4b16d0676f43724cd88777b81 Mon Sep 
17 00:00:00 2001 From: lsetiawan Date: Mon, 24 May 2021 13:20:21 -0700 Subject: [PATCH 12/25] Add coverage test --- .coveragerc | 8 ++++++++ .github/workflows/ci.yaml | 2 +- .github/workflows/pr.yaml | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 000000000..588c9d8bf --- /dev/null +++ b/.coveragerc @@ -0,0 +1,8 @@ +[run] +omit = + */echopype/tests/* + */echopype/test_data/* + */echopype/process/* + */echopype/convert/add_ancillary.py + */echopype/convert/convert_combine.py + */echopype/visualize/* \ No newline at end of file diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4718859f0..3ba272c16 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -83,7 +83,7 @@ jobs: - name: Running All Tests shell: bash -l {0} run: | - python .ci_helpers/run-test.py --local --pytest-args="--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log + python .ci_helpers/run-test.py --local --pytest-args="--cov=echopype,--cov-report=xml,--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log - name: Upload ci test log if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 408a259b3..1fcbce88e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -89,12 +89,12 @@ jobs: if: contains(github.event.pull_request.labels.*.name, 'Needs Complete Testing') shell: bash -l {0} run: | - python .ci_helpers/run-test.py --local --pytest-args="--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log + python .ci_helpers/run-test.py --local --pytest-args="--cov=echopype,--cov-report=xml,--log-cli-level=WARNING,-vv,--disable-warnings" |& tee ci_test_log.log - name: Running Tests if: "!contains(github.event.pull_request.labels.*.name, 'Needs Complete Testing')" shell: bash -l {0} run: | - python .ci_helpers/run-test.py --pytest-args="--log-cli-level=WARNING,-vv,--disable-warnings" ${{ steps.files.outputs.added_modified }} |& tee ci_test_log.log + python .ci_helpers/run-test.py --pytest-args="--cov=echopype,--cov-report=xml,--log-cli-level=WARNING,-vv,--disable-warnings" ${{ steps.files.outputs.added_modified }} |& tee ci_test_log.log - name: Upload ci test log if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 From b2e53406804923fe1c1569b326f92eebc99bd335 Mon Sep 17 00:00:00 2001 From: lsetiawan Date: Mon, 24 May 2021 13:35:01 -0700 Subject: [PATCH 13/25] Add codecov upload for coverage test --- .github/workflows/ci.yaml | 11 +++++++++++ .github/workflows/pr.yaml | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3ba272c16..8fc9fc06d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -49,6 +49,9 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: x64 + - name: Set environment variables + run: | + echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV - name: Cache conda uses: actions/cache@v2 env: @@ -90,3 +93,11 @@ jobs: with: name: ci_test_log path: ci_test_log.log + - name: Upload code coverage to Codecov + uses: codecov/codecov-action@v1 + with: + file: ./coverage.xml + flags: unittests + env_vars: RUNNER_OS,PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 1fcbce88e..a626794bf 100644 --- 
a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -46,6 +46,9 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: x64 + - name: Set environment variables + run: | + echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV - name: Cache conda uses: actions/cache@v2 env: @@ -101,3 +104,11 @@ jobs: with: name: ci_test_log path: ci_test_log.log + - name: Upload code coverage to Codecov + uses: codecov/codecov-action@v1 + with: + file: ./coverage.xml + flags: unittests + env_vars: RUNNER_OS,PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false From 96f0addd0d8a4637ab74143dac85d03beb0e9284 Mon Sep 17 00:00:00 2001 From: lsetiawan Date: Mon, 24 May 2021 13:37:11 -0700 Subject: [PATCH 14/25] Remove notebooks --- notebooks/environment.yml | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 notebooks/environment.yml diff --git a/notebooks/environment.yml b/notebooks/environment.yml deleted file mode 100644 index a19286ba3..000000000 --- a/notebooks/environment.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: echopype -channels: - - conda-forge -dependencies: - - python=3.8 - - matplotlib - - netCDF4 - - numpy - - pynmea2 - - pytz - - scipy - - xarray - - zarr - # extras - - beautifulsoup4 - - click - - dask - - ipykernel - - pandas - - requests - - tox From 8e67ae4dda1cc2a5124381f9c4b3b8097938bf8a Mon Sep 17 00:00:00 2001 From: lsetiawan Date: Mon, 24 May 2021 14:28:13 -0700 Subject: [PATCH 15/25] Run pre-commit on non ep code --- .coveragerc | 4 ++-- .github/workflows/pypi.yaml | 4 ++-- README.md | 4 ++-- setup.cfg | 15 --------------- setup.py | 4 ++-- 5 files changed, 8 insertions(+), 23 deletions(-) diff --git a/.coveragerc b/.coveragerc index 588c9d8bf..487ac3692 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,8 +1,8 @@ [run] -omit = +omit = */echopype/tests/* */echopype/test_data/* */echopype/process/* */echopype/convert/add_ancillary.py */echopype/convert/convert_combine.py - */echopype/visualize/* \ No newline at end of file + */echopype/visualize/* diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml index dd361ac47..08ee35240 100644 --- a/.github/workflows/pypi.yaml +++ b/.github/workflows/pypi.yaml @@ -84,14 +84,14 @@ jobs: python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade --pre echopype python -c "import echopype; print(echopype.__version__)" echo "=== Done testing wheel file ===" - + echo "=== Testing source tar file ===" # Install tar gz and check import python -m pip uninstall --yes echopype python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade --pre --no-binary=:all: echopype python -c "import echopype; print(echopype.__version__)" echo "=== Done testing source tar file ===" - + publish-pypi: name: Push echopype to production pypi needs: test-built-dist diff --git a/README.md b/README.md index ac069c64a..7b7167618 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Echopype is a package built to enable interoperability and scalability in ocean sonar data processing. These data are widely used for obtaining information about the distribution and abundance of marine animals, such as fish and krill. Our ability to collect large volumes of sonar data from a variety of ocean platforms has grown significantly in the last decade. However, most of the new data remain under-utilized. 
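(Local usage note: patches 12 and 13 above route pytest-cov flags through run-test.py and upload the resulting report. Assuming pytest-cov is installed, the CI coverage run can be reproduced locally with:

    python .ci_helpers/run-test.py --local --pytest-args="--cov=echopype,--cov-report=xml"

which writes the coverage.xml consumed by the codecov-action step; .coveragerc controls which modules are omitted.)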
From 8e67ae4dda1cc2a5124381f9c4b3b8097938bf8a Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Mon, 24 May 2021 14:28:13 -0700
Subject: [PATCH 15/25] Run pre-commit on non ep code

---
 .coveragerc                 |  4 ++--
 .github/workflows/pypi.yaml |  4 ++--
 README.md                   |  4 ++--
 setup.cfg                   | 15 ---------------
 setup.py                    |  4 ++--
 5 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/.coveragerc b/.coveragerc
index 588c9d8bf..487ac3692 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,8 +1,8 @@
 [run]
-omit = 
+omit =
     */echopype/tests/*
     */echopype/test_data/*
     */echopype/process/*
     */echopype/convert/add_ancillary.py
     */echopype/convert/convert_combine.py
-    */echopype/visualize/*
\ No newline at end of file
+    */echopype/visualize/*
diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml
index dd361ac47..08ee35240 100644
--- a/.github/workflows/pypi.yaml
+++ b/.github/workflows/pypi.yaml
@@ -84,14 +84,14 @@ jobs:
          python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade --pre echopype
          python -c "import echopype; print(echopype.__version__)"
          echo "=== Done testing wheel file ==="
-          
+
          echo "=== Testing source tar file ==="
          # Install tar gz and check import
          python -m pip uninstall --yes echopype
          python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade --pre --no-binary=:all: echopype
          python -c "import echopype; print(echopype.__version__)"
          echo "=== Done testing source tar file ==="
-          
+
  publish-pypi:
    name: Push echopype to production pypi
    needs: test-built-dist
diff --git a/README.md b/README.md
index ac069c64a..7b7167618 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ Echopype is a package built to enable interoperability and scalability in ocean
 sonar data processing. These data are widely used for obtaining information about the distribution and abundance of marine animals, such as fish and krill. Our ability to collect large volumes of sonar data from a variety of ocean platforms has grown significantly in the last decade. However, most of the new data remain under-utilized.
 echopype aims to address the root cause of this problem - the lack of interoperable data format and scalable analysis workflows that adapt well with increasing data volume - by providing open-source tools as entry points for scientists to make discovery using these new data.

-Watch the [echopype talk](https://www.youtube.com/watch?v=qboH7MyHrpU) 
+Watch the [echopype talk](https://www.youtube.com/watch?v=qboH7MyHrpU)
 at SciPy 2019 for background, discussions and a quick demo!

 ## Documentation
@@ -53,7 +53,7 @@ Please report any bugs by [creating issues on GitHub](https://medium.com/nyc-planning-digital/writing-a-proper-github-issue-97427d62a20f).
 Contributors
 ------------

-[Wu-Jung Lee](http://leewujung.github.io) (@leewujung) leads this project and together with 
+[Wu-Jung Lee](http://leewujung.github.io) (@leewujung) leads this project and together with
 [Kavin Nguyen](https://github.com/ngkavin) (@ngkavin), [Landung "Don" Setiawan](https://github.com/lsetiawan) (@lsetiawan), and [Imran Majeed](https://github.com/imranmaj) (@imranmaj) are primary developers of this package.
 [Emilio Mayorga](https://www.apl.washington.edu/people/profile.php?last_name=Mayorga&first_name=Emilio) (@emiliom) and
 [Valentina Staneva](https://escience.washington.edu/people/valentina-staneva/) (@valentina-s)
diff --git a/setup.cfg b/setup.cfg
index 503a71e36..360da7414 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -20,21 +20,11 @@ platforms = OS Independent

 [black]
 line-length = 100
-exclude =
-    echopype/tests
-    echopype/test_data
-    docs
-    setup.py

 [flake8]
 max-line-length = 100
 output-file = flake8_log.txt
 tee = True
-exclude =
-    echopype/tests
-    echopype/test_data
-    docs
-    setup.py

 [isort]
 known_first_party = echopype
@@ -44,8 +34,3 @@ include_trailing_comma = True
 force_grid_wrap = 0
 combine_as_imports = True
 line_length = 100
-skip =
-    echopype/tests
-    echopype/test_data
-    docs
-    setup.py
diff --git a/setup.py b/setup.py
index b99d6852b..09987f4e1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,6 @@
 from __future__ import absolute_import, division, print_function
-from setuptools import setup, find_packages
+
+from setuptools import find_packages, setup

 # Long description will go up on the pypi page
 with open("README.md") as file:
@@ -14,7 +15,6 @@
     long_description=LONG_DESCRIPTION,
     packages=find_packages(),
     include_package_data=True,
-    package_dir={"": "."},
     install_requires=INSTALL_REQUIRES,
     tests_require=["tox", "pandas"],
     py_modules=["_echopype_version"],

From cd8d402d722038b42bab1837dfeeee8d8c44eb0c Mon Sep 17 00:00:00 2001
From: lsetiawan
Date: Mon, 24 May 2021 14:30:27 -0700
Subject: [PATCH 16/25] Update pre-commit settings and run on ep code

---
 .pre-commit-config.yaml                      |   11 +-
 echopype/__init__.py                         |    7 +-
 echopype/calibrate/__init__.py               |    2 +-
 echopype/calibrate/api.py                    |   42 +-
 echopype/calibrate/calibrate_azfp.py         |  145 +-
 echopype/calibrate/calibrate_base.py         |   33 +-
 echopype/calibrate/calibrate_ek.py           |  585 ++++---
 echopype/convert/__init__.py                 |   10 +-
 echopype/convert/add_ancillary.py            |  177 ++-
 echopype/convert/api.py                      |  184 +--
 echopype/convert/convert.py                  |   10 +-
 echopype/convert/convert_combine.py          |  220 ++-
 echopype/convert/parse_ad2cp.py              |   26 +-
 echopype/convert/parse_azfp.py               |  408 +++--
 echopype/convert/parse_base.py               |  206 ++-
 echopype/convert/parse_ek60.py               |   11 +-
 echopype/convert/parse_ek80.py               |   29 +-
 echopype/convert/set_groups_ad2cp.py         |   10 +-
 echopype/convert/set_groups_azfp.py          |  337 ++--
 echopype/convert/set_groups_base.py          |  235 +--
 echopype/convert/set_groups_ek60.py          |  728 ++++++---
echopype/convert/set_groups_ek80.py | 881 +++++++---- echopype/convert/utils/ek_date_conversion.py | 17 +- echopype/convert/utils/ek_raw_io.py | 349 +++-- echopype/convert/utils/ek_raw_parsers.py | 1481 ++++++++++-------- echopype/echodata/echodata.py | 54 +- echopype/preprocess/api.py | 90 +- echopype/preprocess/noise_est.py | 63 +- echopype/utils/io.py | 32 +- echopype/utils/repr.py | 2 +- echopype/utils/uwa.py | 120 +- 31 files changed, 3947 insertions(+), 2558 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3aee6ec9b..2c3e76227 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,13 @@ -exclude: ^docs/ +exclude: | + (?x)^( + docs/| + echopype/tests/| + echopype/test_data/| + echopype/process/| + echopype/visualize/| + echopype/convert/add_ancillary.py| + echopype/convert/convert_combine.py + ) repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.4.0 diff --git a/echopype/__init__.py b/echopype/__init__.py index cbb800412..1454c0c3c 100644 --- a/echopype/__init__.py +++ b/echopype/__init__.py @@ -1,9 +1,10 @@ from __future__ import absolute_import, division, print_function + +from _echopype_version import version as __version__ # noqa + +from . import calibrate from .convert.api import open_raw from .echodata.api import open_converted from .process import Process -from . import calibrate - -from _echopype_version import version as __version__ # noqa __all__ = [open_raw, open_converted, calibrate] diff --git a/echopype/calibrate/__init__.py b/echopype/calibrate/__init__.py index 1c7e65d9a..92caef4de 100644 --- a/echopype/calibrate/__init__.py +++ b/echopype/calibrate/__init__.py @@ -1,3 +1,3 @@ -from .api import compute_Sv, compute_Sp +from .api import compute_Sp, compute_Sv # __all__ = [compute_Sv, compute_Sp] diff --git a/echopype/calibrate/api.py b/echopype/calibrate/api.py index 8036d74b6..1c8788e20 100644 --- a/echopype/calibrate/api.py +++ b/echopype/calibrate/api.py @@ -1,27 +1,23 @@ -from .calibrate_ek import CalibrateEK60, CalibrateEK80 -from .calibrate_azfp import CalibrateAZFP from ..echodata import EchoData +from .calibrate_azfp import CalibrateAZFP +from .calibrate_ek import CalibrateEK60, CalibrateEK80 -CALIBRATOR = { - 'EK60': CalibrateEK60, - 'EK80': CalibrateEK80, - 'AZFP': CalibrateAZFP -} +CALIBRATOR = {"EK60": CalibrateEK60, "EK80": CalibrateEK80, "AZFP": CalibrateAZFP} def _compute_cal( - cal_type, - echodata: EchoData, - env_params=None, - cal_params=None, - waveform_mode=None, - encode_mode=None + cal_type, + echodata: EchoData, + env_params=None, + cal_params=None, + waveform_mode=None, + encode_mode=None, ): # Sanity check on inputs - if (waveform_mode is not None) and (waveform_mode not in ('BB', 'CW')): - raise ValueError('Input waveform_mode not recognized!') - if (encode_mode is not None) and (encode_mode not in ('complex', 'power')): - raise ValueError('Input encode_mode not recognized!') + if (waveform_mode is not None) and (waveform_mode not in ("BB", "CW")): + raise ValueError("Input waveform_mode not recognized!") + if (encode_mode is not None) and (encode_mode not in ("complex", "power")): + raise ValueError("Input encode_mode not recognized!") # Set up calibration object cal_obj = CALIBRATOR[echodata.sonar_model]( @@ -31,19 +27,17 @@ def _compute_cal( waveform_mode=waveform_mode, ) # Perform calibration - if cal_type == 'Sv': + if cal_type == "Sv": return cal_obj.compute_Sv(waveform_mode=waveform_mode, encode_mode=encode_mode) else: return 
cal_obj.compute_Sp(waveform_mode=waveform_mode, encode_mode=encode_mode) def compute_Sv(echodata: EchoData, **kwargs): - """Compute volume backscattering strength (Sv) from raw data. - """ - return _compute_cal(cal_type='Sv', echodata=echodata, **kwargs) + """Compute volume backscattering strength (Sv) from raw data.""" + return _compute_cal(cal_type="Sv", echodata=echodata, **kwargs) def compute_Sp(echodata: EchoData, **kwargs): - """Compute point backscattering strength (Sp) from raw data. - """ - return _compute_cal(cal_type='Sp', echodata=echodata, **kwargs) + """Compute point backscattering strength (Sp) from raw data.""" + return _compute_cal(cal_type="Sp", echodata=echodata, **kwargs) diff --git a/echopype/calibrate/calibrate_azfp.py b/echopype/calibrate/calibrate_azfp.py index bcae7fd04..118a08d62 100644 --- a/echopype/calibrate/calibrate_azfp.py +++ b/echopype/calibrate/calibrate_azfp.py @@ -1,17 +1,17 @@ import numpy as np -from .calibrate_ek import CalibrateBase -from .calibrate_base import CAL_PARAMS, ENV_PARAMS + from ..utils import uwa +from .calibrate_base import CAL_PARAMS, ENV_PARAMS +from .calibrate_ek import CalibrateBase class CalibrateAZFP(CalibrateBase): - def __init__(self, echodata, env_params, cal_params, **kwargs): super().__init__(echodata) # initialize env and cal params self.env_params = dict.fromkeys(ENV_PARAMS) - self.cal_params = dict.fromkeys(CAL_PARAMS['AZFP']) + self.cal_params = dict.fromkeys(CAL_PARAMS["AZFP"]) # load env and cal parameters if env_params is None: @@ -32,9 +32,11 @@ def get_cal_params(self, cal_params): cal_params : dict """ # Params from the Beam group - for p in ['EL', 'DS', 'TVR', 'VTX', 'Sv_offset', 'equivalent_beam_angle']: + for p in ["EL", "DS", "TVR", "VTX", "Sv_offset", "equivalent_beam_angle"]: # substitute if None in user input - self.cal_params[p] = cal_params[p] if p in cal_params else self.echodata.beam[p] + self.cal_params[p] = ( + cal_params[p] if p in cal_params else self.echodata.beam[p] + ) def get_env_params(self, env_params): """Get env params using user inputs or values from data file. @@ -44,30 +46,34 @@ def get_env_params(self, env_params): env_params : dict """ # Temperature comes from either user input or data file - self.env_params['temperature'] = (env_params['temperature'] - if 'temperature' in env_params - else self.echodata.environment['temperature']) + self.env_params["temperature"] = ( + env_params["temperature"] + if "temperature" in env_params + else self.echodata.environment["temperature"] + ) # Salinity and pressure always come from user input - if ('salinity' not in env_params) or ('pressure' not in env_params): - raise ReferenceError('Please supply both salinity and pressure in env_params.') + if ("salinity" not in env_params) or ("pressure" not in env_params): + raise ReferenceError( + "Please supply both salinity and pressure in env_params." 
+ ) else: - self.env_params['salinity'] = env_params['salinity'] - self.env_params['pressure'] = env_params['pressure'] + self.env_params["salinity"] = env_params["salinity"] + self.env_params["pressure"] = env_params["pressure"] # Always calculate sound speed and absorption - self.env_params['sound_speed'] = uwa.calc_sound_speed( - temperature=self.env_params['temperature'], - salinity=self.env_params['salinity'], - pressure=self.env_params['pressure'], - formula_source='AZFP' + self.env_params["sound_speed"] = uwa.calc_sound_speed( + temperature=self.env_params["temperature"], + salinity=self.env_params["salinity"], + pressure=self.env_params["pressure"], + formula_source="AZFP", ) - self.env_params['sound_absorption'] = uwa.calc_absorption( - frequency=self.echodata.beam['frequency'], - temperature=self.env_params['temperature'], - salinity=self.env_params['salinity'], - pressure=self.env_params['pressure'], - formula_source='AZFP' + self.env_params["sound_absorption"] = uwa.calc_absorption( + frequency=self.echodata.beam["frequency"], + temperature=self.env_params["temperature"], + salinity=self.env_params["salinity"], + pressure=self.env_params["pressure"], + formula_source="AZFP", ) def compute_range_meter(self, cal_type): @@ -82,23 +88,31 @@ def compute_range_meter(self, cal_type): 'Sp' for calculating point backscattering strength """ # Notation below follows p.86 of user manual - N = self.echodata.vendor['number_of_samples_per_average_bin'] # samples per bin - f = self.echodata.vendor['digitization_rate'] # digitization rate - L = self.echodata.vendor['lockout_index'] # number of lockout samples - sound_speed = self.env_params['sound_speed'] - bins_to_avg = 1 # keep this in ref of AZFP matlab code, set to 1 since we want to calculate from raw data + N = self.echodata.vendor["number_of_samples_per_average_bin"] # samples per bin + f = self.echodata.vendor["digitization_rate"] # digitization rate + L = self.echodata.vendor["lockout_index"] # number of lockout samples + sound_speed = self.env_params["sound_speed"] + bins_to_avg = 1 # keep this in ref of AZFP matlab code, set to 1 since we want to calculate from raw data # Calculate range using parameters for each freq # This is "the range to the centre of the sampling volume for bin m" from p.86 of user manual - if cal_type == 'Sv': + if cal_type == "Sv": range_offset = 0 else: - range_offset = sound_speed * self.echodata.beam['transmit_duration_nominal'] / 4 # from matlab code - range_meter = (sound_speed * L / (2 * f) - + sound_speed / 4 * (((2 * (self.echodata.beam.range_bin + 1) - 1) * N * bins_to_avg - 1) / f - + self.echodata.beam['transmit_duration_nominal']) - - range_offset) - range_meter.name = 'range' # add name to facilitate xr.merge + range_offset = ( + sound_speed * self.echodata.beam["transmit_duration_nominal"] / 4 + ) # from matlab code + range_meter = ( + sound_speed * L / (2 * f) + + sound_speed + / 4 + * ( + ((2 * (self.echodata.beam.range_bin + 1) - 1) * N * bins_to_avg - 1) / f + + self.echodata.beam["transmit_duration_nominal"] + ) + - range_offset + ) + range_meter.name = "range" # add name to facilitate xr.merge self.range_meter = range_meter @@ -112,30 +126,49 @@ def _cal_power(self, cal_type, **kwargs): See calc_Sv_offset() in convert/azfp.py """ # Compute range in meters - self.compute_range_meter(cal_type=cal_type) # range computation different for Sv and Sp per AZFP matlab code + self.compute_range_meter( + cal_type=cal_type + ) # range computation different for Sv and Sp per AZFP matlab code # Compute 
various params - spreading_loss = 20 * np.log10(self.range_meter) # TODO: take care of dividing by zero encountered in log10 - absorption_loss = 2 * self.env_params['sound_absorption'] * self.range_meter - SL = self.cal_params['TVR'] + 20 * np.log10(self.cal_params['VTX']) # eq.(2) - a = self.cal_params['DS'] # scaling factor (slope) in Fig.G-1, units Volts/dB], see p.84 - EL = self.cal_params['EL'] - 2.5 / a + self.echodata.beam.backscatter_r / (26214 * a) # eq.(5) - - if cal_type == 'Sv': + spreading_loss = 20 * np.log10( + self.range_meter + ) # TODO: take care of dividing by zero encountered in log10 + absorption_loss = 2 * self.env_params["sound_absorption"] * self.range_meter + SL = self.cal_params["TVR"] + 20 * np.log10(self.cal_params["VTX"]) # eq.(2) + a = self.cal_params[ + "DS" + ] # scaling factor (slope) in Fig.G-1, units Volts/dB], see p.84 + EL = ( + self.cal_params["EL"] + - 2.5 / a + + self.echodata.beam.backscatter_r / (26214 * a) + ) # eq.(5) + + if cal_type == "Sv": # eq.(9) - out = (EL - SL + spreading_loss + absorption_loss - - 10 * np.log10(0.5 * self.env_params['sound_speed'] * - self.echodata.beam['transmit_duration_nominal'] * - self.cal_params['equivalent_beam_angle']) - + self.cal_params['Sv_offset']) # see p.90-91 for this correction to Sv - out.name = 'Sv' - - elif cal_type == 'Sp': + out = ( + EL + - SL + + spreading_loss + + absorption_loss + - 10 + * np.log10( + 0.5 + * self.env_params["sound_speed"] + * self.echodata.beam["transmit_duration_nominal"] + * self.cal_params["equivalent_beam_angle"] + ) + + self.cal_params["Sv_offset"] + ) # see p.90-91 for this correction to Sv + out.name = "Sv" + + elif cal_type == "Sp": # eq.(10) out = EL - SL + 2 * spreading_loss + absorption_loss - out.name = 'Sp' + out.name = "Sp" else: - raise ValueError('cal_type not recognized!') + raise ValueError("cal_type not recognized!") # Attach calculated range (with units meter) into data set out = out.to_dataset() @@ -147,7 +180,7 @@ def _cal_power(self, cal_type, **kwargs): return out def compute_Sv(self, **kwargs): - return self._cal_power(cal_type='Sv') + return self._cal_power(cal_type="Sv") def compute_Sp(self, **kwargs): - return self._cal_power(cal_type='Sp') + return self._cal_power(cal_type="Sp") diff --git a/echopype/calibrate/calibrate_base.py b/echopype/calibrate/calibrate_base.py index cdd6c1a58..f457e4f69 100644 --- a/echopype/calibrate/calibrate_base.py +++ b/echopype/calibrate/calibrate_base.py @@ -1,39 +1,23 @@ import xarray as xr -ENV_PARAMS = ( - 'temperature', - 'salinity', - 'pressure', - 'sound_speed', - 'sound_absorption' -) +ENV_PARAMS = ("temperature", "salinity", "pressure", "sound_speed", "sound_absorption") CAL_PARAMS = { - 'EK': ( - 'sa_correction', - 'gain_correction', - 'equivalent_beam_angle' - ), - 'AZFP': ( - 'EL', - 'DS', - 'TVR', - 'VTX', - 'equivalent_beam_angle', - 'Sv_offset' - ) + "EK": ("sa_correction", "gain_correction", "equivalent_beam_angle"), + "AZFP": ("EL", "DS", "TVR", "VTX", "equivalent_beam_angle", "Sv_offset"), } class CalibrateBase: - """Class to handle calibration for all sonar models. 
- """ + """Class to handle calibration for all sonar models.""" def __init__(self, echodata): self.echodata = echodata self.env_params = None # env_params are set in child class self.cal_params = None # cal_params are set in child class - self.range_meter = None # range_meter is computed in compute_Sv/Sp in child class + self.range_meter = ( + None # range_meter is computed in compute_Sv/Sp in child class + ) def get_env_params(self, **kwargs): pass @@ -69,8 +53,7 @@ def compute_Sp(self, **kwargs): pass def _add_params_to_output(self, ds_out): - """Add all cal and env parameters to output Sv dataset. - """ + """Add all cal and env parameters to output Sv dataset.""" # Add env_params for key, val in self.env_params.items(): ds_out[key] = val diff --git a/echopype/calibrate/calibrate_ek.py b/echopype/calibrate/calibrate_ek.py index b0a8410ac..8d0e82855 100644 --- a/echopype/calibrate/calibrate_ek.py +++ b/echopype/calibrate/calibrate_ek.py @@ -1,18 +1,18 @@ import numpy as np -from scipy import signal import xarray as xr +from scipy import signal + from ..utils import uwa -from .calibrate_base import CalibrateBase, CAL_PARAMS, ENV_PARAMS +from .calibrate_base import CAL_PARAMS, ENV_PARAMS, CalibrateBase class CalibrateEK(CalibrateBase): - def __init__(self, echodata): super().__init__(echodata) # cal params specific to EK echosounders self.env_params = dict.fromkeys(ENV_PARAMS) - self.cal_params = dict.fromkeys(CAL_PARAMS['EK']) + self.cal_params = dict.fromkeys(CAL_PARAMS["EK"]) def compute_range_meter(self, waveform_mode, tvg_correction_factor): """ @@ -30,26 +30,39 @@ def compute_range_meter(self, waveform_mode, tvg_correction_factor): range_meter : xr.DataArray range in units meter """ - if waveform_mode == 'CW': - sample_thickness = self.echodata.beam['sample_interval'] * self.env_params['sound_speed'] / 2 + if waveform_mode == "CW": + sample_thickness = ( + self.echodata.beam["sample_interval"] + * self.env_params["sound_speed"] + / 2 + ) # TODO: Check with the AFSC about the half sample difference - range_meter = (self.echodata.beam.range_bin - - tvg_correction_factor) * sample_thickness # [frequency x range_bin] - elif waveform_mode == 'BB': + range_meter = ( + self.echodata.beam.range_bin - tvg_correction_factor + ) * sample_thickness # [frequency x range_bin] + elif waveform_mode == "BB": # TODO: bug: right now only first ping_time has non-nan range - shift = self.echodata.beam['transmit_duration_nominal'] # based on Lar Anderson's Matlab code + shift = self.echodata.beam[ + "transmit_duration_nominal" + ] # based on Lar Anderson's Matlab code # TODO: once we allow putting in arbitrary sound_speed, change below to use linearly-interpolated values - range_meter = ((self.echodata.beam.range_bin * self.echodata.beam['sample_interval'] - shift) - * self.env_params['sound_speed'].squeeze() / 2) + range_meter = ( + ( + self.echodata.beam.range_bin * self.echodata.beam["sample_interval"] + - shift + ) + * self.env_params["sound_speed"].squeeze() + / 2 + ) # TODO: Lar Anderson's code include a slicing by minRange with a default of 0.02 m, # need to ask why and see if necessary here else: - raise ValueError('Input waveform_mode not recognized!') + raise ValueError("Input waveform_mode not recognized!") # make order of dims conform with the order of backscatter data - range_meter = range_meter.transpose('frequency', 'ping_time', 'range_bin') + range_meter = range_meter.transpose("frequency", "ping_time", "range_bin") range_meter = range_meter.where(range_meter > 0, 0) # set negative ranges 
to 0 - range_meter.name = 'range' # add name to facilitate xr.merge + range_meter.name = "range" # add name to facilitate xr.merge self.range_meter = range_meter @@ -68,11 +81,11 @@ def _get_vend_cal_params_power(self, param): if param not in ds_vend: return None - if param not in ['sa_correction', 'gain_correction']: + if param not in ["sa_correction", "gain_correction"]: raise ValueError(f"Unknown parameter {param}") # Drop NaN ping_time for transmit_duration_nominal - if np.any(np.isnan(self.echodata.beam['transmit_duration_nominal'])): + if np.any(np.isnan(self.echodata.beam["transmit_duration_nominal"])): # TODO: resolve performance warning: # /Users/wu-jung/miniconda3/envs/echopype_jan2021/lib/python3.8/site-packages/xarray/core/indexing.py:1369: # PerformanceWarning: Slicing is producing a large chunk. To accept the large @@ -83,14 +96,19 @@ def _get_vend_cal_params_power(self, param): # >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): # ... array[indexer] # return self.array[key] - self.echodata.beam = self.echodata.beam.dropna(dim='ping_time', how='any', - subset=['transmit_duration_nominal']) + self.echodata.beam = self.echodata.beam.dropna( + dim="ping_time", how="any", subset=["transmit_duration_nominal"] + ) # Find index with correct pulse length - unique_pulse_length = np.unique(self.echodata.beam['transmit_duration_nominal'], axis=1) - idx_wanted = np.abs(ds_vend['pulse_length'] - unique_pulse_length).argmin(dim='pulse_length_bin') + unique_pulse_length = np.unique( + self.echodata.beam["transmit_duration_nominal"], axis=1 + ) + idx_wanted = np.abs(ds_vend["pulse_length"] - unique_pulse_length).argmin( + dim="pulse_length_bin" + ) - return ds_vend[param].sel(pulse_length_bin=idx_wanted).drop('pulse_length_bin') + return ds_vend[param].sel(pulse_length_bin=idx_wanted).drop("pulse_length_bin") def get_cal_params(self, cal_params): """Get cal params using user inputs or values from data file. @@ -100,15 +118,19 @@ def get_cal_params(self, cal_params): cal_params : dict """ # Params from the Vendor group - params_from_vend = ['sa_correction', 'gain_correction'] + params_from_vend = ["sa_correction", "gain_correction"] for p in params_from_vend: # substitute if None in user input - self.cal_params[p] = cal_params[p] if p in cal_params else self._get_vend_cal_params_power(p) + self.cal_params[p] = ( + cal_params[p] if p in cal_params else self._get_vend_cal_params_power(p) + ) # Other params - self.cal_params['equivalent_beam_angle'] = (cal_params['equivalent_beam_angle'] - if 'equivalent_beam_angle' in cal_params - else self.echodata.beam['equivalent_beam_angle']) + self.cal_params["equivalent_beam_angle"] = ( + cal_params["equivalent_beam_angle"] + if "equivalent_beam_angle" in cal_params + else self.echodata.beam["equivalent_beam_angle"] + ) def _cal_power(self, cal_type, use_beam_power=False): """Calibrate power data from EK60 and EK80. 
@@ -134,42 +156,49 @@ def _cal_power(self, cal_type, use_beam_power=False): beam = self.echodata.beam # Derived params - wavelength = self.env_params['sound_speed'] / beam['frequency'] # wavelength + wavelength = self.env_params["sound_speed"] / beam["frequency"] # wavelength range_meter = self.range_meter # Transmission loss spreading_loss = 20 * np.log10(range_meter.where(range_meter >= 1, other=1)) - absorption_loss = 2 * self.env_params['sound_absorption'] * range_meter + absorption_loss = 2 * self.env_params["sound_absorption"] * range_meter - if cal_type == 'Sv': + if cal_type == "Sv": # Calc gain - CSv = (10 * np.log10(beam['transmit_power']) - + 2 * self.cal_params['gain_correction'] - + self.cal_params['equivalent_beam_angle'] - + 10 * np.log10(wavelength ** 2 - * beam['transmit_duration_nominal'] - * self.env_params['sound_speed'] - / (32 * np.pi ** 2))) + CSv = ( + 10 * np.log10(beam["transmit_power"]) + + 2 * self.cal_params["gain_correction"] + + self.cal_params["equivalent_beam_angle"] + + 10 + * np.log10( + wavelength ** 2 + * beam["transmit_duration_nominal"] + * self.env_params["sound_speed"] + / (32 * np.pi ** 2) + ) + ) # Calibration and echo integration - out = (beam['backscatter_r'] - + spreading_loss - + absorption_loss - - CSv - 2 * self.cal_params['sa_correction']) - out.name = 'Sv' - - elif cal_type == 'Sp': + out = ( + beam["backscatter_r"] + + spreading_loss + + absorption_loss + - CSv + - 2 * self.cal_params["sa_correction"] + ) + out.name = "Sv" + + elif cal_type == "Sp": # Calc gain - CSp = (10 * np.log10(beam['transmit_power']) - + 2 * self.cal_params['gain_correction'] - + 10 * np.log10(wavelength ** 2 / (16 * np.pi ** 2))) + CSp = ( + 10 * np.log10(beam["transmit_power"]) + + 2 * self.cal_params["gain_correction"] + + 10 * np.log10(wavelength ** 2 / (16 * np.pi ** 2)) + ) # Calibration and echo integration - out = (beam['backscatter_r'] - + spreading_loss * 2 - + absorption_loss - - CSp) - out.name = 'Sp' + out = beam["backscatter_r"] + spreading_loss * 2 + absorption_loss - CSp + out.name = "Sp" # Attach calculated range (with units meter) into data set out = out.to_dataset() @@ -182,7 +211,6 @@ def _cal_power(self, cal_type, use_beam_power=False): class CalibrateEK60(CalibrateEK): - def __init__(self, echodata, env_params, cal_params, **kwargs): super().__init__(echodata) @@ -195,7 +223,7 @@ def __init__(self, echodata, env_params, cal_params, **kwargs): self.get_cal_params(cal_params) # default to CW mode recorded as power samples - self.compute_range_meter(waveform_mode='CW', tvg_correction_factor=2) + self.compute_range_meter(waveform_mode="CW", tvg_correction_factor=2) def get_env_params(self, env_params, **kwargs): """Get env params using user inputs or values from data file. 
@@ -209,30 +237,42 @@ def get_env_params(self, env_params, **kwargs): env_params : dict """ # Re-calculate environment parameters if user supply all env variables - if ('temperature' in env_params) and ('salinity' in env_params) and ('pressure' in env_params): - for p in ['temperature', 'salinity', 'pressure']: + if ( + ("temperature" in env_params) + and ("salinity" in env_params) + and ("pressure" in env_params) + ): + for p in ["temperature", "salinity", "pressure"]: self.env_params[p] = env_params[p] - self.env_params['sound_speed'] = uwa.calc_sound_speed(temperature=self.env_params['temperature'], - salinity=self.env_params['salinity'], - pressure=self.env_params['pressure']) - self.env_params['sound_absorption'] = uwa.calc_absorption(frequency=self.echodata.beam['frequency'], - temperature=self.env_params['temperature'], - salinity=self.env_params['salinity'], - pressure=self.env_params['pressure']) + self.env_params["sound_speed"] = uwa.calc_sound_speed( + temperature=self.env_params["temperature"], + salinity=self.env_params["salinity"], + pressure=self.env_params["pressure"], + ) + self.env_params["sound_absorption"] = uwa.calc_absorption( + frequency=self.echodata.beam["frequency"], + temperature=self.env_params["temperature"], + salinity=self.env_params["salinity"], + pressure=self.env_params["pressure"], + ) # Otherwise get sound speed and absorption from user inputs or raw data file else: - self.env_params['sound_speed'] = (env_params['sound_speed'] - if 'sound_speed' in env_params - else self.echodata.environment['sound_speed_indicative']) - self.env_params['sound_absorption'] = (env_params['sound_absorption'] - if 'sound_absorption' in env_params - else self.echodata.environment['absorption_indicative']) + self.env_params["sound_speed"] = ( + env_params["sound_speed"] + if "sound_speed" in env_params + else self.echodata.environment["sound_speed_indicative"] + ) + self.env_params["sound_absorption"] = ( + env_params["sound_absorption"] + if "sound_absorption" in env_params + else self.echodata.environment["absorption_indicative"] + ) def compute_Sv(self, **kwargs): - return self._cal_power(cal_type='Sv') + return self._cal_power(cal_type="Sv") def compute_Sp(self, **kwargs): - return self._cal_power(cal_type='Sp') + return self._cal_power(cal_type="Sp") class CalibrateEK80(CalibrateEK): @@ -247,7 +287,7 @@ def __init__(self, echodata, env_params, cal_params, waveform_mode): # cal params are those used by both complex and power data calibration # TODO: add complex data-specific params, like the freq-dependent gain factor self.env_params = dict.fromkeys(ENV_PARAMS) - self.cal_params = dict.fromkeys(CAL_PARAMS['EK']) + self.cal_params = dict.fromkeys(CAL_PARAMS["EK"]) # TODO: make waveform_mode and encode_mode class attributes # load env and cal parameters @@ -277,41 +317,63 @@ def get_env_params(self, env_params, waveform_mode=None): ``BB`` for BB-mode samples, recorded as complex samples """ # Use center frequency if in BB mode, else use nominal channel frequency - if waveform_mode == 'BB': - freq = (self.echodata.beam['frequency_start'] + self.echodata.beam['frequency_end']) / 2 + if waveform_mode == "BB": + freq = ( + self.echodata.beam["frequency_start"] + + self.echodata.beam["frequency_end"] + ) / 2 else: - freq = self.echodata.beam['frequency'] + freq = self.echodata.beam["frequency"] # Re-calculate environment parameters if user supply all env variables - if ('temperature' in env_params) and ('salinity' in env_params) and ('pressure' in env_params): - for p in 
['temperature', 'salinity', 'pressure']: + if ( + ("temperature" in env_params) + and ("salinity" in env_params) + and ("pressure" in env_params) + ): + for p in ["temperature", "salinity", "pressure"]: self.env_params[p] = env_params[p] - self.env_params['sound_speed'] = uwa.calc_sound_speed(temperature=self.env_params['temperature'], - salinity=self.env_params['salinity'], - pressure=self.env_params['pressure']) - self.env_params['sound_absorption'] = uwa.calc_absorption(frequency=freq, - temperature=self.env_params['temperature'], - salinity=self.env_params['salinity'], - pressure=self.env_params['pressure']) + self.env_params["sound_speed"] = uwa.calc_sound_speed( + temperature=self.env_params["temperature"], + salinity=self.env_params["salinity"], + pressure=self.env_params["pressure"], + ) + self.env_params["sound_absorption"] = uwa.calc_absorption( + frequency=freq, + temperature=self.env_params["temperature"], + salinity=self.env_params["salinity"], + pressure=self.env_params["pressure"], + ) # Otherwise # get temperature, salinity, and pressure from raw data file # get sound speed from user inputs or raw data file # get absorption from user inputs or computing from env params stored in raw data file else: # pressure is encoded as "depth" in EK80 # TODO: change depth to pressure in EK80 file? - for p1, p2 in zip(['temperature', 'salinity', 'pressure'], - ['temperature', 'salinity', 'depth']): - self.env_params[p1] = env_params[p1] if p1 in env_params else self.echodata.environment[p2] - self.env_params['sound_speed'] = (env_params['sound_speed'] - if 'sound_speed' in env_params - else self.echodata.environment['sound_speed_indicative']) - self.env_params['sound_absorption'] = ( - env_params['sound_absorption'] - if 'sound_absorption' in env_params - else uwa.calc_absorption(frequency=freq, - temperature=self.env_params['temperature'], - salinity=self.env_params['salinity'], - pressure=self.env_params['pressure'])) + for p1, p2 in zip( + ["temperature", "salinity", "pressure"], + ["temperature", "salinity", "depth"], + ): + self.env_params[p1] = ( + env_params[p1] + if p1 in env_params + else self.echodata.environment[p2] + ) + self.env_params["sound_speed"] = ( + env_params["sound_speed"] + if "sound_speed" in env_params + else self.echodata.environment["sound_speed_indicative"] + ) + self.env_params["sound_absorption"] = ( + env_params["sound_absorption"] + if "sound_absorption" in env_params + else uwa.calc_absorption( + frequency=freq, + temperature=self.env_params["temperature"], + salinity=self.env_params["salinity"], + pressure=self.env_params["pressure"], + ) + ) def _get_vend_cal_params_complex(self, channel_id, filter_name, param_type): """Get filter coefficients stored in the Vendor group attributes. 
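The precedence logic in the two get_env_params hunks above is the same shape for EK60 and EK80: a complete user-supplied temperature/salinity/pressure triple forces recomputation of sound speed and absorption, otherwise explicit user values win over values recorded in the raw file. Below is a condensed, dependency-free sketch closest to the EK60 branch; calc_sound_speed and calc_absorption are injected stand-ins for the uwa helpers. The EK80 branch additionally pulls T/S/P from the file (with "depth" standing in for pressure) and computes absorption at the BB center frequency (frequency_start + frequency_end) / 2 when none is supplied.

def resolve_env(user, stored, calc_sound_speed, calc_absorption, frequency):
    # Resolve environment parameters with the precedence used above;
    # dict keys mirror the diff.
    env = {}
    tsp = ("temperature", "salinity", "pressure")
    if all(k in user for k in tsp):
        # Full T/S/P supplied by the user: recompute derived quantities
        env.update({k: user[k] for k in tsp})
        env["sound_speed"] = calc_sound_speed(
            temperature=env["temperature"],
            salinity=env["salinity"],
            pressure=env["pressure"],
        )
        env["sound_absorption"] = calc_absorption(
            frequency=frequency,
            temperature=env["temperature"],
            salinity=env["salinity"],
            pressure=env["pressure"],
        )
    else:
        # Otherwise prefer explicit user inputs, then file-recorded values
        env["sound_speed"] = user.get(
            "sound_speed", stored["sound_speed_indicative"]
        )
        env["sound_absorption"] = user.get(
            "sound_absorption", stored["absorption_indicative"]
        )
    return env

# Example: user supplies only sound speed; absorption falls back to the file
# env = resolve_env(
#     {"sound_speed": 1485.0},
#     {"sound_speed_indicative": 1480.0, "absorption_indicative": 0.0098},
#     None, None, 38e3,
# )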
@@ -325,33 +387,46 @@ def _get_vend_cal_params_complex(self, channel_id, filter_name, param_type): param_type : str 'coeff' or 'decimation' """ - if param_type == 'coeff': - v = (self.echodata.vendor.attrs['%s %s filter_r' % (channel_id, filter_name)] - + 1j * np.array(self.echodata.vendor.attrs['%s %s filter_i' % (channel_id, filter_name)])) + if param_type == "coeff": + v = self.echodata.vendor.attrs[ + "%s %s filter_r" % (channel_id, filter_name) + ] + 1j * np.array( + self.echodata.vendor.attrs["%s %s filter_i" % (channel_id, filter_name)] + ) if v.size == 1: v = np.expand_dims(v, axis=0) # expand dims for convolution return v else: - return self.echodata.vendor.attrs['%s %s decimation' % (channel_id, filter_name)] - - def _tapered_chirp(self, transmit_duration_nominal, slope, transmit_power, - frequency=None, frequency_start=None, frequency_end=None): - """Create a baseline chirp template. - """ + return self.echodata.vendor.attrs[ + "%s %s decimation" % (channel_id, filter_name) + ] + + def _tapered_chirp( + self, + transmit_duration_nominal, + slope, + transmit_power, + frequency=None, + frequency_start=None, + frequency_end=None, + ): + """Create a baseline chirp template.""" if frequency_start is None and frequency_end is None: # CW waveform frequency_start = frequency frequency_end = frequency t = np.arange(0, transmit_duration_nominal, 1 / self.fs) - nwtx = (int(2 * np.floor(slope * t.size))) # length of tapering window + nwtx = int(2 * np.floor(slope * t.size)) # length of tapering window wtx_tmp = np.hanning(nwtx) # hanning window - nwtxh = (int(np.round(nwtx / 2))) # half length of the hanning window - wtx = np.concatenate([wtx_tmp[0:nwtxh], - np.ones((t.size - nwtx)), - wtx_tmp[nwtxh:]]) # assemble full tapering window - y_tmp = (np.sqrt((transmit_power / 4) * (2 * self.z_et)) # amplitude - * signal.chirp(t, frequency_start, t[-1], frequency_end) - * wtx) # taper and scale linear chirp + nwtxh = int(np.round(nwtx / 2)) # half length of the hanning window + wtx = np.concatenate( + [wtx_tmp[0:nwtxh], np.ones((t.size - nwtx)), wtx_tmp[nwtxh:]] + ) # assemble full tapering window + y_tmp = ( + np.sqrt((transmit_power / 4) * (2 * self.z_et)) # amplitude + * signal.chirp(t, frequency_start, t[-1], frequency_end) + * wtx + ) # taper and scale linear chirp return y_tmp / np.max(np.abs(y_tmp)), t # amp has no actual effect def _filter_decimate_chirp(self, y, ch_id): @@ -365,10 +440,10 @@ def _filter_decimate_chirp(self, y, ch_id): channel_id to select the right coefficients and factors """ # filter coefficients and decimation factor - wbt_fil = self._get_vend_cal_params_complex(ch_id, 'WBT', 'coeff') - pc_fil = self._get_vend_cal_params_complex(ch_id, 'PC', 'coeff') - wbt_decifac = self._get_vend_cal_params_complex(ch_id, 'WBT', 'decimation') - pc_decifac = self._get_vend_cal_params_complex(ch_id, 'PC', 'decimation') + wbt_fil = self._get_vend_cal_params_complex(ch_id, "WBT", "coeff") + pc_fil = self._get_vend_cal_params_complex(ch_id, "PC", "coeff") + wbt_decifac = self._get_vend_cal_params_complex(ch_id, "WBT", "decimation") + pc_decifac = self._get_vend_cal_params_complex(ch_id, "PC", "decimation") # WBT filter and decimation ytx_wbt = signal.convolve(y, wbt_fil) @@ -379,7 +454,9 @@ def _filter_decimate_chirp(self, y, ch_id): pc_fil = [pc_fil.squeeze()] ytx_pc = signal.convolve(ytx_wbt_deci, pc_fil) ytx_pc_deci = ytx_pc[0::pc_decifac] - ytx_pc_deci_time = np.arange(ytx_pc_deci.size) * 1 / self.fs * wbt_decifac * pc_decifac + ytx_pc_deci_time = ( + np.arange(ytx_pc_deci.size) * 1 / 
self.fs * wbt_decifac * pc_decifac + ) return ytx_pc_deci, ytx_pc_deci_time @@ -397,12 +474,16 @@ def _get_tau_effective(ytx, fs_deci, waveform_mode): ``CW`` for CW-mode samples, either recorded as complex or power samples ``BB`` for BB-mode samples, recorded as complex samples """ - if waveform_mode == 'BB': - ytxa = signal.convolve(ytx, np.flip(np.conj(ytx))) / np.linalg.norm(ytx) ** 2 + if waveform_mode == "BB": + ytxa = ( + signal.convolve(ytx, np.flip(np.conj(ytx))) / np.linalg.norm(ytx) ** 2 + ) ptxa = abs(ytxa) ** 2 - elif waveform_mode == 'CW': + elif waveform_mode == "CW": ptxa = np.abs(ytx) ** 2 # energy of transmit signal - return ptxa.sum() / (ptxa.max() * fs_deci) # TODO: verify fs_deci = 1.5e6 in spheroid data sets + return ptxa.sum() / ( + ptxa.max() * fs_deci + ) # TODO: verify fs_deci = 1.5e6 in spheroid data sets def get_transmit_chirp(self, waveform_mode): """Reconstruct transmit signal and compute effective pulse length. @@ -414,36 +495,52 @@ def get_transmit_chirp(self, waveform_mode): ``BB`` for BB-mode samples, recorded as complex samples """ # Make sure it is BB mode data - if waveform_mode == 'BB' \ - and (('frequency_start' not in self.echodata.beam) - or ('frequency_end' not in self.echodata.beam)): - raise TypeError('File does not contain BB mode complex samples!') + if waveform_mode == "BB" and ( + ("frequency_start" not in self.echodata.beam) + or ("frequency_end" not in self.echodata.beam) + ): + raise TypeError("File does not contain BB mode complex samples!") y_all = {} y_time_all = {} tau_effective = {} for freq in self.echodata.beam.frequency.values: # TODO: currently only deal with the case with a fixed tx key param values within a channel - if waveform_mode == 'BB': - tx_param_names = ['transmit_duration_nominal', 'slope', 'transmit_power', - 'frequency_start', 'frequency_end'] + if waveform_mode == "BB": + tx_param_names = [ + "transmit_duration_nominal", + "slope", + "transmit_power", + "frequency_start", + "frequency_end", + ] else: - tx_param_names = ['transmit_duration_nominal', 'slope', 'transmit_power', - 'frequency'] + tx_param_names = [ + "transmit_duration_nominal", + "slope", + "transmit_power", + "frequency", + ] tx_params = {} for p in tx_param_names: tx_params[p] = np.unique(self.echodata.beam[p].sel(frequency=freq)) if tx_params[p].size != 1: - raise TypeError('File contains changing %s!' % p) + raise TypeError("File contains changing %s!" 
% p) y_tmp, _ = self._tapered_chirp(**tx_params) # Filter and decimate chirp template - channel_id = str(self.echodata.beam.sel(frequency=freq)['channel_id'].values) - fs_deci = 1 / self.echodata.beam.sel(frequency=freq)['sample_interval'].values + channel_id = str( + self.echodata.beam.sel(frequency=freq)["channel_id"].values + ) + fs_deci = ( + 1 / self.echodata.beam.sel(frequency=freq)["sample_interval"].values + ) y_tmp, y_tmp_time = self._filter_decimate_chirp(y_tmp, channel_id) # Compute effective pulse length - tau_effective_tmp = self._get_tau_effective(y_tmp, fs_deci, waveform_mode=waveform_mode) + tau_effective_tmp = self._get_tau_effective( + y_tmp, fs_deci, waveform_mode=waveform_mode + ) y_all[channel_id] = y_tmp y_time_all[channel_id] = y_tmp_time @@ -459,22 +556,33 @@ def compress_pulse(self, chirp): chirp : dict transmit chirp replica indexed by channel_id """ - backscatter = self.echodata.beam['backscatter_r'] + 1j * self.echodata.beam['backscatter_i'] + backscatter = ( + self.echodata.beam["backscatter_r"] + + 1j * self.echodata.beam["backscatter_i"] + ) pc_all = [] for freq in self.echodata.beam.frequency.values: - backscatter_freq = (backscatter.sel(frequency=freq) - .dropna(dim='range_bin', how='all') - .dropna(dim='quadrant', how='all') - .dropna(dim='ping_time')) - channel_id = str(self.echodata.beam.sel(frequency=freq)['channel_id'].values) - replica = xr.DataArray(np.conj(chirp[channel_id]), dims='window') + backscatter_freq = ( + backscatter.sel(frequency=freq) + .dropna(dim="range_bin", how="all") + .dropna(dim="quadrant", how="all") + .dropna(dim="ping_time") + ) + channel_id = str( + self.echodata.beam.sel(frequency=freq)["channel_id"].values + ) + replica = xr.DataArray(np.conj(chirp[channel_id]), dims="window") # Pulse compression via rolling - pc = (backscatter_freq.rolling(range_bin=replica.size).construct('window').dot(replica) - / np.linalg.norm(chirp[channel_id]) ** 2) + pc = ( + backscatter_freq.rolling(range_bin=replica.size) + .construct("window") + .dot(replica) + / np.linalg.norm(chirp[channel_id]) ** 2 + ) # Expand dimension and add name to allow merge - pc = pc.expand_dims(dim='frequency') - pc.name = 'pulse_compressed_output' + pc = pc.expand_dims(dim="frequency") + pc.name = "pulse_compressed_output" pc_all.append(pc) pc_merge = xr.merge(pc_all) @@ -497,70 +605,101 @@ def _cal_complex(self, cal_type, waveform_mode): chirp, _, tau_effective = self.get_transmit_chirp(waveform_mode=waveform_mode) # pulse compression - if waveform_mode == 'BB': + if waveform_mode == "BB": pc = self.compress_pulse(chirp) - prx = (self.echodata.beam.quadrant.size - * np.abs(pc.mean(dim='quadrant')) ** 2 - / (2 * np.sqrt(2)) ** 2 - * (np.abs(self.z_er + self.z_et) / self.z_er) ** 2 - / self.z_et) + prx = ( + self.echodata.beam.quadrant.size + * np.abs(pc.mean(dim="quadrant")) ** 2 + / (2 * np.sqrt(2)) ** 2 + * (np.abs(self.z_er + self.z_et) / self.z_er) ** 2 + / self.z_et + ) else: - backscatter_cw = self.echodata.beam['backscatter_r'] + 1j * self.echodata.beam['backscatter_i'] - prx = (self.echodata.beam.quadrant.size - * np.abs(backscatter_cw.mean(dim='quadrant')) ** 2 - / (2 * np.sqrt(2)) ** 2 - * (np.abs(self.z_er + self.z_et) / self.z_er) ** 2 - / self.z_et) - prx.name = 'received_power' + backscatter_cw = ( + self.echodata.beam["backscatter_r"] + + 1j * self.echodata.beam["backscatter_i"] + ) + prx = ( + self.echodata.beam.quadrant.size + * np.abs(backscatter_cw.mean(dim="quadrant")) ** 2 + / (2 * np.sqrt(2)) ** 2 + * (np.abs(self.z_er + self.z_et) / self.z_er) 
** 2 + / self.z_et + ) + prx.name = "received_power" prx = prx.to_dataset() # Derived params - sound_speed = self.env_params['sound_speed'].squeeze() + sound_speed = self.env_params["sound_speed"].squeeze() range_meter = self.range_meter freq_nominal = self.echodata.beam.frequency - if waveform_mode == 'BB': - freq_center = (self.echodata.beam['frequency_start'] + self.echodata.beam['frequency_end']) / 2 + if waveform_mode == "BB": + freq_center = ( + self.echodata.beam["frequency_start"] + + self.echodata.beam["frequency_end"] + ) / 2 wavelength = sound_speed / freq_center - elif waveform_mode == 'CW': + elif waveform_mode == "CW": wavelength = sound_speed / freq_nominal # gain = self.echodata.vendor['gain'] # TODO: need to interpolate gain to at freq_center gain = 27 # Transmission loss - spreading_loss = 20 * np.log10(range_meter.where(range_meter >= 1, other=1)).squeeze() - absorption_loss = 2 * self.env_params['sound_absorption'].squeeze() * range_meter.squeeze() + spreading_loss = ( + 20 * np.log10(range_meter.where(range_meter >= 1, other=1)).squeeze() + ) + absorption_loss = ( + 2 * self.env_params["sound_absorption"].squeeze() * range_meter.squeeze() + ) # TODO: both Sv and Sp are off by ~<0.5 dB from matlab outputs. # Is this due to the use of 'single' in matlab code? - if cal_type == 'Sv': + if cal_type == "Sv": # get equivalent beam angle - if waveform_mode == 'BB': - psifc = self.echodata.beam['equivalent_beam_angle'] + 10 * np.log10(freq_nominal / freq_center) - elif waveform_mode == 'CW': - psifc = self.echodata.beam['equivalent_beam_angle'] + if waveform_mode == "BB": + psifc = self.echodata.beam["equivalent_beam_angle"] + 10 * np.log10( + freq_nominal / freq_center + ) + elif waveform_mode == "CW": + psifc = self.echodata.beam["equivalent_beam_angle"] # effective pulse length - tau_effective = xr.DataArray(data=list(tau_effective.values()), - coords=[self.echodata.beam.frequency, - self.echodata.beam.ping_time], - dims=['frequency', 'ping_time']) - out = (10 * np.log10(prx) - + spreading_loss + absorption_loss - - 10 * np.log10(wavelength ** 2 - * self.echodata.beam['transmit_power'] - * sound_speed - / (32 * np.pi ** 2)) - - 2 * gain - 10 * np.log10(tau_effective) - psifc) - out = out.rename_vars({list(out.data_vars.keys())[0]: 'Sv'}) - - elif cal_type == 'Sp': - out = (10 * np.log10(prx) - + 2 * spreading_loss + absorption_loss - - 10 * np.log10(wavelength ** 2 - * self.echodata.beam['transmit_power'] - / (16 * np.pi ** 2)) - - 2 * gain) - out = out.rename_vars({list(out.data_vars.keys())[0]: 'Sp'}) + tau_effective = xr.DataArray( + data=list(tau_effective.values()), + coords=[self.echodata.beam.frequency, self.echodata.beam.ping_time], + dims=["frequency", "ping_time"], + ) + out = ( + 10 * np.log10(prx) + + spreading_loss + + absorption_loss + - 10 + * np.log10( + wavelength ** 2 + * self.echodata.beam["transmit_power"] + * sound_speed + / (32 * np.pi ** 2) + ) + - 2 * gain + - 10 * np.log10(tau_effective) + - psifc + ) + out = out.rename_vars({list(out.data_vars.keys())[0]: "Sv"}) + + elif cal_type == "Sp": + out = ( + 10 * np.log10(prx) + + 2 * spreading_loss + + absorption_loss + - 10 + * np.log10( + wavelength ** 2 + * self.echodata.beam["transmit_power"] + / (16 * np.pi ** 2) + ) + - 2 * gain + ) + out = out.rename_vars({list(out.data_vars.keys())[0]: "Sp"}) # Attach calculated range (with units meter) into data set out = out.merge(range_meter) @@ -591,21 +730,23 @@ def _compute_cal(self, cal_type, waveform_mode, encode_mode): Dataset containing either Sv or Sp. 
""" # Raise error for wrong inputs - if waveform_mode not in ('BB', 'CW'): - raise ValueError('Input waveform_mode not recognized!') - if encode_mode not in ('complex', 'power'): - raise ValueError('Input encode_mode not recognized!') + if waveform_mode not in ("BB", "CW"): + raise ValueError("Input waveform_mode not recognized!") + if encode_mode not in ("complex", "power"): + raise ValueError("Input encode_mode not recognized!") # Set flag_complex # - True: complex cal # - False: power cal # BB: complex only, CW: complex or power - if waveform_mode == 'BB': - if encode_mode == 'power': # BB waveform forces to collect complex samples - raise ValueError("encode_mode='power' not allowed when waveform_mode='BB'!") + if waveform_mode == "BB": + if encode_mode == "power": # BB waveform forces to collect complex samples + raise ValueError( + "encode_mode='power' not allowed when waveform_mode='BB'!" + ) flag_complex = True else: - if encode_mode == 'complex': + if encode_mode == "complex": flag_complex = True else: flag_complex = False @@ -619,30 +760,42 @@ def _compute_cal(self, cal_type, waveform_mode, encode_mode): # Warn user about additional data in the raw file if another type exists if self.echodata.beam_power is not None: # both power and complex samples exist - if encode_mode == 'power': + if encode_mode == "power": use_beam_power = True # switch source of backscatter data - print('Only power samples are calibrated, but complex samples also exist in the raw data file!') + print( + "Only power samples are calibrated, but complex samples also exist in the raw data file!" + ) else: - print('Only complex samples are calibrated, but power samples also exist in the raw data file!') + print( + "Only complex samples are calibrated, but power samples also exist in the raw data file!" + ) else: # only power OR complex samples exist - if 'quadrant' in self.echodata.beam.dims: # data contain only complex samples - if encode_mode == 'power': - raise TypeError('File does not contain power samples!') # user selects the wrong encode_mode + if ( + "quadrant" in self.echodata.beam.dims + ): # data contain only complex samples + if encode_mode == "power": + raise TypeError( + "File does not contain power samples!" + ) # user selects the wrong encode_mode else: # data contain only power samples - if encode_mode == 'complex': - raise TypeError('File does not contain complex samples!') # user selects the wrong encode_mode + if encode_mode == "complex": + raise TypeError( + "File does not contain complex samples!" + ) # user selects the wrong encode_mode # Compute Sv if flag_complex: - self.compute_range_meter(waveform_mode=waveform_mode, tvg_correction_factor=0) + self.compute_range_meter( + waveform_mode=waveform_mode, tvg_correction_factor=0 + ) ds_cal = self._cal_complex(cal_type=cal_type, waveform_mode=waveform_mode) else: - self.compute_range_meter(waveform_mode='CW', tvg_correction_factor=0) + self.compute_range_meter(waveform_mode="CW", tvg_correction_factor=0) ds_cal = self._cal_power(cal_type=cal_type, use_beam_power=use_beam_power) return ds_cal - def compute_Sv(self, waveform_mode='BB', encode_mode='complex'): + def compute_Sv(self, waveform_mode="BB", encode_mode="complex"): """Compute volume backscattering strength (Sv). Parameters @@ -663,9 +816,11 @@ def compute_Sv(self, waveform_mode='BB', encode_mode='complex'): A DataSet containing volume backscattering strength (``Sv``) and the corresponding range (``range``) in units meter. 
""" - return self._compute_cal(cal_type='Sv', waveform_mode=waveform_mode, encode_mode=encode_mode) + return self._compute_cal( + cal_type="Sv", waveform_mode=waveform_mode, encode_mode=encode_mode + ) - def compute_Sp(self, waveform_mode='BB', encode_mode='complex'): + def compute_Sp(self, waveform_mode="BB", encode_mode="complex"): """Compute point backscattering strength (Sp). Parameters @@ -686,4 +841,6 @@ def compute_Sp(self, waveform_mode='BB', encode_mode='complex'): A DataSet containing point backscattering strength (``Sp``) and the corresponding range (``range``) in units meter. """ - return self._compute_cal(cal_type='Sp', waveform_mode=waveform_mode, encode_mode=encode_mode) + return self._compute_cal( + cal_type="Sp", waveform_mode=waveform_mode, encode_mode=encode_mode + ) diff --git a/echopype/convert/__init__.py b/echopype/convert/__init__.py index 985e81ffa..4ab1b41c9 100644 --- a/echopype/convert/__init__.py +++ b/echopype/convert/__init__.py @@ -7,13 +7,13 @@ - Simrad EK80 echosounder ``.raw`` data - ASL Environmental Sciences AZFP echosounder ``.01A`` data """ -from .convert import Convert, ConvertEK80 # TODO remove ConvertEK80 in later version -from .parse_ek60 import ParseEK60 -from .parse_ek80 import ParseEK80 -from .parse_azfp import ParseAZFP +from .convert import Convert, ConvertEK80 # TODO remove ConvertEK80 in later version from .parse_ad2cp import ParseAd2cp +from .parse_azfp import ParseAZFP from .parse_base import ParseBase +from .parse_ek60 import ParseEK60 +from .parse_ek80 import ParseEK80 +from .set_groups_ad2cp import SetGroupsAd2cp from .set_groups_azfp import SetGroupsAZFP from .set_groups_ek60 import SetGroupsEK60 from .set_groups_ek80 import SetGroupsEK80 -from .set_groups_ad2cp import SetGroupsAd2cp \ No newline at end of file diff --git a/echopype/convert/add_ancillary.py b/echopype/convert/add_ancillary.py index eed7999d8..f3f90e77f 100644 --- a/echopype/convert/add_ancillary.py +++ b/echopype/convert/add_ancillary.py @@ -1,6 +1,3 @@ - - - def update_platform(self, files=None, extra_platform_data=None): """ Parameters @@ -21,17 +18,17 @@ def update_platform(self, files=None, extra_platform_data=None): # saildrone specific hack if "trajectory" in extra_platform_data: extra_platform_data = extra_platform_data.isel(trajectory=0).drop("trajectory") - extra_platform_data = extra_platform_data.swap_dims({'obs': 'time'}) + extra_platform_data = extra_platform_data.swap_dims({"obs": "time"}) # Try to find the time dimension in the extra_platform_data - possible_time_keys = ['time', 'ping_time', 'location_time'] - time_name = '' + possible_time_keys = ["time", "ping_time", "location_time"] + time_name = "" for k in possible_time_keys: if k in extra_platform_data: time_name = k break if not time_name: - raise ValueError('Time dimension not found') + raise ValueError("Time dimension not found") for f in files: ds_beam = xr.open_dataset(f, group="Beam", engine=engine) @@ -41,7 +38,9 @@ def update_platform(self, files=None, extra_platform_data=None): # start_time, end_time = min(ds_beam["ping_time"]), max(ds_beam["ping_time"]) start_time, end_time = ds_beam.ping_time.min(), ds_beam.ping_time.max() - extra_platform_data = extra_platform_data.sel({time_name: slice(start_time, end_time)}) + extra_platform_data = extra_platform_data.sel( + {time_name: slice(start_time, end_time)} + ) def mapping_get_multiple(mapping, keys, default=None): for key in keys: @@ -49,48 +48,128 @@ def mapping_get_multiple(mapping, keys, default=None): return mapping[key] return default - 
if self.sonar_model in ['EK80', 'EA640']: - ds_platform = ds_platform.reindex({ - "mru_time": extra_platform_data[time_name].values, - "location_time": extra_platform_data[time_name].values, - }) + if self.sonar_model in ["EK80", "EA640"]: + ds_platform = ds_platform.reindex( + { + "mru_time": extra_platform_data[time_name].values, + "location_time": extra_platform_data[time_name].values, + } + ) # merge extra platform data num_obs = len(extra_platform_data[time_name]) - ds_platform = ds_platform.update({ - "pitch": ("mru_time", mapping_get_multiple( - extra_platform_data, ["pitch", "PITCH"], np.full(num_obs, np.nan))), - "roll": ("mru_time", mapping_get_multiple( - extra_platform_data, ["roll", "ROLL"], np.full(num_obs, np.nan))), - "heave": ("mru_time", mapping_get_multiple( - extra_platform_data, ["heave", "HEAVE"], np.full(num_obs, np.nan))), - "latitude": ("location_time", mapping_get_multiple( - extra_platform_data, ["lat", "latitude", "LATITUDE"], default=np.full(num_obs, np.nan))), - "longitude": ("location_time", mapping_get_multiple( - extra_platform_data, ["lon", "longitude", "LONGITUDE"], default=np.full(num_obs, np.nan))), - "water_level": ("location_time", mapping_get_multiple( - extra_platform_data, ["water_level", "WATER_LEVEL"], np.ones(num_obs))) - }) - elif self.sonar_model == 'EK60': - ds_platform = ds_platform.reindex({ - "ping_time": extra_platform_data[time_name].values, - "location_time": extra_platform_data[time_name].values, - }) + ds_platform = ds_platform.update( + { + "pitch": ( + "mru_time", + mapping_get_multiple( + extra_platform_data, + ["pitch", "PITCH"], + np.full(num_obs, np.nan), + ), + ), + "roll": ( + "mru_time", + mapping_get_multiple( + extra_platform_data, + ["roll", "ROLL"], + np.full(num_obs, np.nan), + ), + ), + "heave": ( + "mru_time", + mapping_get_multiple( + extra_platform_data, + ["heave", "HEAVE"], + np.full(num_obs, np.nan), + ), + ), + "latitude": ( + "location_time", + mapping_get_multiple( + extra_platform_data, + ["lat", "latitude", "LATITUDE"], + default=np.full(num_obs, np.nan), + ), + ), + "longitude": ( + "location_time", + mapping_get_multiple( + extra_platform_data, + ["lon", "longitude", "LONGITUDE"], + default=np.full(num_obs, np.nan), + ), + ), + "water_level": ( + "location_time", + mapping_get_multiple( + extra_platform_data, + ["water_level", "WATER_LEVEL"], + np.ones(num_obs), + ), + ), + } + ) + elif self.sonar_model == "EK60": + ds_platform = ds_platform.reindex( + { + "ping_time": extra_platform_data[time_name].values, + "location_time": extra_platform_data[time_name].values, + } + ) # merge extra platform data num_obs = len(extra_platform_data[time_name]) - ds_platform = ds_platform.update({ - "pitch": ("ping_time", mapping_get_multiple( - extra_platform_data, ["pitch", "PITCH"], np.full(num_obs, np.nan))), - "roll": ("ping_time", mapping_get_multiple( - extra_platform_data, ["roll", "ROLL"], np.full(num_obs, np.nan))), - "heave": ("ping_time", mapping_get_multiple( - extra_platform_data, ["heave", "HEAVE"], np.full(num_obs, np.nan))), - "latitude": ("location_time", mapping_get_multiple( - extra_platform_data, ["lat", "latitude", "LATITUDE"], default=np.full(num_obs, np.nan))), - "longitude": ("location_time", mapping_get_multiple( - extra_platform_data, ["lon", "longitude", "LONGITUDE"], default=np.full(num_obs, np.nan))), - "water_level": ("location_time", mapping_get_multiple( - extra_platform_data, ["water_level", "WATER_LEVEL"], np.ones(num_obs))) - }) + ds_platform = ds_platform.update( + { + "pitch": ( + 
"ping_time", + mapping_get_multiple( + extra_platform_data, + ["pitch", "PITCH"], + np.full(num_obs, np.nan), + ), + ), + "roll": ( + "ping_time", + mapping_get_multiple( + extra_platform_data, + ["roll", "ROLL"], + np.full(num_obs, np.nan), + ), + ), + "heave": ( + "ping_time", + mapping_get_multiple( + extra_platform_data, + ["heave", "HEAVE"], + np.full(num_obs, np.nan), + ), + ), + "latitude": ( + "location_time", + mapping_get_multiple( + extra_platform_data, + ["lat", "latitude", "LATITUDE"], + default=np.full(num_obs, np.nan), + ), + ), + "longitude": ( + "location_time", + mapping_get_multiple( + extra_platform_data, + ["lon", "longitude", "LONGITUDE"], + default=np.full(num_obs, np.nan), + ), + ), + "water_level": ( + "location_time", + mapping_get_multiple( + extra_platform_data, + ["water_level", "WATER_LEVEL"], + np.ones(num_obs), + ), + ), + } + ) # need to close the file in order to remove it # (and need to close the file so to_netcdf can write to it) @@ -102,12 +181,12 @@ def mapping_get_multiple(mapping, keys, default=None): # Copy groups over to temporary file # TODO: Add in documentation: recommended to use Zarr if using add_platform new_dataset_filename = f + ".temp" - groups = ['Provenance', 'Environment', 'Beam', 'Sonar', 'Vendor'] + groups = ["Provenance", "Environment", "Beam", "Sonar", "Vendor"] with xr.open_dataset(f) as ds_top: - ds_top.to_netcdf(new_dataset_filename, mode='w') + ds_top.to_netcdf(new_dataset_filename, mode="w") for group in groups: with xr.open_dataset(f, group=group) as ds: - ds.to_netcdf(new_dataset_filename, mode='a', group=group) + ds.to_netcdf(new_dataset_filename, mode="a", group=group) ds_platform.to_netcdf(new_dataset_filename, mode="a", group="Platform") # Replace original file with temporary file os.remove(f) diff --git a/echopype/convert/api.py b/echopype/convert/api.py index fdfcdf888..8bd9dd6d8 100644 --- a/echopype/convert/api.py +++ b/echopype/convert/api.py @@ -1,24 +1,22 @@ +import os import warnings from datetime import datetime as dt from pathlib import Path -import os import fsspec -from fsspec.implementations.local import LocalFileSystem import zarr +from fsspec.implementations.local import LocalFileSystem +from ..echodata.echodata import XARRAY_ENGINE_MAP, EchoData from ..utils import io - -from ..echodata.echodata import EchoData, XARRAY_ENGINE_MAP - +from .parse_ad2cp import ParseAd2cp from .parse_azfp import ParseAZFP from .parse_ek60 import ParseEK60 from .parse_ek80 import ParseEK80 -from .parse_ad2cp import ParseAd2cp +from .set_groups_ad2cp import SetGroupsAd2cp from .set_groups_azfp import SetGroupsAZFP from .set_groups_ek60 import SetGroupsEK60 from .set_groups_ek80 import SetGroupsEK80 -from .set_groups_ad2cp import SetGroupsAd2cp MODELS = { "AZFP": { @@ -27,18 +25,38 @@ "parser": ParseAZFP, "set_groups": SetGroupsAZFP, }, - "EK60": {"ext": ".raw", "xml": False, "parser": ParseEK60, "set_groups": SetGroupsEK60}, - "EK80": {"ext": ".raw", "xml": False, "parser": ParseEK80, "set_groups": SetGroupsEK80}, - "EA640": {"ext": ".raw", "xml": False, "parser": ParseEK80, "set_groups": SetGroupsEK80}, - "AD2CP": {"ext": ".ad2cp", "xml": False, "parser": ParseAd2cp, "set_groups": SetGroupsAd2cp}, + "EK60": { + "ext": ".raw", + "xml": False, + "parser": ParseEK60, + "set_groups": SetGroupsEK60, + }, + "EK80": { + "ext": ".raw", + "xml": False, + "parser": ParseEK80, + "set_groups": SetGroupsEK80, + }, + "EA640": { + "ext": ".raw", + "xml": False, + "parser": ParseEK80, + "set_groups": SetGroupsEK80, + }, + "AD2CP": { + "ext": 
".ad2cp", + "xml": False, + "parser": ParseAd2cp, + "set_groups": SetGroupsAd2cp, + }, } COMPRESSION_SETTINGS = { - 'netcdf4': {'zlib': True, 'complevel': 4}, - 'zarr': {'compressor': zarr.Blosc(cname='zstd', clevel=3, shuffle=2)}, + "netcdf4": {"zlib": True, "complevel": 4}, + "zarr": {"compressor": zarr.Blosc(cname="zstd", clevel=3, shuffle=2)}, } -DEFAULT_CHUNK_SIZE = {'range_bin': 25000, 'ping_time': 2500} +DEFAULT_CHUNK_SIZE = {"range_bin": 25000, "ping_time": 2500} NMEA_SENTENCE_DEFAULT = ["GGA", "GLL", "RMC"] @@ -50,9 +68,7 @@ def _normalize_path(out_f, convert_type, output_storage_options): return out_f -def _validate_path( - source_file, file_format, output_storage_options={}, save_path=None -): +def _validate_path(source_file, file_format, output_storage_options={}, save_path=None): """Assemble output file names and path. Parameters @@ -79,7 +95,7 @@ def _validate_path( else: if not isinstance(save_path, Path) and not isinstance(save_path, str): - raise TypeError('save_path must be a string or Path') + raise TypeError("save_path must be a string or Path") fsmap = fsspec.get_mapper(str(save_path), **output_storage_options) output_fs = fsmap.fs @@ -145,9 +161,7 @@ def to_file( Additional keywords to pass to the filesystem class. """ if parallel: - raise NotImplementedError( - "Parallel conversion is not yet implemented." - ) + raise NotImplementedError("Parallel conversion is not yet implemented.") if engine not in XARRAY_ENGINE_MAP.values(): raise ValueError("Unknown type to convert file to!") @@ -161,9 +175,7 @@ def to_file( ) # Get all existing files - fs = fsspec.get_mapper( - output_file, **output_storage_options - ).fs # get file system + fs = fsspec.get_mapper(output_file, **output_storage_options).fs # get file system exists = True if fs.exists(output_file) else False # Sequential or parallel conversion @@ -179,9 +191,7 @@ def to_file( print(f"{dt.now().strftime('%H:%M:%S')} saving {output_file}") _save_groups_to_file( echodata, - output_path=_normalize_path( - output_file, engine, output_storage_options - ), + output_path=_normalize_path(output_file, engine, output_storage_options), engine=engine, compress=compress, ) @@ -195,34 +205,34 @@ def _save_groups_to_file(echodata, output_path, engine, compress=True): # TODO: in terms of chunking, would using rechunker at the end be faster and more convenient? # Top-level group - io.save_file(echodata.top, path=output_path, mode='w', engine=engine) + io.save_file(echodata.top, path=output_path, mode="w", engine=engine) # Provenance group io.save_file( echodata.provenance, path=output_path, - group='Provenance', - mode='a', + group="Provenance", + mode="a", engine=engine, ) # Environment group io.save_file( echodata.environment.chunk( - {'ping_time': DEFAULT_CHUNK_SIZE['ping_time']} + {"ping_time": DEFAULT_CHUNK_SIZE["ping_time"]} ), # TODO: chunking necessary? 
path=output_path, - mode='a', + mode="a", engine=engine, - group='Environment', + group="Environment", ) # Sonar group io.save_file( echodata.sonar, path=output_path, - group='Sonar', - mode='a', + group="Sonar", + mode="a", engine=engine, ) @@ -231,48 +241,42 @@ def _save_groups_to_file(echodata, output_path, engine, compress=True): io.save_file( echodata.beam.chunk( { - 'ping_time': DEFAULT_CHUNK_SIZE['ping_time'], + "ping_time": DEFAULT_CHUNK_SIZE["ping_time"], } ), path=output_path, - mode='a', + mode="a", engine=engine, - group='Beam', - compression_settings=COMPRESSION_SETTINGS[engine] - if compress - else None, + group="Beam", + compression_settings=COMPRESSION_SETTINGS[engine] if compress else None, ) else: io.save_file( echodata.beam.chunk( { - 'range_bin': DEFAULT_CHUNK_SIZE['range_bin'], - 'ping_time': DEFAULT_CHUNK_SIZE['ping_time'], + "range_bin": DEFAULT_CHUNK_SIZE["range_bin"], + "ping_time": DEFAULT_CHUNK_SIZE["ping_time"], } ), path=output_path, - mode='a', + mode="a", engine=engine, - group='Beam', - compression_settings=COMPRESSION_SETTINGS[engine] - if compress - else None, + group="Beam", + compression_settings=COMPRESSION_SETTINGS[engine] if compress else None, ) if echodata.beam_power is not None: io.save_file( echodata.beam_power.chunk( { - 'range_bin': DEFAULT_CHUNK_SIZE['range_bin'], - 'ping_time': DEFAULT_CHUNK_SIZE['ping_time'], + "range_bin": DEFAULT_CHUNK_SIZE["range_bin"], + "ping_time": DEFAULT_CHUNK_SIZE["ping_time"], } ), path=output_path, - mode='a', + mode="a", engine=engine, - group='Beam_power', - compression_settings=COMPRESSION_SETTINGS[engine] - if compress - else None, + group="Beam_power", + compression_settings=COMPRESSION_SETTINGS[engine] if compress else None, ) # Beam Complex Group: only AD2CP has Beam Complex @@ -280,63 +284,53 @@ def _save_groups_to_file(echodata, output_path, engine, compress=True): io.save_file( echodata.beam_complex, path=output_path, - mode='a', + mode="a", engine=engine, - group='Beam', - compression_settings=COMPRESSION_SETTINGS[engine] - if compress - else None, + group="Beam", + compression_settings=COMPRESSION_SETTINGS[engine] if compress else None, ) # Platform group io.save_file( echodata.platform, # TODO: chunking necessary? location_time and mru_time (EK80) only path=output_path, - mode='a', + mode="a", engine=engine, - group='Platform', - compression_settings=COMPRESSION_SETTINGS[engine] - if compress - else None, + group="Platform", + compression_settings=COMPRESSION_SETTINGS[engine] if compress else None, ) # Platform/NMEA group: some sonar model does not produce NMEA data - if hasattr(echodata, 'nmea'): + if hasattr(echodata, "nmea"): io.save_file( echodata.nmea, # TODO: chunking necessary? path=output_path, - mode='a', + mode="a", engine=engine, - group='Platform/NMEA', - compression_settings=COMPRESSION_SETTINGS[engine] - if compress - else None, + group="Platform/NMEA", + compression_settings=COMPRESSION_SETTINGS[engine] if compress else None, ) # Vendor-specific group if "ping_time" in echodata.vendor: io.save_file( echodata.vendor.chunk( - {'ping_time': DEFAULT_CHUNK_SIZE['ping_time']} + {"ping_time": DEFAULT_CHUNK_SIZE["ping_time"]} ), # TODO: chunking necessary? path=output_path, - mode='a', + mode="a", engine=engine, - group='Vendor', - compression_settings=COMPRESSION_SETTINGS[engine] - if compress - else None, + group="Vendor", + compression_settings=COMPRESSION_SETTINGS[engine] if compress else None, ) else: io.save_file( echodata.vendor, # TODO: chunking necessary? 
path=output_path, - mode='a', + mode="a", engine=engine, - group='Vendor', - compression_settings=COMPRESSION_SETTINGS[engine] - if compress - else None, + group="Vendor", + compression_settings=COMPRESSION_SETTINGS[engine] if compress else None, ) @@ -410,15 +404,11 @@ def _check_file(raw_file, sonar_model, xml_path=None, storage_options={}): raise ValueError(f"XML file is required for {sonar_model} raw data") else: if ".XML" not in Path(xml_path).suffix.upper(): - raise ValueError( - f"{Path(xml_path).name} is not an XML file" - ) + raise ValueError(f"{Path(xml_path).name} is not an XML file") xmlmap = fsspec.get_mapper(str(xml_path), **storage_options) if not xmlmap.fs.exists(xmlmap.root): - raise FileNotFoundError( - f"There is no file named {Path(xml_path).name}" - ) + raise FileNotFoundError(f"There is no file named {Path(xml_path).name}") xml = xml_path else: @@ -429,14 +419,10 @@ def _check_file(raw_file, sonar_model, xml_path=None, storage_options={}): fsmap = fsspec.get_mapper(raw_file, **storage_options) ext = MODELS[sonar_model]["ext"] if not fsmap.fs.exists(fsmap.root): - raise FileNotFoundError( - f"There is no file named {Path(raw_file).name}" - ) + raise FileNotFoundError(f"There is no file named {Path(raw_file).name}") if Path(raw_file).suffix.upper() != ext.upper(): - raise ValueError( - f"Expecting a {ext} file but got {raw_file}" - ) + raise ValueError(f"Expecting a {ext} file but got {raw_file}") return str(raw_file), str(xml) @@ -517,9 +503,7 @@ def open_raw( raise TypeError("file must be a string or Path") # Check file extension and existence - file_chk, xml_chk = _check_file( - raw_file, sonar_model, xml_path, storage_options - ) + file_chk, xml_chk = _check_file(raw_file, sonar_model, xml_path, storage_options) # TODO: the if-else below only works for the AZFP vs EK contrast, # but is brittle since it is abusing params by using it implicitly @@ -541,13 +525,11 @@ def open_raw( params=_set_convert_params(convert_params), ) # Set up echodata object - echodata = EchoData( - source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model - ) + echodata = EchoData(source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model) # Top-level date_created varies depending on sonar model if sonar_model in ["EK60", "EK80"]: echodata.top = setgrouper.set_toplevel( - sonar_model=sonar_model, date_created=parser.config_datagram['timestamp'] + sonar_model=sonar_model, date_created=parser.config_datagram["timestamp"] ) else: echodata.top = setgrouper.set_toplevel( diff --git a/echopype/convert/convert.py b/echopype/convert/convert.py index 17ee15c30..9ac445162 100644 --- a/echopype/convert/convert.py +++ b/echopype/convert/convert.py @@ -1,7 +1,6 @@ """ UI class for converting raw data from different echosounders to netcdf or zarr. 
""" -from echopype.echodata.echodata import EchoData import os import warnings from datetime import datetime as dt @@ -10,17 +9,18 @@ import fsspec from fsspec.implementations.local import LocalFileSystem +from echopype.echodata.echodata import EchoData + from ..utils import io +from .api import MODELS, open_raw +from .parse_ad2cp import ParseAd2cp from .parse_azfp import ParseAZFP from .parse_ek60 import ParseEK60 from .parse_ek80 import ParseEK80 -from .parse_ad2cp import ParseAd2cp +from .set_groups_ad2cp import SetGroupsAd2cp from .set_groups_azfp import SetGroupsAZFP from .set_groups_ek60 import SetGroupsEK60 from .set_groups_ek80 import SetGroupsEK80 -from .set_groups_ad2cp import SetGroupsAd2cp -from .api import open_raw, MODELS - warnings.simplefilter("always", DeprecationWarning) diff --git a/echopype/convert/convert_combine.py b/echopype/convert/convert_combine.py index 4ed2e384e..d6e205bb2 100644 --- a/echopype/convert/convert_combine.py +++ b/echopype/convert/convert_combine.py @@ -16,12 +16,17 @@ def get_combined_save_path(source_paths): def get_combined_fname(path): fname, ext = os.path.splitext(path) - return fname + '__combined' + ext + return fname + "__combined" + ext sample_file = source_paths[0] - save_path = get_combined_fname(sample_file.root) if isinstance(sample_file, FSMap) else get_combined_fname \ - (sample_file) - if isinstance(sample_file, FSMap) and not isinstance(sample_file.fs, LocalFileSystem): + save_path = ( + get_combined_fname(sample_file.root) + if isinstance(sample_file, FSMap) + else get_combined_fname(sample_file) + ) + if isinstance(sample_file, FSMap) and not isinstance( + sample_file.fs, LocalFileSystem + ): return sample_file.fs.get_mapper(save_path) return save_path @@ -32,34 +37,38 @@ def remove_indiv_files(path): path.fs.delete(path.root, recursive=True) else: fname, ext = os.path.splitext(path) - if ext == '.zarr': + if ext == ".zarr": shutil.rmtree(path) else: os.remove(path) def assemble_combined_provenance(input_paths): - prov_dict = {'conversion_software_name': 'echopype', - 'conversion_software_version': ECHOPYPE_VERSION, - 'conversion_time': dt.utcnow().isoformat(timespec='seconds') + 'Z', # use UTC time - 'src_filenames': input_paths} + prov_dict = { + "conversion_software_name": "echopype", + "conversion_software_version": ECHOPYPE_VERSION, + "conversion_time": dt.utcnow().isoformat(timespec="seconds") + + "Z", # use UTC time + "src_filenames": input_paths, + } ds = xr.Dataset() return ds.assign_attrs(prov_dict) def perform_combination(sonar_model, input_paths, output_path, engine): - """Opens a list of Netcdf/Zarr files as a single dataset and saves it to a single file. - """ + """Opens a list of Netcdf/Zarr files as a single dataset and saves it to a single file.""" # TODO: there should be compression option for the combined file too... def coerce_type(ds, group): - if group == 'Beam': - if sonar_model == 'EK80': - ds['transceiver_software_version'] = ds['transceiver_software_version'].astype(' Dict[str, Dict[str, Any]]: def get_pulse_compressed(self): for i in range(1, 3 + 1): - if ( - "GETECHO" in self.config - and self.config["GETECHO"][f"PULSECOMP{i}"] > 0 - ): + if "GETECHO" in self.config and self.config["GETECHO"][f"PULSECOMP{i}"] > 0: return i return 0 class Ad2cpDataPacket: """ - Represents a single data packet. Each data packet consists of a header data record followed by a + Represents a single data packet. 
Each data packet consists of a header data record followed by a """ def __init__( @@ -433,7 +430,10 @@ def _read_data_record(self, f: BinaryIO): elif self.data_exclude["id"] in (0x17, 0x1B): # bottom track self.data_record_type = DataRecordType.BOTTOM_TRACK elif self.data_exclude["id"] in (0x23, 0x24): # echosounder raw - if self.parser.get_pulse_compressed() > 0 and len(self.parser.echosounder_raw_transmit_packets) == 0: + if ( + self.parser.get_pulse_compressed() > 0 + and len(self.parser.echosounder_raw_transmit_packets) == 0 + ): # first echosounder raw packet is the transmit packet # if pulse compression is enabled self.data_record_type = DataRecordType.ECHOSOUNDER_RAW_TRANSMIT @@ -566,7 +566,7 @@ def _read_exact(f: BinaryIO, total_num_bytes_to_read: int) -> bytes: (see https://man7.org/linux/man-pages/man2/read.2.html#RETURN_VALUE, note "It is not an error if this number is smaller than the number of bytes requested") (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/read?view=msvc-160#return-value, - note "_read returns the number of bytes read, + note "_read returns the number of bytes read, which might be less than buffer_size...if the file was opened in text mode") """ @@ -597,7 +597,7 @@ def _postprocess(self, field_name): "data_record_checksum", "header_checksum", "version", - "offset_of_data" + "offset_of_data", ): self.data_exclude[field_name] = self.data[field_name] del self.data[field_name] diff --git a/echopype/convert/parse_azfp.py b/echopype/convert/parse_azfp.py index a4c6c41fa..48f6e354d 100644 --- a/echopype/convert/parse_azfp.py +++ b/echopype/convert/parse_azfp.py @@ -1,20 +1,21 @@ -from collections import defaultdict -import xml.dom.minidom -from struct import unpack -from datetime import datetime as dt -from datetime import timezone import math -import numpy as np import os +import xml.dom.minidom +from collections import defaultdict +from datetime import datetime as dt, timezone +from struct import unpack + import fsspec +import numpy as np + from .parse_base import ParseBase -FILENAME_DATETIME_AZFP = '\\w+.01A' +FILENAME_DATETIME_AZFP = "\\w+.01A" class ParseAZFP(ParseBase): - """Class for converting data from ASL Environmental Sciences AZFP echosounder. - """ + """Class for converting data from ASL Environmental Sciences AZFP echosounder.""" + def __init__(self, file, params, storage_options={}): super().__init__(file, storage_options) # Parent class attributes @@ -24,30 +25,59 @@ def __init__(self, file, params, storage_options={}): # Class attributes self.parameters = dict() self.unpacked_data = defaultdict(list) - self.sonar_type = 'AZFP' + self.sonar_type = "AZFP" def load_AZFP_xml(self): """Parse XML file to get params for reading AZFP data.""" """Parses the AZFP XML file. 
""" + def get_value_by_tag_name(tag_name, element=0): """Returns the value in an XML tag given the tag name and the number of occurrences.""" return px.getElementsByTagName(tag_name)[element].childNodes[0].data + # TODO: consider writing a ParamAZFPxml class for storing parameters xmlmap = fsspec.get_mapper(self.xml_path, **self.storage_options) px = xml.dom.minidom.parse(xmlmap.fs.open(xmlmap.root)) - int_params = {'NumFreq': 'num_freq', 'SerialNumber': 'serial_number', - 'BurstInterval': 'burst_interval', - 'PingsPerBurst': 'pings_per_burst', 'AverageBurstPings': 'average_burst_pings', - 'SensorsFlag': 'sensors_flag'} - float_params = ['ka', 'kb', 'kc', 'A', 'B', 'C', # Temperature coeffs - 'X_a', 'X_b', 'X_c', 'X_d', 'Y_a', 'Y_b', 'Y_c', 'Y_d'] # Tilt coeffs] - freq_params = {'RangeSamples': 'range_samples', 'RangeAveragingSamples': 'range_averaging_samples', - 'DigRate': 'dig_rate', 'LockOutIndex': 'lockout_index', - 'Gain': 'gain', 'PulseLen': 'pulse_length', 'DS': 'DS', - 'EL': 'EL', 'TVR': 'TVR', 'VTX0': 'VTX', 'BP': 'BP'} + int_params = { + "NumFreq": "num_freq", + "SerialNumber": "serial_number", + "BurstInterval": "burst_interval", + "PingsPerBurst": "pings_per_burst", + "AverageBurstPings": "average_burst_pings", + "SensorsFlag": "sensors_flag", + } + float_params = [ + "ka", + "kb", + "kc", + "A", + "B", + "C", # Temperature coeffs + "X_a", + "X_b", + "X_c", + "X_d", + "Y_a", + "Y_b", + "Y_c", + "Y_d", + ] # Tilt coeffs] + freq_params = { + "RangeSamples": "range_samples", + "RangeAveragingSamples": "range_averaging_samples", + "DigRate": "dig_rate", + "LockOutIndex": "lockout_index", + "Gain": "gain", + "PulseLen": "pulse_length", + "DS": "DS", + "EL": "EL", + "TVR": "TVR", + "VTX0": "VTX", + "BP": "BP", + } # Retreive integer parameters from the xml file for old_name, new_name in int_params.items(): @@ -57,8 +87,10 @@ def get_value_by_tag_name(tag_name, element=0): self.parameters[param] = float(get_value_by_tag_name(param)) # Retrieve frequency dependent parameters from the xml file for old_name, new_name in freq_params.items(): - self.parameters[new_name] = [float(get_value_by_tag_name(old_name, ch)) for - ch in range(self.parameters['num_freq'])] + self.parameters[new_name] = [ + float(get_value_by_tag_name(old_name, ch)) + for ch in range(self.parameters["num_freq"]) + ] def parse_raw(self): """Parse raw data file from AZFP echosounder. 
@@ -73,13 +105,22 @@ def parse_raw(self): def compute_temp(counts): """Returns the temperature in celsius given from xml data and the counts from ancillary""" v_in = 2.5 * (counts / 65535) - R = (self.parameters['ka'] + self.parameters['kb'] * v_in) / (self.parameters['kc'] - v_in) - T = 1 / (self.parameters['A'] + self.parameters['B'] * (math.log(R)) + - self.parameters['C'] * (math.log(R) ** 3)) - 273 + R = (self.parameters["ka"] + self.parameters["kb"] * v_in) / ( + self.parameters["kc"] - v_in + ) + T = ( + 1 + / ( + self.parameters["A"] + + self.parameters["B"] * (math.log(R)) + + self.parameters["C"] * (math.log(R) ** 3) + ) + - 273 + ) return T def compute_tilt(N, a, b, c, d): - return a + b * N + c * N**2 + d * N**3 + return a + b * N + c * N ** 2 + d * N ** 3 def compute_battery(N): USL5_BAT_CONSTANT = (2.5 / 65536.0) * (86.6 + 475.0) / 86.6 @@ -87,13 +128,15 @@ def compute_battery(N): # Instrument specific constants HEADER_SIZE = 124 - HEADER_FORMAT = ">HHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHBBBBHBBBBBBBBHHHHHHHHHHHHHHHHHHHH" + HEADER_FORMAT = ( + ">HHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHBBBBHBBBBBBBBHHHHHHHHHHHHHHHHHHHH" + ) # Read xml file into dict self.load_AZFP_xml() fmap = fsspec.get_mapper(self.source_file, **self.storage_options) - with fmap.fs.open(fmap.root, 'rb') as file: + with fmap.fs.open(fmap.root, "rb") as file: ping_num = 0 eof = False while not eof: @@ -109,28 +152,52 @@ def compute_battery(N): # Display information about the file that was loaded in self._print_status() # Compute temperature from unpacked_data[ii]['ancillary][4] - self.unpacked_data['temperature'].append( - compute_temp(self.unpacked_data['ancillary'][ping_num][4])) + self.unpacked_data["temperature"].append( + compute_temp(self.unpacked_data["ancillary"][ping_num][4]) + ) # compute x tilt from unpacked_data[ii]['ancillary][0] - self.unpacked_data['tilt_x'].append( - compute_tilt(self.unpacked_data['ancillary'][ping_num][0], - self.parameters['X_a'], self.parameters['X_b'], - self.parameters['X_c'], self.parameters['X_d'])) + self.unpacked_data["tilt_x"].append( + compute_tilt( + self.unpacked_data["ancillary"][ping_num][0], + self.parameters["X_a"], + self.parameters["X_b"], + self.parameters["X_c"], + self.parameters["X_d"], + ) + ) # Compute y tilt from unpacked_data[ii]['ancillary][1] - self.unpacked_data['tilt_y'].append( - compute_tilt(self.unpacked_data['ancillary'][ping_num][1], - self.parameters['Y_a'], self.parameters['Y_b'], - self.parameters['Y_c'], self.parameters['Y_d'])) + self.unpacked_data["tilt_y"].append( + compute_tilt( + self.unpacked_data["ancillary"][ping_num][1], + self.parameters["Y_a"], + self.parameters["Y_b"], + self.parameters["Y_c"], + self.parameters["Y_d"], + ) + ) # Compute cos tilt magnitude from tilt x and y values - self.unpacked_data['cos_tilt_mag'].append( - math.cos((math.sqrt(self.unpacked_data['tilt_x'][ping_num] ** 2 + - self.unpacked_data['tilt_y'][ping_num] ** 2)) * math.pi / 180)) + self.unpacked_data["cos_tilt_mag"].append( + math.cos( + ( + math.sqrt( + self.unpacked_data["tilt_x"][ping_num] ** 2 + + self.unpacked_data["tilt_y"][ping_num] ** 2 + ) + ) + * math.pi + / 180 + ) + ) # Calculate voltage of main battery pack - self.unpacked_data['battery_main'].append( - compute_battery(self.unpacked_data['ancillary'][ping_num][2])) + self.unpacked_data["battery_main"].append( + compute_battery( + self.unpacked_data["ancillary"][ping_num][2] + ) + ) # If there is a Tx battery pack - self.unpacked_data['battery_tx'].append( - 
compute_battery(self.unpacked_data['ad'][ping_num][0])) + self.unpacked_data["battery_tx"].append( + compute_battery(self.unpacked_data["ad"][ping_num][0]) + ) else: break else: @@ -140,62 +207,83 @@ def compute_battery(N): self._check_uniqueness() self._get_ping_time() # Explicitly cast frequency to a float in accordance with the SONAR-netCDF4 convention - self.unpacked_data['frequency'] = self.unpacked_data['frequency'].astype(np.float64) + self.unpacked_data["frequency"] = self.unpacked_data["frequency"].astype( + np.float64 + ) @staticmethod def _get_fields(): - """Returns the fields contained in each header of the raw file. - """ + """Returns the fields contained in each header of the raw file.""" _fields = ( - ('profile_flag', 'u2'), - ('profile_number', 'u2'), - ('serial_number', 'u2'), - ('ping_status', 'u2'), - ('burst_int', 'u4'), - ('year', 'u2'), # Year - ('month', 'u2'), # Month - ('day', 'u2'), # Day - ('hour', 'u2'), # Hour - ('minute', 'u2'), # Minute - ('second', 'u2'), # Second - ('hundredths', 'u2'), # Hundredths of a second - ('dig_rate', 'u2', 4), # Digitalization rate for each channel - ('lockout_index', 'u2', 4), # Lockout index for each channel - ('num_bins', 'u2', 4), # Number of bins for each channel - ('range_samples_per_bin', 'u2', 4), # Range samples per bin for each channel - ('ping_per_profile', 'u2'), # Number of pings per profile - ('avg_pings', 'u2'), # Flag indicating whether the pings average in time - ('num_acq_pings', 'u2'), # Pings acquired in the burst - ('ping_period', 'u2'), # Ping period in seconds - ('first_ping', 'u2'), - ('last_ping', 'u2'), - ('data_type', "u1", 4), # Datatype for each channel 1=Avg unpacked_data (5bytes), 0=raw (2bytes) - ('data_error', 'u2'), # Error number is an error occurred - ('phase', 'u1'), # Phase number used to acquire this profile - ('overrun', 'u1'), # 1 if an overrun occurred - ('num_chan', 'u1'), # 1, 2, 3, or 4 - ('gain', 'u1', 4), # gain channel 1-4 - ('spare_chan', 'u1'), # spare channel - ('pulse_length', 'u2', 4), # Pulse length chan 1-4 uS - ('board_num', 'u2', 4), # The board the data came from channel 1-4 - ('frequency', 'u2', 4), # frequency for channel 1-4 in kHz - ('sensor_flag', 'u2'), # Flag indicating if pressure sensor or temperature sensor is available - ('ancillary', 'u2', 5), # Tilt-X, Y, Battery, Pressure, Temperature - ('ad', 'u2', 2) # AD channel 6 and 7 + ("profile_flag", "u2"), + ("profile_number", "u2"), + ("serial_number", "u2"), + ("ping_status", "u2"), + ("burst_int", "u4"), + ("year", "u2"), # Year + ("month", "u2"), # Month + ("day", "u2"), # Day + ("hour", "u2"), # Hour + ("minute", "u2"), # Minute + ("second", "u2"), # Second + ("hundredths", "u2"), # Hundredths of a second + ("dig_rate", "u2", 4), # Digitalization rate for each channel + ("lockout_index", "u2", 4), # Lockout index for each channel + ("num_bins", "u2", 4), # Number of bins for each channel + ( + "range_samples_per_bin", + "u2", + 4, + ), # Range samples per bin for each channel + ("ping_per_profile", "u2"), # Number of pings per profile + ("avg_pings", "u2"), # Flag indicating whether the pings average in time + ("num_acq_pings", "u2"), # Pings acquired in the burst + ("ping_period", "u2"), # Ping period in seconds + ("first_ping", "u2"), + ("last_ping", "u2"), + ( + "data_type", + "u1", + 4, + ), # Datatype for each channel 1=Avg unpacked_data (5bytes), 0=raw (2bytes) + ("data_error", "u2"), # Error number is an error occurred + ("phase", "u1"), # Phase number used to acquire this profile + ("overrun", "u1"), # 1 
if an overrun occurred + ("num_chan", "u1"), # 1, 2, 3, or 4 + ("gain", "u1", 4), # gain channel 1-4 + ("spare_chan", "u1"), # spare channel + ("pulse_length", "u2", 4), # Pulse length chan 1-4 uS + ("board_num", "u2", 4), # The board the data came from channel 1-4 + ("frequency", "u2", 4), # frequency for channel 1-4 in kHz + ( + "sensor_flag", + "u2", + ), # Flag indicating if pressure sensor or temperature sensor is available + ("ancillary", "u2", 5), # Tilt-X, Y, Battery, Pressure, Temperature + ("ad", "u2", 2), # AD channel 6 and 7 ) return _fields def _print_status(self): - """Prints message to console giving information about the raw file being parsed. - """ + """Prints message to console giving information about the raw file being parsed.""" filename = os.path.basename(self.source_file) - timestamp = dt(self.unpacked_data['year'][0], self.unpacked_data['month'][0], self.unpacked_data['day'][0], - self.unpacked_data['hour'][0], self.unpacked_data['minute'][0], - int(self.unpacked_data['second'][0] + self.unpacked_data['hundredths'][0] / 100)) + timestamp = dt( + self.unpacked_data["year"][0], + self.unpacked_data["month"][0], + self.unpacked_data["day"][0], + self.unpacked_data["hour"][0], + self.unpacked_data["minute"][0], + int( + self.unpacked_data["second"][0] + + self.unpacked_data["hundredths"][0] / 100 + ), + ) timestr = timestamp.strftime("%Y-%b-%d %H:%M:%S") pathstr, xml_name = os.path.split(self.xml_path) - print(f"{dt.now().strftime('%H:%M:%S')} parsing file {filename} with {xml_name}, " - f"time of first ping: {timestr}") + print( + f"{dt.now().strftime('%H:%M:%S')} parsing file {filename} with {xml_name}, " + f"time of first ping: {timestr}" + ) def _split_header(self, raw, header_unpacked): """Splits the header information into a dictionary. 
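The _fields table above drives a cursor walk over the flat tuple that struct.unpack produces from the 124-byte header (the real parser unpacks the whole record with HEADER_FORMAT in a single call): scalar fields consume one slot, multi-element fields consume field[2] slots, as the _split_header hunk below shows. A toy sketch of the same walk with a two-field layout (field names taken from the table, byte layout invented):

import struct

fields = (
    ("profile_flag", "u2"),    # scalar -> one unpacked value
    ("dig_rate", "u2", 4),     # per-channel -> four unpacked values
)
# Fabricate a 10-byte big-endian record: FILE_TYPE marker + four dig rates
raw_bytes = struct.pack(">HHHHH", 64770, 64000, 64000, 0, 0)
header_unpacked = struct.unpack(">HHHHH", raw_bytes)

unpacked = {}
cursor = 0
for field in fields:
    if len(field) == 3:
        # Multi-element field: slice out field[2] consecutive values
        unpacked[field[0]] = header_unpacked[cursor : cursor + field[2]]
        cursor += field[2]
    else:
        # Scalar field: take a single value
        unpacked[field[0]] = header_unpacked[cursor]
        cursor += 1
# unpacked == {"profile_flag": 64770, "dig_rate": (64000, 64000, 0, 0)}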
@@ -212,24 +300,40 @@ def _split_header(self, raw, header_unpacked): ------- True or False depending on whether the unpacking was successful """ - FILE_TYPE = 64770 # Instrument specific constant + FILE_TYPE = 64770 # Instrument specific constant fields = self._get_fields() - if header_unpacked[0] != FILE_TYPE: # first field should match hard-coded FILE_TYPE from manufacturer + if ( + header_unpacked[0] != FILE_TYPE + ): # first field should match hard-coded FILE_TYPE from manufacturer check_eof = raw.read(1) if check_eof: print("Error: Unknown file type") return False header_byte_cnt = 0 - firmware_freq_len = 4 # fields with num_freq data still takes 4 bytes, the extra bytes contain random numbers - field_w_freq = ('dig_rate', 'lockout_index', 'num_bins', 'range_samples_per_bin', # fields with num_freq data - 'data_type', 'gain', 'pulse_length', 'board_num', 'frequency') + firmware_freq_len = 4 # fields with num_freq data still takes 4 bytes, the extra bytes contain random numbers + field_w_freq = ( + "dig_rate", + "lockout_index", + "num_bins", + "range_samples_per_bin", # fields with num_freq data + "data_type", + "gain", + "pulse_length", + "board_num", + "frequency", + ) for field in fields: if field[0] in field_w_freq: # fields with num_freq data self.unpacked_data[field[0]].append( - header_unpacked[header_byte_cnt:header_byte_cnt + self.parameters['num_freq']]) + header_unpacked[ + header_byte_cnt : header_byte_cnt + self.parameters["num_freq"] + ] + ) header_byte_cnt += firmware_freq_len elif len(field) == 3: # other longer fields ('ancillary' and 'ad') - self.unpacked_data[field[0]].append(header_unpacked[header_byte_cnt:header_byte_cnt + field[2]]) + self.unpacked_data[field[0]].append( + header_unpacked[header_byte_cnt : header_byte_cnt + field[2]] + ) header_byte_cnt += field[2] else: self.unpacked_data[field[0]].append(header_unpacked[header_byte_cnt]) @@ -237,78 +341,116 @@ def _split_header(self, raw, header_unpacked): return True def _add_counts(self, raw, ping_num): - """Unpacks the echosounder raw data. Modifies self.unpacked_data. - """ - vv_tmp = [[]] * self.unpacked_data['num_chan'][ping_num] - for freq_ch in range(self.unpacked_data['num_chan'][ping_num]): - counts_byte_size = self.unpacked_data['num_bins'][ping_num][freq_ch] - if self.unpacked_data['data_type'][ping_num][freq_ch]: - if self.unpacked_data['avg_pings'][ping_num]: # if pings are averaged over time - divisor = self.unpacked_data['ping_per_profile'][ping_num] * \ - self.unpacked_data['range_samples_per_bin'][ping_num][freq_ch] + """Unpacks the echosounder raw data. 
-        """Unpacks the echosounder raw data. Modifies self.unpacked_data.
-        """
+        """Unpacks the echosounder raw data. Modifies self.unpacked_data."""
+        vv_tmp = [[]] * self.unpacked_data["num_chan"][ping_num]
+        for freq_ch in range(self.unpacked_data["num_chan"][ping_num]):
+            counts_byte_size = self.unpacked_data["num_bins"][ping_num][freq_ch]
+            if self.unpacked_data["data_type"][ping_num][freq_ch]:
+                if self.unpacked_data["avg_pings"][
+                    ping_num
+                ]:  # if pings are averaged over time
+                    divisor = (
+                        self.unpacked_data["ping_per_profile"][ping_num]
+                        * self.unpacked_data["range_samples_per_bin"][ping_num][freq_ch]
+                    )
                 else:
-                    divisor = self.unpacked_data['range_samples_per_bin'][ping_num][freq_ch]
-                ls = unpack(">" + "I" * counts_byte_size, raw.read(counts_byte_size * 4))  # Linear sum
-                lso = unpack(">" + "B" * counts_byte_size, raw.read(counts_byte_size * 1))  # linear sum overflow
-                v = (np.array(ls) + np.array(lso) * 4294967295) / divisor
-                v = (np.log10(v) - 2.5) * (8 * 65535) * self.parameters['DS'][freq_ch]
+                    divisor = self.unpacked_data["range_samples_per_bin"][ping_num][
+                        freq_ch
+                    ]
+                ls = unpack(
+                    ">" + "I" * counts_byte_size, raw.read(counts_byte_size * 4)
+                )  # Linear sum
+                lso = unpack(
+                    ">" + "B" * counts_byte_size, raw.read(counts_byte_size * 1)
+                )  # linear sum overflow
+                v = (np.array(ls) + np.array(lso) * 4294967295) / divisor
+                v = (np.log10(v) - 2.5) * (8 * 65535) * self.parameters["DS"][freq_ch]
                 v[np.isinf(v)] = 0
                 vv_tmp[freq_ch] = v
             else:
                 counts_chunk = raw.read(counts_byte_size * 2)
                 counts_unpacked = unpack(">" + "H" * counts_byte_size, counts_chunk)
                 vv_tmp[freq_ch] = counts_unpacked
-        self.unpacked_data['counts'].append(vv_tmp)
+        self.unpacked_data["counts"].append(vv_tmp)

     def _check_uniqueness(self):
-        """Check for ping-by-ping consistency of sampling parameters and reduce if identical.
-        """
+        """Check for ping-by-ping consistency of sampling parameters and reduce if identical."""
         if not self.unpacked_data:
             self.parse_raw()
-        if np.array(self.unpacked_data['profile_flag']).size != 1:  # Only check uniqueness once.
+        if (
+            np.array(self.unpacked_data["profile_flag"]).size != 1
+        ):  # Only check uniqueness once.
             # fields with num_freq data
-            field_w_freq = ('dig_rate', 'lockout_index', 'num_bins', 'range_samples_per_bin',
-                            'data_type', 'gain', 'pulse_length', 'board_num', 'frequency')
+            field_w_freq = (
+                "dig_rate",
+                "lockout_index",
+                "num_bins",
+                "range_samples_per_bin",
+                "data_type",
+                "gain",
+                "pulse_length",
+                "board_num",
+                "frequency",
+            )
             # fields to reduce size if the same for all pings
-            field_include = ('profile_flag', 'serial_number',
-                             'burst_int', 'ping_per_profile', 'avg_pings', 'ping_period',
-                             'phase', 'num_chan', 'spare_chan')
+            field_include = (
+                "profile_flag",
+                "serial_number",
+                "burst_int",
+                "ping_per_profile",
+                "avg_pings",
+                "ping_period",
+                "phase",
+                "num_chan",
+                "spare_chan",
+            )
             for field in field_w_freq:
                 uniq = np.unique(self.unpacked_data[field], axis=0)
                 if uniq.shape[0] == 1:
                     self.unpacked_data[field] = uniq.squeeze()
                 else:
-                    raise ValueError(f"Header value {field} is not constant for each ping")
+                    raise ValueError(
+                        f"Header value {field} is not constant for each ping"
+                    )
             for field in field_include:
                 uniq = np.unique(self.unpacked_data[field])
                 if uniq.shape[0] == 1:
                     self.unpacked_data[field] = uniq.squeeze()
                 else:
-                    raise ValueError(f"Header value {field} is not constant for each ping")
+                    raise ValueError(
+                        f"Header value {field} is not constant for each ping"
+                    )

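Aside (illustrative sketch, not part of the patch): the np.unique(..., axis=0) pattern reformatted above keeps a single copy of a per-ping header field only when every ping agrees, and raises otherwise. A minimal self-contained illustration of that reduction, with made-up values (the field name and numbers are hypothetical):

import numpy as np

# One row of per-channel values for each ping; here both pings agree.
field = [[38, 125, 200, 455], [38, 125, 200, 455]]

uniq = np.unique(field, axis=0)  # unique rows across pings
if uniq.shape[0] == 1:
    reduced = uniq.squeeze()  # constant across pings -> keep a single copy
    print(reduced)  # [ 38 125 200 455]
else:
    raise ValueError("Header value frequency is not constant for each ping")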
- """ + """Assemble ping time from parsed values.""" if not self.unpacked_data: self.parse_raw() ping_time = [] - for ping_num, year in enumerate(self.unpacked_data['year']): - ping_time.append(np.datetime64(dt(year, - self.unpacked_data['month'][ping_num], - self.unpacked_data['day'][ping_num], - self.unpacked_data['hour'][ping_num], - self.unpacked_data['minute'][ping_num], - int(self.unpacked_data['second'][ping_num] + - self.unpacked_data['hundredths'][ping_num] / 100)).replace(tzinfo=None), '[ms]')) + for ping_num, year in enumerate(self.unpacked_data["year"]): + ping_time.append( + np.datetime64( + dt( + year, + self.unpacked_data["month"][ping_num], + self.unpacked_data["day"][ping_num], + self.unpacked_data["hour"][ping_num], + self.unpacked_data["minute"][ping_num], + int( + self.unpacked_data["second"][ping_num] + + self.unpacked_data["hundredths"][ping_num] / 100 + ), + ).replace(tzinfo=None), + "[ms]", + ) + ) self.ping_time = ping_time @staticmethod def _calc_Sv_offset(f, pulse_length): - """Calculate the compensation factor for Sv calculation. - """ + """Calculate the compensation factor for Sv calculation.""" # TODO: this method seems should be in echopype.process if f > 38000: if pulse_length == 300: diff --git a/echopype/convert/parse_base.py b/echopype/convert/parse_base.py index 4490f5ebb..3d87871e8 100644 --- a/echopype/convert/parse_base.py +++ b/echopype/convert/parse_base.py @@ -1,30 +1,34 @@ +import os from collections import defaultdict -from .utils.ek_raw_io import RawSimradFile -from .utils.ek_raw_io import SimradEOF from datetime import datetime as dt + import numpy as np -import os -FILENAME_DATETIME_EK60 = '(?P.+)?-?D(?P\\w{1,8})-T(?P