Skip to content

Commit ff5efe4

Browse files
authored Jun 6, 2021
Merge branch 'master' into fix/gzip_compression_options
2 parents b4b2c23 + dd3fae3 commit ff5efe4

13 files changed

+130
-34
lines changed
 

‎nibabel/analyze.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -906,7 +906,7 @@ class AnalyzeImage(SpatialImage):
906906
_meta_sniff_len = header_class.sizeof_hdr
907907
files_types = (('image', '.img'), ('header', '.hdr'))
908908
valid_exts = ('.img', '.hdr')
909-
_compressed_suffixes = ('.gz', '.bz2')
909+
_compressed_suffixes = ('.gz', '.bz2', '.zst')
910910

911911
makeable = True
912912
rw = True

‎nibabel/benchmarks/bench_fileslice.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818
from ..fileslice import fileslice
1919
from ..rstutils import rst_table
2020
from ..tmpdirs import InTemporaryDirectory
21+
from ..optpkg import optional_package
2122

2223
SHAPE = (64, 64, 32, 100)
2324
ROW_NAMES = [f'axis {i}, len {dim}' for i, dim in enumerate(SHAPE)]
2425
COL_NAMES = ['mid int',
2526
'step 1',
2627
'half step 1',
2728
'step mid int']
29+
HAVE_ZSTD = optional_package("pyzstd")[1]
2830

2931

3032
def _slices_for_len(L):
@@ -70,7 +72,8 @@ def g():
7072
def bench_fileslice(bytes=True,
7173
file_=True,
7274
gz=True,
73-
bz2=False):
75+
bz2=False,
76+
zst=True):
7477
sys.stdout.flush()
7578
repeat = 2
7679

@@ -103,4 +106,10 @@ def my_table(title, times, base):
103106
my_table('bz2 slice - raw (ratio)',
104107
np.dstack((bz2_times, bz2_times / bz2_base)),
105108
bz2_base)
109+
if zst and HAVE_ZSTD:
110+
with InTemporaryDirectory():
111+
zst_times, zst_base = run_slices('data.zst', repeat)
112+
my_table('zst slice - raw (ratio)',
113+
np.dstack((zst_times, zst_times / zst_base)),
114+
zst_base)
106115
sys.stdout.flush()

‎nibabel/brikhead.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ class AFNIImage(SpatialImage):
490490
header_class = AFNIHeader
491491
valid_exts = ('.brik', '.head')
492492
files_types = (('image', '.brik'), ('header', '.head'))
493-
_compressed_suffixes = ('.gz', '.bz2', '.Z')
493+
_compressed_suffixes = ('.gz', '.bz2', '.Z', '.zst')
494494
makeable = False
495495
rw = False
496496
ImageArrayProxy = AFNIArrayProxy

‎nibabel/loadsave.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
from .arrayproxy import is_proxy
2020
from .deprecated import deprecate_with_version
2121

22+
_compressed_suffixes = ('.gz', '.bz2', '.zst')
23+
2224

2325
def _signature_matches_extension(filename, sniff):
2426
"""Check if signature aka magic number matches filename extension.
@@ -153,7 +155,7 @@ def save(img, filename):
153155
return
154156

155157
# Be nice to users by making common implicit conversions
156-
froot, ext, trailing = splitext_addext(filename, ('.gz', '.bz2'))
158+
froot, ext, trailing = splitext_addext(filename, _compressed_suffixes)
157159
lext = ext.lower()
158160

159161
# Special-case Nifti singles and Pairs

‎nibabel/minc1.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ class Minc1Image(SpatialImage):
316316
_meta_sniff_len = 4
317317
valid_exts = ('.mnc',)
318318
files_types = (('image', '.mnc'),)
319-
_compressed_suffixes = ('.gz', '.bz2')
319+
_compressed_suffixes = ('.gz', '.bz2', '.zst')
320320

321321
makeable = True
322322
rw = False

‎nibabel/openers.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from os.path import splitext
1616
from distutils.version import StrictVersion
1717

18+
from nibabel.optpkg import optional_package
19+
1820
# is indexed_gzip present and modern?
1921
try:
2022
import indexed_gzip as igzip
@@ -72,6 +74,12 @@ def _gzip_open(filename, mode='rb', compresslevel=9, mtime=0, keep_open=False):
7274
return gzip_file
7375

7476

77+
def _zstd_open(filename, mode="r", *, level_or_option=None, zstd_dict=None):
78+
pyzstd = optional_package("pyzstd")[0]
79+
return pyzstd.ZstdFile(filename, mode,
80+
level_or_option=level_or_option, zstd_dict=zstd_dict)
81+
82+
7583
class Opener(object):
7684
r""" Class to accept, maybe open, and context-manage file-likes / filenames
7785
@@ -94,13 +102,20 @@ class Opener(object):
94102
"""
95103
gz_def = (_gzip_open, ('mode', 'compresslevel', 'mtime', 'keep_open'))
96104
bz2_def = (BZ2File, ('mode', 'buffering', 'compresslevel'))
105+
zstd_def = (_zstd_open, ('mode', 'level_or_option', 'zstd_dict'))
97106
compress_ext_map = {
98107
'.gz': gz_def,
99108
'.bz2': bz2_def,
109+
'.zst': zstd_def,
100110
None: (open, ('mode', 'buffering')) # default
101111
}
102112
#: default compression level when writing gz and bz2 files
103113
default_compresslevel = 1
114+
#: default option for zst files
115+
default_zst_compresslevel = 3
116+
default_level_or_option = {"rb": None, "r": None,
117+
"wb": default_zst_compresslevel,
118+
"w": default_zst_compresslevel}
104119
#: whether to ignore case looking for compression extensions
105120
compress_ext_icase = True
106121

@@ -117,10 +132,15 @@ def __init__(self, fileish, *args, **kwargs):
117132
full_kwargs.update(dict(zip(arg_names[:n_args], args)))
118133
# Set default mode
119134
if 'mode' not in full_kwargs:
120-
kwargs['mode'] = 'rb'
135+
mode = 'rb'
136+
kwargs['mode'] = mode
137+
else:
138+
mode = full_kwargs['mode']
121139
# Default compression level
122140
if 'compresslevel' in arg_names and 'compresslevel' not in kwargs:
123141
kwargs['compresslevel'] = self.default_compresslevel
142+
if 'level_or_option' in arg_names and 'level_or_option' not in kwargs:
143+
kwargs['level_or_option'] = self.default_level_or_option[mode]
124144
# Default keep_open hint
125145
if 'keep_open' in arg_names:
126146
kwargs.setdefault('keep_open', False)

‎nibabel/tests/test_analyze.py

+5
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from ..casting import as_int
3131
from ..tmpdirs import InTemporaryDirectory
3232
from ..arraywriters import WriterError
33+
from ..optpkg import optional_package
3334

3435
import pytest
3536
from numpy.testing import (assert_array_equal, assert_array_almost_equal)
@@ -40,6 +41,8 @@
4041
from .test_wrapstruct import _TestLabeledWrapStruct
4142
from . import test_spatialimages as tsi
4243

44+
HAVE_ZSTD = optional_package("pyzstd")[1]
45+
4346
header_file = os.path.join(data_path, 'analyze.hdr')
4447

4548
PIXDIM0_MSG = 'pixdim[1,2,3] should be non-zero; setting 0 dims to 1'
@@ -788,6 +791,8 @@ def test_big_offset_exts(self):
788791
aff = np.eye(4)
789792
img_ext = img_klass.files_types[0][1]
790793
compressed_exts = ['', '.gz', '.bz2']
794+
if HAVE_ZSTD:
795+
compressed_exts += ['.zst']
791796
with InTemporaryDirectory():
792797
for offset in (0, 2048):
793798
# Set offset in in-memory image

‎nibabel/tests/test_minc1.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from ..deprecated import ModuleProxy
2323
from .. import minc1
2424
from ..minc1 import Minc1File, Minc1Image, MincHeader
25+
from ..optpkg import optional_package
2526

2627
from ..tmpdirs import InTemporaryDirectory
2728
from ..deprecator import ExpiredDeprecationError
@@ -32,6 +33,8 @@
3233
from . import test_spatialimages as tsi
3334
from .test_fileslice import slicer_samples
3435

36+
pyzstd, HAVE_ZSTD, _ = optional_package("pyzstd")
37+
3538
EG_FNAME = pjoin(data_path, 'tiny.mnc')
3639

3740
# Example images in format expected for ``test_image_api``, adding ``zooms``
@@ -170,7 +173,10 @@ def test_compressed(self):
170173
# Not so for MINC2; hence this small sub-class
171174
for tp in self.test_files:
172175
content = open(tp['fname'], 'rb').read()
173-
openers_exts = ((gzip.open, '.gz'), (bz2.BZ2File, '.bz2'))
176+
openers_exts = [(gzip.open, '.gz'),
177+
(bz2.BZ2File, '.bz2')]
178+
if HAVE_ZSTD: # add .zst to test if installed
179+
openers_exts += [(pyzstd.ZstdFile, '.zst')]
174180
with InTemporaryDirectory():
175181
for opener, ext in openers_exts:
176182
fname = 'test.mnc' + ext

‎nibabel/tests/test_openers.py

+35-14
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,23 @@
1616
import time
1717

1818
from numpy.compat.py3k import asstr, asbytes
19-
from ..openers import Opener, ImageOpener, HAVE_INDEXED_GZIP, BZ2File, DeterministicGzipFile
19+
from ..openers import (Opener,
20+
ImageOpener,
21+
HAVE_INDEXED_GZIP,
22+
BZ2File,
23+
DeterministicGzipFile,
24+
)
2025
from ..tmpdirs import InTemporaryDirectory
2126
from ..volumeutils import BinOpener
27+
from ..optpkg import optional_package
2228

2329
import unittest
2430
from unittest import mock
2531
import pytest
2632
from ..testing import error_warnings
2733

34+
pyzstd, HAVE_ZSTD, _ = optional_package("pyzstd")
35+
2836

2937
class Lunk(object):
3038
# bare file-like for testing
@@ -73,10 +81,13 @@ def test_Opener_various():
7381
import indexed_gzip as igzip
7482
with InTemporaryDirectory():
7583
sobj = BytesIO()
76-
for input in ('test.txt',
77-
'test.txt.gz',
78-
'test.txt.bz2',
79-
sobj):
84+
files_to_test = ['test.txt',
85+
'test.txt.gz',
86+
'test.txt.bz2',
87+
sobj]
88+
if HAVE_ZSTD:
89+
files_to_test += ['test.txt.zst']
90+
for input in files_to_test:
8091
with Opener(input, 'wb') as fobj:
8192
fobj.write(message)
8293
assert fobj.tell() == len(message)
@@ -242,6 +253,8 @@ def test_compressed_ext_case():
242253
class StrictOpener(Opener):
243254
compress_ext_icase = False
244255
exts = ('gz', 'bz2', 'GZ', 'gZ', 'BZ2', 'Bz2')
256+
if HAVE_ZSTD:
257+
exts += ('zst', 'ZST', 'Zst')
245258
with InTemporaryDirectory():
246259
# Make a basic file to check type later
247260
with open(__file__, 'rb') as a_file:
@@ -266,6 +279,8 @@ class StrictOpener(Opener):
266279
except ImportError:
267280
IndexedGzipFile = GzipFile
268281
assert isinstance(fobj.fobj, (GzipFile, IndexedGzipFile))
282+
elif lext == 'zst':
283+
assert isinstance(fobj.fobj, pyzstd.ZstdFile)
269284
else:
270285
assert isinstance(fobj.fobj, BZ2File)
271286

@@ -275,11 +290,14 @@ def test_name():
275290
sobj = BytesIO()
276291
lunk = Lunk('in ART')
277292
with InTemporaryDirectory():
278-
for input in ('test.txt',
279-
'test.txt.gz',
280-
'test.txt.bz2',
281-
sobj,
282-
lunk):
293+
files_to_test = ['test.txt',
294+
'test.txt.gz',
295+
'test.txt.bz2',
296+
sobj,
297+
lunk]
298+
if HAVE_ZSTD:
299+
files_to_test += ['test.txt.zst']
300+
for input in files_to_test:
283301
exp_name = input if type(input) == type('') else None
284302
with Opener(input, 'wb') as fobj:
285303
assert fobj.name == exp_name
@@ -331,10 +349,13 @@ def test_iter():
331349
""".split('\n')
332350
with InTemporaryDirectory():
333351
sobj = BytesIO()
334-
for input, does_t in (('test.txt', True),
335-
('test.txt.gz', False),
336-
('test.txt.bz2', False),
337-
(sobj, True)):
352+
files_to_test = [('test.txt', True),
353+
('test.txt.gz', False),
354+
('test.txt.bz2', False),
355+
(sobj, True)]
356+
if HAVE_ZSTD:
357+
files_to_test += [('test.txt.zst', False)]
358+
for input, does_t in files_to_test:
338359
with Opener(input, 'wb') as fobj:
339360
for line in lines:
340361
fobj.write(asbytes(line + os.linesep))

‎nibabel/tests/test_volumeutils.py

+35-12
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,16 @@
4949
from ..casting import (floor_log2, type_info, OK_FLOATS, shared_range)
5050

5151
from ..deprecator import ExpiredDeprecationError
52+
from ..optpkg import optional_package
5253

5354
from numpy.testing import (assert_array_almost_equal,
5455
assert_array_equal)
5556
import pytest
5657

5758
from nibabel.testing import nullcontext, assert_dt_equal, assert_allclose_safely, suppress_warnings
5859

60+
pyzstd, HAVE_ZSTD, _ = optional_package("pyzstd")
61+
5962
#: convenience variables for numpy types
6063
FLOAT_TYPES = np.sctypes['float']
6164
COMPLEX_TYPES = np.sctypes['complex']
@@ -68,9 +71,12 @@
6871
def test__is_compressed_fobj():
6972
# _is_compressed helper function
7073
with InTemporaryDirectory():
71-
for ext, opener, compressed in (('', open, False),
72-
('.gz', gzip.open, True),
73-
('.bz2', BZ2File, True)):
74+
file_openers = [('', open, False),
75+
('.gz', gzip.open, True),
76+
('.bz2', BZ2File, True)]
77+
if HAVE_ZSTD:
78+
file_openers += [('.zst', pyzstd.ZstdFile, True)]
79+
for ext, opener, compressed in file_openers:
7480
fname = 'test.bin' + ext
7581
for mode in ('wb', 'rb'):
7682
fobj = opener(fname, mode)
@@ -88,12 +94,15 @@ def make_array(n, bytes):
8894
arr.flags.writeable = True
8995
return arr
9096

91-
# Check whether file, gzip file, bz2 file reread memory from cache
97+
# Check whether file, gzip file, bz2, zst file reread memory from cache
9298
fname = 'test.bin'
9399
with InTemporaryDirectory():
100+
openers = [open, gzip.open, BZ2File]
101+
if HAVE_ZSTD:
102+
openers += [pyzstd.ZstdFile]
94103
for n, opener in itertools.product(
95104
(256, 1024, 2560, 25600),
96-
(open, gzip.open, BZ2File)):
105+
openers):
97106
in_arr = np.arange(n, dtype=dtype)
98107
# Write array to file
99108
fobj_w = opener(fname, 'wb')
@@ -230,7 +239,10 @@ def test_array_from_file_openers():
230239
dtype = np.dtype(np.float32)
231240
in_arr = np.arange(24, dtype=dtype).reshape(shape)
232241
with InTemporaryDirectory():
233-
for ext, offset in itertools.product(('', '.gz', '.bz2'),
242+
extensions = ['', '.gz', '.bz2']
243+
if HAVE_ZSTD:
244+
extensions += ['.zst']
245+
for ext, offset in itertools.product(extensions,
234246
(0, 5, 10)):
235247
fname = 'test.bin' + ext
236248
with Opener(fname, 'wb') as out_buf:
@@ -251,9 +263,12 @@ def test_array_from_file_reread():
251263
offset = 9
252264
fname = 'test.bin'
253265
with InTemporaryDirectory():
266+
openers = [open, gzip.open, bz2.BZ2File, BytesIO]
267+
if HAVE_ZSTD:
268+
openers += [pyzstd.ZstdFile]
254269
for shape, opener, dtt, order in itertools.product(
255270
((64,), (64, 65), (64, 65, 66)),
256-
(open, gzip.open, bz2.BZ2File, BytesIO),
271+
openers,
257272
(np.int16, np.float32),
258273
('F', 'C')):
259274
n_els = np.prod(shape)
@@ -901,7 +916,9 @@ def test_write_zeros():
901916
def test_seek_tell():
902917
# Test seek tell routine
903918
bio = BytesIO()
904-
in_files = bio, 'test.bin', 'test.gz', 'test.bz2'
919+
in_files = [bio, 'test.bin', 'test.gz', 'test.bz2']
920+
if HAVE_ZSTD:
921+
in_files += ['test.zst']
905922
start = 10
906923
end = 100
907924
diff = end - start
@@ -920,9 +937,12 @@ def test_seek_tell():
920937
fobj.write(b'\x01' * start)
921938
assert fobj.tell() == start
922939
# Files other than BZ2Files can seek forward on write, leaving
923-
# zeros in their wake. BZ2Files can't seek when writing, unless
924-
# we enable the write0 flag to seek_tell
925-
if not write0 and in_file == 'test.bz2': # Can't seek write in bz2
940+
# zeros in their wake. BZ2Files can't seek when writing,
941+
# unless we enable the write0 flag to seek_tell
942+
# ZstdFiles also do not support seeking forward on write
943+
if (not write0 and
944+
(in_file == 'test.bz2' or
945+
in_file == 'test.zst')): # Can't seek write in bz2, zst
926946
# write the zeros by hand for the read test below
927947
fobj.write(b'\x00' * diff)
928948
else:
@@ -946,7 +966,10 @@ def test_seek_tell():
946966
# Check we have the expected written output
947967
with ImageOpener(in_file, 'rb') as fobj:
948968
assert fobj.read() == b'\x01' * start + b'\x00' * diff + b'\x02' * tail
949-
for in_file in ('test2.gz', 'test2.bz2'):
969+
input_files = ['test2.gz', 'test2.bz2']
970+
if HAVE_ZSTD:
971+
input_files += ['test2.zst']
972+
for in_file in input_files:
950973
# Check failure of write seek backwards
951974
with ImageOpener(in_file, 'wb') as fobj:
952975
fobj.write(b'g' * 10)

‎nibabel/volumeutils.py

+7
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
from .openers import Opener, BZ2File, IndexedGzipFile
2323
from .deprecated import deprecate_with_version
2424
from .externals.oset import OrderedSet
25+
from .optpkg import optional_package
26+
27+
pyzstd, HAVE_ZSTD, _ = optional_package("pyzstd")
2528

2629
sys_is_le = sys.byteorder == 'little'
2730
native_code = sys_is_le and '<' or '>'
@@ -40,6 +43,10 @@
4043
#: file-like classes known to hold compressed data
4144
COMPRESSED_FILE_LIKES = (gzip.GzipFile, BZ2File, IndexedGzipFile)
4245

46+
# Enable .zst support if pyzstd installed.
47+
if HAVE_ZSTD:
48+
COMPRESSED_FILE_LIKES = (*COMPRESSED_FILE_LIKES, pyzstd.ZstdFile)
49+
4350

4451
class Recoder(object):
4552
""" class to return canonical code(s) from code or aliases

‎setup.cfg

+3
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ test =
6161
pytest !=5.3.4
6262
pytest-cov
6363
pytest-doctestplus
64+
zstd =
65+
pyzstd >= 0.14.3
6466
all =
6567
%(dicomfs)s
6668
%(dev)s
@@ -69,6 +71,7 @@ all =
6971
%(spm)s
7072
%(style)s
7173
%(test)s
74+
%(zstd)s
7275

7376
[options.entry_points]
7477
console_scripts =

‎tools/ci/env.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ REQUIREMENTS="-r requirements.txt"
55
# Minimum versions of minimum requirements
66
MIN_REQUIREMENTS="-r min-requirements.txt"
77

8-
DEFAULT_OPT_DEPENDS="scipy matplotlib pillow pydicom h5py indexed_gzip"
8+
DEFAULT_OPT_DEPENDS="scipy matplotlib pillow pydicom h5py indexed_gzip pyzstd"
99
# pydicom has skipped some important pre-releases, so enable a check against master
1010
PYDICOM_MASTER="git+https://github.com/pydicom/pydicom.git@master"
1111
# Minimum versions of optional requirements

0 commit comments

Comments
 (0)
Please sign in to comment.