Skip to content

Commit e43fa23

Browse files
illia-v authored and srinivasreddy committed
pythongh-81340: Use copy_file_range in shutil.copyfile copy functions (pythonGH-93152)
This allows the underlying file system an opportunity to optimise or avoid the actual copy.
1 parent 0555ec9 commit e43fa23

File tree

5 files changed

+139
-42
lines changed

5 files changed

+139
-42
lines changed

Doc/library/shutil.rst

+7-1
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,9 @@ the use of userspace buffers in Python as in "``outfd.write(infd.read())``".
512512

513513
On macOS `fcopyfile`_ is used to copy the file content (not metadata).
514514

515-
On Linux and Solaris :func:`os.sendfile` is used.
515+
On Linux :func:`os.copy_file_range` or :func:`os.sendfile` is used.
516+
517+
On Solaris :func:`os.sendfile` is used.
516518

517519
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
518520
instead of 64 KiB) and a :func:`memoryview`-based variant of
@@ -527,6 +529,10 @@ file then shutil will silently fallback on using less efficient
527529
.. versionchanged:: 3.14
528530
Solaris now uses :func:`os.sendfile`.
529531

532+
.. versionchanged:: next
533+
Copy-on-write or server-side copy may be used internally via
534+
:func:`os.copy_file_range` on supported Linux filesystems.
535+
530536
.. _shutil-copytree-example:
531537

532538
copytree example

Lib/shutil.py

+76-20
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
# https://bugs.python.org/issue43743#msg393429
5050
_USE_CP_SENDFILE = (hasattr(os, "sendfile")
5151
and sys.platform.startswith(("linux", "android", "sunos")))
52+
_USE_CP_COPY_FILE_RANGE = hasattr(os, "copy_file_range")
5253
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS
5354

5455
# CMD defaults in Windows 10
@@ -107,6 +108,66 @@ def _fastcopy_fcopyfile(fsrc, fdst, flags):
107108
else:
108109
raise err from None
109110

111+
def _determine_linux_fastcopy_blocksize(infd):
112+
"""Determine blocksize for fastcopying on Linux.
113+
114+
Hopefully the whole file will be copied in a single call.
115+
The copying itself should be performed in a loop 'till EOF is
116+
reached (0 return) so a blocksize smaller or bigger than the actual
117+
file size should not make any difference, also in case the file
118+
content changes while being copied.
119+
"""
120+
try:
121+
blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB
122+
except OSError:
123+
blocksize = 2 ** 27 # 128 MiB
124+
# On 32-bit architectures truncate to 1 GiB to avoid OverflowError,
125+
# see gh-82500.
126+
if sys.maxsize < 2 ** 32:
127+
blocksize = min(blocksize, 2 ** 30)
128+
return blocksize
129+
130+
def _fastcopy_copy_file_range(fsrc, fdst):
131+
"""Copy data from one regular mmap-like fd to another by using
132+
a high-performance copy_file_range(2) syscall that gives filesystems
133+
an opportunity to implement the use of reflinks or server-side copy.
134+
135+
This should work on Linux >= 4.5 only.
136+
"""
137+
try:
138+
infd = fsrc.fileno()
139+
outfd = fdst.fileno()
140+
except Exception as err:
141+
raise _GiveupOnFastCopy(err) # not a regular file
142+
143+
blocksize = _determine_linux_fastcopy_blocksize(infd)
144+
offset = 0
145+
while True:
146+
try:
147+
n_copied = os.copy_file_range(infd, outfd, blocksize, offset_dst=offset)
148+
except OSError as err:
149+
# ...in order to have a more informative exception.
150+
err.filename = fsrc.name
151+
err.filename2 = fdst.name
152+
153+
if err.errno == errno.ENOSPC: # filesystem is full
154+
raise err from None
155+
156+
# Give up if this is the first call and no data was copied.
157+
if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
158+
raise _GiveupOnFastCopy(err)
159+
160+
raise err
161+
else:
162+
if n_copied == 0:
163+
# If no bytes have been copied yet, copy_file_range
164+
# might silently fail.
165+
# https://lore.kernel.org/linux-fsdevel/20210126233840.GG4626@dread.disaster.area/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
166+
if offset == 0:
167+
raise _GiveupOnFastCopy()
168+
break
169+
offset += n_copied
170+
110171
def _fastcopy_sendfile(fsrc, fdst):
111172
"""Copy data from one regular mmap-like fd to another by using
112173
high-performance sendfile(2) syscall.
@@ -128,20 +189,7 @@ def _fastcopy_sendfile(fsrc, fdst):
128189
except Exception as err:
129190
raise _GiveupOnFastCopy(err) # not a regular file
130191

131-
# Hopefully the whole file will be copied in a single call.
132-
# sendfile() is called in a loop 'till EOF is reached (0 return)
133-
# so a bufsize smaller or bigger than the actual file size
134-
# should not make any difference, also in case the file content
135-
# changes while being copied.
136-
try:
137-
blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MiB
138-
except OSError:
139-
blocksize = 2 ** 27 # 128MiB
140-
# On 32-bit architectures truncate to 1GiB to avoid OverflowError,
141-
# see bpo-38319.
142-
if sys.maxsize < 2 ** 32:
143-
blocksize = min(blocksize, 2 ** 30)
144-
192+
blocksize = _determine_linux_fastcopy_blocksize(infd)
145193
offset = 0
146194
while True:
147195
try:
@@ -266,12 +314,20 @@ def copyfile(src, dst, *, follow_symlinks=True):
266314
except _GiveupOnFastCopy:
267315
pass
268316
# Linux / Android / Solaris
269-
elif _USE_CP_SENDFILE:
270-
try:
271-
_fastcopy_sendfile(fsrc, fdst)
272-
return dst
273-
except _GiveupOnFastCopy:
274-
pass
317+
elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE:
318+
# reflink may be implicit in copy_file_range.
319+
if _USE_CP_COPY_FILE_RANGE:
320+
try:
321+
_fastcopy_copy_file_range(fsrc, fdst)
322+
return dst
323+
except _GiveupOnFastCopy:
324+
pass
325+
if _USE_CP_SENDFILE:
326+
try:
327+
_fastcopy_sendfile(fsrc, fdst)
328+
return dst
329+
except _GiveupOnFastCopy:
330+
pass
275331
# Windows, see:
276332
# https://github.com/python/cpython/pull/7160#discussion_r195405230
277333
elif _WINDOWS and file_size > 0:

Lib/test/test_shutil.py

+50-21
Original file line numberDiff line numberDiff line change
@@ -3239,12 +3239,8 @@ def test_filesystem_full(self):
32393239
self.assertRaises(OSError, self.zerocopy_fun, src, dst)
32403240

32413241

3242-
@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
3243-
class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
3244-
PATCHPOINT = "os.sendfile"
3245-
3246-
def zerocopy_fun(self, fsrc, fdst):
3247-
return shutil._fastcopy_sendfile(fsrc, fdst)
3242+
class _ZeroCopyFileLinuxTest(_ZeroCopyFileTest):
3243+
BLOCKSIZE_INDEX = None
32483244

32493245
def test_non_regular_file_src(self):
32503246
with io.BytesIO(self.FILEDATA) as src:
@@ -3265,65 +3261,65 @@ def test_non_regular_file_dst(self):
32653261
self.assertEqual(dst.read(), self.FILEDATA)
32663262

32673263
def test_exception_on_second_call(self):
3268-
def sendfile(*args, **kwargs):
3264+
def syscall(*args, **kwargs):
32693265
if not flag:
32703266
flag.append(None)
3271-
return orig_sendfile(*args, **kwargs)
3267+
return orig_syscall(*args, **kwargs)
32723268
else:
32733269
raise OSError(errno.EBADF, "yo")
32743270

32753271
flag = []
3276-
orig_sendfile = os.sendfile
3277-
with unittest.mock.patch('os.sendfile', create=True,
3278-
side_effect=sendfile):
3272+
orig_syscall = eval(self.PATCHPOINT)
3273+
with unittest.mock.patch(self.PATCHPOINT, create=True,
3274+
side_effect=syscall):
32793275
with self.get_files() as (src, dst):
32803276
with self.assertRaises(OSError) as cm:
3281-
shutil._fastcopy_sendfile(src, dst)
3277+
self.zerocopy_fun(src, dst)
32823278
assert flag
32833279
self.assertEqual(cm.exception.errno, errno.EBADF)
32843280

32853281
def test_cant_get_size(self):
32863282
# Emulate a case where src file size cannot be determined.
32873283
# Internally bufsize will be set to a small value and
3288-
# sendfile() will be called repeatedly.
3284+
# a system call will be called repeatedly.
32893285
with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
32903286
with self.get_files() as (src, dst):
3291-
shutil._fastcopy_sendfile(src, dst)
3287+
self.zerocopy_fun(src, dst)
32923288
assert m.called
32933289
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
32943290

32953291
def test_small_chunks(self):
32963292
# Force internal file size detection to be smaller than the
3297-
# actual file size. We want to force sendfile() to be called
3293+
# actual file size. We want to force a system call to be called
32983294
# multiple times, also in order to emulate a src fd which gets
32993295
# bigger while it is being copied.
33003296
mock = unittest.mock.Mock()
33013297
mock.st_size = 65536 + 1
33023298
with unittest.mock.patch('os.fstat', return_value=mock) as m:
33033299
with self.get_files() as (src, dst):
3304-
shutil._fastcopy_sendfile(src, dst)
3300+
self.zerocopy_fun(src, dst)
33053301
assert m.called
33063302
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
33073303

33083304
def test_big_chunk(self):
33093305
# Force internal file size detection to be +100MB bigger than
3310-
# the actual file size. Make sure sendfile() does not rely on
3306+
# the actual file size. Make sure a system call does not rely on
33113307
# file size value except for (maybe) a better throughput /
33123308
# performance.
33133309
mock = unittest.mock.Mock()
33143310
mock.st_size = self.FILESIZE + (100 * 1024 * 1024)
33153311
with unittest.mock.patch('os.fstat', return_value=mock) as m:
33163312
with self.get_files() as (src, dst):
3317-
shutil._fastcopy_sendfile(src, dst)
3313+
self.zerocopy_fun(src, dst)
33183314
assert m.called
33193315
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
33203316

33213317
def test_blocksize_arg(self):
3322-
with unittest.mock.patch('os.sendfile',
3318+
with unittest.mock.patch(self.PATCHPOINT,
33233319
side_effect=ZeroDivisionError) as m:
33243320
self.assertRaises(ZeroDivisionError,
33253321
shutil.copyfile, TESTFN, TESTFN2)
3326-
blocksize = m.call_args[0][3]
3322+
blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
33273323
# Make sure file size and the block size arg passed to
33283324
# the system call are the same.
33293325
self.assertEqual(blocksize, os.path.getsize(TESTFN))
@@ -3333,9 +3329,19 @@ def test_blocksize_arg(self):
33333329
self.addCleanup(os_helper.unlink, TESTFN2 + '3')
33343330
self.assertRaises(ZeroDivisionError,
33353331
shutil.copyfile, TESTFN2, TESTFN2 + '3')
3336-
blocksize = m.call_args[0][3]
3332+
blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
33373333
self.assertEqual(blocksize, 2 ** 23)
33383334

3335+
3336+
@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
3337+
@unittest.mock.patch.object(shutil, "_USE_CP_COPY_FILE_RANGE", False)
3338+
class TestZeroCopySendfile(_ZeroCopyFileLinuxTest, unittest.TestCase):
3339+
PATCHPOINT = "os.sendfile"
3340+
BLOCKSIZE_INDEX = 3
3341+
3342+
def zerocopy_fun(self, fsrc, fdst):
3343+
return shutil._fastcopy_sendfile(fsrc, fdst)
3344+
33393345
def test_file2file_not_supported(self):
33403346
# Emulate a case where sendfile() only supports file->socket
33413347
# fds. In such a case copyfile() is supposed to skip the
@@ -3358,6 +3364,29 @@ def test_file2file_not_supported(self):
33583364
shutil._USE_CP_SENDFILE = True
33593365

33603366

3367+
@unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported")
3368+
class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase):
3369+
PATCHPOINT = "os.copy_file_range"
3370+
BLOCKSIZE_INDEX = 2
3371+
3372+
def zerocopy_fun(self, fsrc, fdst):
3373+
return shutil._fastcopy_copy_file_range(fsrc, fdst)
3374+
3375+
def test_empty_file(self):
3376+
srcname = f"{TESTFN}src"
3377+
dstname = f"{TESTFN}dst"
3378+
self.addCleanup(lambda: os_helper.unlink(srcname))
3379+
self.addCleanup(lambda: os_helper.unlink(dstname))
3380+
with open(srcname, "wb"):
3381+
pass
3382+
3383+
with open(srcname, "rb") as src, open(dstname, "wb") as dst:
3384+
# _fastcopy_copy_file_range gives up copying empty files due
3385+
# to a bug in older Linux.
3386+
with self.assertRaises(shutil._GiveupOnFastCopy):
3387+
self.zerocopy_fun(src, dst)
3388+
3389+
33613390
@unittest.skipIf(not MACOS, 'macOS only')
33623391
class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase):
33633392
PATCHPOINT = "posix._fcopyfile"

Misc/ACKS

+1
Original file line numberDiff line numberDiff line change
@@ -1972,6 +1972,7 @@ Johannes Vogel
19721972
Michael Vogt
19731973
Radu Voicilas
19741974
Alex Volkov
1975+
Illia Volochii
19751976
Ruben Vorderman
19761977
Guido Vranken
19771978
Martijn Vries
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Use :func:`os.copy_file_range` in :func:`shutil.copy`, :func:`shutil.copy2`,
2+
and :func:`shutil.copyfile` functions by default. An underlying Linux system
3+
call gives filesystems an opportunity to implement the use of copy-on-write
4+
(in the case of btrfs and XFS) or server-side copy (in the case of NFS).
5+
Patch by Illia Volochii.

0 commit comments

Comments
 (0)