From 985159a3df7ae4bb4434c503eaa3268a95aa112a Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Mon, 23 May 2022 19:27:51 +0300 Subject: [PATCH 1/6] Move determining a block size for copying to a separate function --- Lib/shutil.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 2cbd808abf2ffb..c8e7f1c94ff22a 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -103,6 +103,25 @@ def _fastcopy_fcopyfile(fsrc, fdst, flags): else: raise err from None +def _determine_linux_fastcopy_blocksize(infd): + """Determine blocksize for fastcopying on Linux. + + Hopefully the whole file will be copied in a single call. + The copying itself should be performed in a loop 'till EOF is + reached (0 return) so a blocksize smaller or bigger than the actual + file size should not make any difference, also in case the file + content changes while being copied. + """ + try: + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MiB + except OSError: + blocksize = 2 ** 27 # 128MiB + # On 32-bit architectures truncate to 1GiB to avoid OverflowError, + # see bpo-38319. + if sys.maxsize < 2 ** 32: + blocksize = min(blocksize, 2 ** 30) + return blocksize + def _fastcopy_sendfile(fsrc, fdst): """Copy data from one regular mmap-like fd to another by using high-performance sendfile(2) syscall. @@ -124,20 +143,7 @@ def _fastcopy_sendfile(fsrc, fdst): except Exception as err: raise _GiveupOnFastCopy(err) # not a regular file - # Hopefully the whole file will be copied in a single call. - # sendfile() is called in a loop 'till EOF is reached (0 return) - # so a bufsize smaller or bigger than the actual file size - # should not make any difference, also in case the file content - # changes while being copied. 
- try: - blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MiB - except OSError: - blocksize = 2 ** 27 # 128MiB - # On 32-bit architectures truncate to 1GiB to avoid OverflowError, - # see bpo-38319. - if sys.maxsize < 2 ** 32: - blocksize = min(blocksize, 2 ** 30) - + blocksize = _determine_linux_fastcopy_blocksize(infd) offset = 0 while True: try: From e4fa33b870cc890ac38857a68d7595bf5aa6f7a6 Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Mon, 23 May 2022 20:30:51 +0300 Subject: [PATCH 2/6] Add copy-on-write support to shutil --- Doc/library/shutil.rst | 30 +++++++- Lib/shutil.py | 72 ++++++++++++++++--- Lib/test/test_shutil.py | 71 ++++++++++++------ Misc/ACKS | 1 + ...2-05-23-21-23-29.gh-issue-81340.D11RkZ.rst | 5 ++ 5 files changed, 144 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 9a25b0d008bf5f..3f91bdb286b21a 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -48,7 +48,7 @@ Directory and files operations be copied. -.. function:: copyfile(src, dst, *, follow_symlinks=True) +.. function:: copyfile(src, dst, *, follow_symlinks=True, allow_reflink=True) Copy the contents (no metadata) of the file named *src* to a file named *dst* and return *dst* in the most efficient way possible. @@ -67,6 +67,10 @@ Directory and files operations a new symbolic link will be created instead of copying the file *src* points to. + *allow_reflink* enables copy-on-write on supported Linux filesystems. + The :c:func:`copy_file_range` system call is used internally when + *allow_reflink* is true. + .. audit-event:: shutil.copyfile src,dst shutil.copyfile .. versionchanged:: 3.3 @@ -83,6 +87,10 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. + .. versionchanged:: 3.12 + Added *allow_reflink* argument. 
Copy-on-write is enabled by default on + supported Linux filesystems. + .. exception:: SameFileError This exception is raised if source and destination in :func:`copyfile` @@ -155,7 +163,7 @@ Directory and files operations .. versionchanged:: 3.3 Added *follow_symlinks* argument and support for Linux extended attributes. -.. function:: copy(src, dst, *, follow_symlinks=True) +.. function:: copy(src, dst, *, follow_symlinks=True, allow_reflink=True) Copies the file *src* to the file or directory *dst*. *src* and *dst* should be :term:`path-like objects ` or strings. If @@ -168,6 +176,10 @@ Directory and files operations is true and *src* is a symbolic link, *dst* will be a copy of the file *src* refers to. + *allow_reflink* enables copy-on-write on supported Linux filesystems. + The :c:func:`copy_file_range` system call is used internally when + *allow_reflink* is true. + :func:`~shutil.copy` copies the file data and the file's permission mode (see :func:`os.chmod`). Other metadata, like the file's creation and modification times, is not preserved. @@ -187,7 +199,11 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. -.. function:: copy2(src, dst, *, follow_symlinks=True) + .. versionchanged:: 3.12 + Added *allow_reflink* argument. Copy-on-write is enabled by default on + supported Linux filesystems. + +.. function:: copy2(src, dst, *, follow_symlinks=True, allow_reflink=True) Identical to :func:`~shutil.copy` except that :func:`copy2` also attempts to preserve file metadata. @@ -201,6 +217,10 @@ Directory and files operations it can; :func:`copy2` never raises an exception because it cannot preserve file metadata. + *allow_reflink* enables copy-on-write on supported Linux filesystems. + The :c:func:`copy_file_range` system call is used internally when + *allow_reflink* is true. + :func:`copy2` uses :func:`copystat` to copy the file metadata. 
Please see :func:`copystat` for more information about platform support for modifying symbolic link metadata. @@ -219,6 +239,10 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. + .. versionchanged:: 3.12 + Added *allow_reflink* argument. Copy-on-write is enabled by default on + supported Linux filesystems. + .. function:: ignore_patterns(*patterns) This factory function creates a function that can be used as a callable for diff --git a/Lib/shutil.py b/Lib/shutil.py index c8e7f1c94ff22a..ce2bb8d59602fd 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -43,6 +43,7 @@ # This should never be removed, see rationale in: # https://bugs.python.org/issue43743#msg393429 _USE_CP_SENDFILE = hasattr(os, "sendfile") and sys.platform.startswith("linux") +_USE_CP_COPY_FILE_RANGE = hasattr(os, "copy_file_range") _HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS # CMD defaults in Windows 10 @@ -122,6 +123,47 @@ def _determine_linux_fastcopy_blocksize(infd): blocksize = min(blocksize, 2 ** 30) return blocksize +def _fastcopy_copy_file_range(fsrc, fdst): + """Copy data from one regular mmap-like fd to another by using + a high-performance copy_file_range(2) syscall that gives filesystems + an opportunity to implement the use of reflinks or server-side copy. + + This should work on Linux >= 4.5 only. + """ + try: + infd = fsrc.fileno() + outfd = fdst.fileno() + except Exception as err: + raise _GiveupOnFastCopy(err) # not a regular file + + blocksize = _determine_linux_fastcopy_blocksize(infd) + offset = 0 + while True: + try: + n_copied = os.copy_file_range(infd, outfd, blocksize, offset_dst=offset) + except OSError as err: + # ...in order to have a more informative exception. + err.filename = fsrc.name + err.filename2 = fdst.name + + if err.errno == errno.ENOSPC: # filesystem is full + raise err from None + + # Give up on first call and if no data was copied. 
+ if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0: + raise _GiveupOnFastCopy(err) + + raise err + else: + if n_copied == 0: + # If no bytes have been copied yet, copy_file_range + # might silently fail. + # https://lore.kernel.org/linux-fsdevel/20210126233840.GG4626@dread.disaster.area/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0 + if offset == 0: + raise _GiveupOnFastCopy() + break + offset += n_copied + def _fastcopy_sendfile(fsrc, fdst): """Copy data from one regular mmap-like fd to another by using high-performance sendfile(2) syscall. @@ -230,7 +272,7 @@ def _stat(fn): def _islink(fn): return fn.is_symlink() if isinstance(fn, os.DirEntry) else os.path.islink(fn) -def copyfile(src, dst, *, follow_symlinks=True): +def copyfile(src, dst, *, follow_symlinks=True, allow_reflink=True): """Copy data from src to dst in the most efficient way possible. If follow_symlinks is not set and src is a symbolic link, a new @@ -271,12 +313,20 @@ def copyfile(src, dst, *, follow_symlinks=True): except _GiveupOnFastCopy: pass # Linux - elif _USE_CP_SENDFILE: - try: - _fastcopy_sendfile(fsrc, fdst) - return dst - except _GiveupOnFastCopy: - pass + elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE: + # reflink may be implicit in copy_file_range. + if _USE_CP_COPY_FILE_RANGE and allow_reflink: + try: + _fastcopy_copy_file_range(fsrc, fdst) + return dst + except _GiveupOnFastCopy: + pass + if _USE_CP_SENDFILE: + try: + _fastcopy_sendfile(fsrc, fdst) + return dst + except _GiveupOnFastCopy: + pass # Windows, see: # https://github.com/python/cpython/pull/7160#discussion_r195405230 elif _WINDOWS and file_size > 0: @@ -409,7 +459,7 @@ def lookup(name): else: raise -def copy(src, dst, *, follow_symlinks=True): +def copy(src, dst, *, follow_symlinks=True, allow_reflink=True): """Copy data and mode bits ("cp src dst"). Return the file's destination. The destination may be a directory. 
@@ -423,11 +473,11 @@ def copy(src, dst, *, follow_symlinks=True): """ if os.path.isdir(dst): dst = os.path.join(dst, os.path.basename(src)) - copyfile(src, dst, follow_symlinks=follow_symlinks) + copyfile(src, dst, follow_symlinks=follow_symlinks, allow_reflink=allow_reflink) copymode(src, dst, follow_symlinks=follow_symlinks) return dst -def copy2(src, dst, *, follow_symlinks=True): +def copy2(src, dst, *, follow_symlinks=True, allow_reflink=True): """Copy data and metadata. Return the file's destination. Metadata is copied with copystat(). Please see the copystat function @@ -440,7 +490,7 @@ def copy2(src, dst, *, follow_symlinks=True): """ if os.path.isdir(dst): dst = os.path.join(dst, os.path.basename(src)) - copyfile(src, dst, follow_symlinks=follow_symlinks) + copyfile(src, dst, follow_symlinks=follow_symlinks, allow_reflink=allow_reflink) copystat(src, dst, follow_symlinks=follow_symlinks) return dst diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index a61bb126e4e73f..981ab65d652c99 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -2467,12 +2467,8 @@ def test_filesystem_full(self): self.assertRaises(OSError, self.zerocopy_fun, src, dst) -@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported') -class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase): - PATCHPOINT = "os.sendfile" - - def zerocopy_fun(self, fsrc, fdst): - return shutil._fastcopy_sendfile(fsrc, fdst) +class _ZeroCopyFileLinuxTest(_ZeroCopyFileTest): + BLOCKSIZE_INDEX = None def test_non_regular_file_src(self): with io.BytesIO(self.FILEDATA) as src: @@ -2493,65 +2489,65 @@ def test_non_regular_file_dst(self): self.assertEqual(dst.read(), self.FILEDATA) def test_exception_on_second_call(self): - def sendfile(*args, **kwargs): + def syscall(*args, **kwargs): if not flag: flag.append(None) - return orig_sendfile(*args, **kwargs) + return orig_syscall(*args, **kwargs) else: raise OSError(errno.EBADF, "yo") flag = [] - orig_sendfile = 
os.sendfile - with unittest.mock.patch('os.sendfile', create=True, - side_effect=sendfile): + orig_syscall = eval(self.PATCHPOINT) + with unittest.mock.patch(self.PATCHPOINT, create=True, + side_effect=syscall): with self.get_files() as (src, dst): with self.assertRaises(OSError) as cm: - shutil._fastcopy_sendfile(src, dst) + self.zerocopy_fun(src, dst) assert flag self.assertEqual(cm.exception.errno, errno.EBADF) def test_cant_get_size(self): # Emulate a case where src file size cannot be determined. # Internally bufsize will be set to a small value and - # sendfile() will be called repeatedly. + # a system call will be called repeatedly. with unittest.mock.patch('os.fstat', side_effect=OSError) as m: with self.get_files() as (src, dst): - shutil._fastcopy_sendfile(src, dst) + self.zerocopy_fun(src, dst) assert m.called self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) def test_small_chunks(self): # Force internal file size detection to be smaller than the - # actual file size. We want to force sendfile() to be called + # actual file size. We want to force a system call to be called # multiple times, also in order to emulate a src fd which gets # bigger while it is being copied. mock = unittest.mock.Mock() mock.st_size = 65536 + 1 with unittest.mock.patch('os.fstat', return_value=mock) as m: with self.get_files() as (src, dst): - shutil._fastcopy_sendfile(src, dst) + self.zerocopy_fun(src, dst) assert m.called self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) def test_big_chunk(self): # Force internal file size detection to be +100MB bigger than - # the actual file size. Make sure sendfile() does not rely on + # the actual file size. Make sure a system call does not rely on # file size value except for (maybe) a better throughput / # performance. 
mock = unittest.mock.Mock() mock.st_size = self.FILESIZE + (100 * 1024 * 1024) with unittest.mock.patch('os.fstat', return_value=mock) as m: with self.get_files() as (src, dst): - shutil._fastcopy_sendfile(src, dst) + self.zerocopy_fun(src, dst) assert m.called self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) def test_blocksize_arg(self): - with unittest.mock.patch('os.sendfile', + with unittest.mock.patch(self.PATCHPOINT, side_effect=ZeroDivisionError) as m: self.assertRaises(ZeroDivisionError, shutil.copyfile, TESTFN, TESTFN2) - blocksize = m.call_args[0][3] + blocksize = m.call_args[0][self.BLOCKSIZE_INDEX] # Make sure file size and the block size arg passed to # sendfile() are the same. self.assertEqual(blocksize, os.path.getsize(TESTFN)) @@ -2561,9 +2557,19 @@ def test_blocksize_arg(self): self.addCleanup(os_helper.unlink, TESTFN2 + '3') self.assertRaises(ZeroDivisionError, shutil.copyfile, TESTFN2, TESTFN2 + '3') - blocksize = m.call_args[0][3] + blocksize = m.call_args[0][self.BLOCKSIZE_INDEX] self.assertEqual(blocksize, 2 ** 23) + +@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported') +@unittest.mock.patch.object(shutil, "_USE_CP_COPY_FILE_RANGE", False) +class TestZeroCopySendfile(_ZeroCopyFileLinuxTest, unittest.TestCase): + PATCHPOINT = "os.sendfile" + BLOCKSIZE_INDEX = 3 + + def zerocopy_fun(self, fsrc, fdst): + return shutil._fastcopy_sendfile(fsrc, fdst) + def test_file2file_not_supported(self): # Emulate a case where sendfile() only support file->socket # fds. 
In such a case copyfile() is supposed to skip the @@ -2586,6 +2592,29 @@ def test_file2file_not_supported(self): shutil._USE_CP_SENDFILE = True +@unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported") +class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase): + PATCHPOINT = "os.copy_file_range" + BLOCKSIZE_INDEX = 2 + + def zerocopy_fun(self, fsrc, fdst): + return shutil._fastcopy_copy_file_range(fsrc, fdst) + + def test_empty_file(self): + srcname = f"{TESTFN}src" + dstname = f"{TESTFN}dst" + self.addCleanup(lambda: os_helper.unlink(srcname)) + self.addCleanup(lambda: os_helper.unlink(dstname)) + with open(srcname, "wb"): + pass + + with open(srcname, "rb") as src, open(dstname, "wb") as dst: + # _fastcopy_copy_file_range gives up copying empty files due + # to a bug in older Linux. + with self.assertRaises(shutil._GiveupOnFastCopy): + self.zerocopy_fun(src, dst) + + @unittest.skipIf(not MACOS, 'macOS only') class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase): PATCHPOINT = "posix._fcopyfile" diff --git a/Misc/ACKS b/Misc/ACKS index d0e18303434f22..1777a028a87bbe 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1878,6 +1878,7 @@ Johannes Vogel Michael Vogt Radu Voicilas Alex Volkov +Illia Volochii Ruben Vorderman Guido Vranken Martijn Vries diff --git a/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst new file mode 100644 index 00000000000000..a0de14ed5ae746 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst @@ -0,0 +1,5 @@ +Use the :c:func:`copy_file_range` Linux system call in :func:`shutil.copy`, +:func:`shutil.copy2`, and :func:`shutil.copyfile` functions by default. The +system call gives filesystems an opportunity to implement the use of +copy-on-write or server-side copy. The functions have a new *allow_reflink* +argument to control the functionality. 
Patch by Illia Volochii. From 47b083458a87f5a2933307c549ef0557dbada1b5 Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Tue, 24 May 2022 20:02:48 +0300 Subject: [PATCH 3/6] Update comments in `_determine_linux_fastcopy_blocksize` Co-authored-by: Victor Stinner --- Lib/shutil.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index ce2bb8d59602fd..f1dfc68019eebb 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -114,11 +114,11 @@ def _determine_linux_fastcopy_blocksize(infd): content changes while being copied. """ try: - blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MiB + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB except OSError: - blocksize = 2 ** 27 # 128MiB - # On 32-bit architectures truncate to 1GiB to avoid OverflowError, - # see bpo-38319. + blocksize = 2 ** 27 # 128 MiB + # On 32-bit architectures truncate to 1 GiB to avoid OverflowError, + # see gh-82500. if sys.maxsize < 2 ** 32: blocksize = min(blocksize, 2 ** 30) return blocksize From 474859cbe12ded1bee2b96b1e81d4dd5696fdf5f Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Tue, 24 May 2022 20:12:51 +0300 Subject: [PATCH 4/6] Update docs to link to `copy_file_range` as a Python function --- Doc/library/shutil.rst | 12 ++++++------ .../2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst | 11 ++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 3f91bdb286b21a..5e696c29d4b489 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -67,8 +67,8 @@ Directory and files operations a new symbolic link will be created instead of copying the file *src* points to. - *allow_reflink* enables copy-on-write on supported Linux filesystems. - The :c:func:`copy_file_range` system call is used internally when + *allow_reflink* enables copy-on-write on supported Linux filesystems (e.g., + btrfs and XFS). 
:func:`os.copy_file_range` is used internally when *allow_reflink* is true. .. audit-event:: shutil.copyfile src,dst shutil.copyfile @@ -176,8 +176,8 @@ Directory and files operations is true and *src* is a symbolic link, *dst* will be a copy of the file *src* refers to. - *allow_reflink* enables copy-on-write on supported Linux filesystems. - The :c:func:`copy_file_range` system call is used internally when + *allow_reflink* enables copy-on-write on supported Linux filesystems (e.g., + btrfs and XFS). :func:`os.copy_file_range` is used internally when *allow_reflink* is true. :func:`~shutil.copy` copies the file data and the file's permission @@ -217,8 +217,8 @@ Directory and files operations it can; :func:`copy2` never raises an exception because it cannot preserve file metadata. - *allow_reflink* enables copy-on-write on supported Linux filesystems. - The :c:func:`copy_file_range` system call is used internally when + *allow_reflink* enables copy-on-write on supported Linux filesystems (e.g., + btrfs and XFS). :func:`os.copy_file_range` is used internally when *allow_reflink* is true. :func:`copy2` uses :func:`copystat` to copy the file metadata. diff --git a/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst index a0de14ed5ae746..66e0fadec10aa2 100644 --- a/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst +++ b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst @@ -1,5 +1,6 @@ -Use the :c:func:`copy_file_range` Linux system call in :func:`shutil.copy`, -:func:`shutil.copy2`, and :func:`shutil.copyfile` functions by default. The -system call gives filesystems an opportunity to implement the use of -copy-on-write or server-side copy. The functions have a new *allow_reflink* -argument to control the functionality. Patch by Illia Volochii. 
+Use :func:`os.copy_file_range` in :func:`shutil.copy`, :func:`shutil.copy2`, +and :func:`shutil.copyfile` functions by default. An underlying Linux system +call gives filesystems an opportunity to implement the use of copy-on-write +(in case of btrfs and XFS) or server-side copy (in the case of NFS.) The +functions have a new *allow_reflink* argument to control the functionality. +Patch by Illia Volochii. From 41d48d9cdfcee263c980405cf67d542d47c7f6a4 Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Wed, 5 Jun 2024 18:57:38 +0300 Subject: [PATCH 5/6] Drop the `allow_reflink` argument --- Doc/library/shutil.rst | 36 +++++++------------ Lib/shutil.py | 12 +++---- ...2-05-23-21-23-29.gh-issue-81340.D11RkZ.rst | 3 +- 3 files changed, 19 insertions(+), 32 deletions(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index e35203b90341e5..730b6aaa2c2e39 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -48,7 +48,7 @@ Directory and files operations be copied. -.. function:: copyfile(src, dst, *, follow_symlinks=True, allow_reflink=True) +.. function:: copyfile(src, dst, *, follow_symlinks=True) Copy the contents (no metadata) of the file named *src* to a file named *dst* and return *dst* in the most efficient way possible. @@ -67,10 +67,6 @@ Directory and files operations a new symbolic link will be created instead of copying the file *src* points to. - *allow_reflink* enables copy-on-write on supported Linux filesystems (e.g., - btrfs and XFS). :func:`os.copy_file_range` is used internally when - *allow_reflink* is true. - .. audit-event:: shutil.copyfile src,dst shutil.copyfile .. versionchanged:: 3.3 @@ -87,9 +83,9 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. - .. versionchanged:: 3.12 - Added *allow_reflink* argument. Copy-on-write is enabled by default on - supported Linux filesystems. + .. 
versionchanged:: 3.14 + Copy-on-write or server-side copy may be used internally on supported + filesystems via :func:`os.copy_file_range`. .. exception:: SameFileError @@ -163,7 +159,7 @@ Directory and files operations .. versionchanged:: 3.3 Added *follow_symlinks* argument and support for Linux extended attributes. -.. function:: copy(src, dst, *, follow_symlinks=True, allow_reflink=True) +.. function:: copy(src, dst, *, follow_symlinks=True) Copies the file *src* to the file or directory *dst*. *src* and *dst* should be :term:`path-like objects ` or strings. If @@ -176,10 +172,6 @@ Directory and files operations is true and *src* is a symbolic link, *dst* will be a copy of the file *src* refers to. - *allow_reflink* enables copy-on-write on supported Linux filesystems (e.g., - btrfs and XFS). :func:`os.copy_file_range` is used internally when - *allow_reflink* is true. - :func:`~shutil.copy` copies the file data and the file's permission mode (see :func:`os.chmod`). Other metadata, like the file's creation and modification times, is not preserved. @@ -199,11 +191,11 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. - .. versionchanged:: 3.12 - Added *allow_reflink* argument. Copy-on-write is enabled by default on - supported Linux filesystems. + .. versionchanged:: 3.14 + Copy-on-write or server-side copy may be used internally on supported + filesystems via :func:`os.copy_file_range`. -.. function:: copy2(src, dst, *, follow_symlinks=True, allow_reflink=True) +.. function:: copy2(src, dst, *, follow_symlinks=True) Identical to :func:`~shutil.copy` except that :func:`copy2` also attempts to preserve file metadata. @@ -217,10 +209,6 @@ Directory and files operations it can; :func:`copy2` never raises an exception because it cannot preserve file metadata. - *allow_reflink* enables copy-on-write on supported Linux filesystems (e.g., - btrfs and XFS). 
:func:`os.copy_file_range` is used internally when - *allow_reflink* is true. - :func:`copy2` uses :func:`copystat` to copy the file metadata. Please see :func:`copystat` for more information about platform support for modifying symbolic link metadata. @@ -239,9 +227,9 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. - .. versionchanged:: 3.12 - Added *allow_reflink* argument. Copy-on-write is enabled by default on - supported Linux filesystems. + .. versionchanged:: 3.14 + Copy-on-write or server-side copy may be used internally on supported + filesystems via :func:`os.copy_file_range`. .. function:: ignore_patterns(*patterns) diff --git a/Lib/shutil.py b/Lib/shutil.py index 023ac9d233bd76..91a25b7be8cdd7 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -275,7 +275,7 @@ def _stat(fn): def _islink(fn): return fn.is_symlink() if isinstance(fn, os.DirEntry) else os.path.islink(fn) -def copyfile(src, dst, *, follow_symlinks=True, allow_reflink=True): +def copyfile(src, dst, *, follow_symlinks=True): """Copy data from src to dst in the most efficient way possible. If follow_symlinks is not set and src is a symbolic link, a new @@ -318,7 +318,7 @@ def copyfile(src, dst, *, follow_symlinks=True, allow_reflink=True): # Linux elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE: # reflink may be implicit in copy_file_range. - if _USE_CP_COPY_FILE_RANGE and allow_reflink: + if _USE_CP_COPY_FILE_RANGE: try: _fastcopy_copy_file_range(fsrc, fdst) return dst @@ -467,7 +467,7 @@ def lookup(name): else: raise -def copy(src, dst, *, follow_symlinks=True, allow_reflink=True): +def copy(src, dst, *, follow_symlinks=True): """Copy data and mode bits ("cp src dst"). Return the file's destination. The destination may be a directory. 
@@ -481,11 +481,11 @@ def copy(src, dst, *, follow_symlinks=True, allow_reflink=True): """ if os.path.isdir(dst): dst = os.path.join(dst, os.path.basename(src)) - copyfile(src, dst, follow_symlinks=follow_symlinks, allow_reflink=allow_reflink) + copyfile(src, dst, follow_symlinks=follow_symlinks) copymode(src, dst, follow_symlinks=follow_symlinks) return dst -def copy2(src, dst, *, follow_symlinks=True, allow_reflink=True): +def copy2(src, dst, *, follow_symlinks=True): """Copy data and metadata. Return the file's destination. Metadata is copied with copystat(). Please see the copystat function @@ -521,7 +521,7 @@ def copy2(src, dst, *, follow_symlinks=True, allow_reflink=True): else: raise - copyfile(src, dst, follow_symlinks=follow_symlinks, allow_reflink=allow_reflink) + copyfile(src, dst, follow_symlinks=follow_symlinks) copystat(src, dst, follow_symlinks=follow_symlinks) return dst diff --git a/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst index 66e0fadec10aa2..49e6305bf83138 100644 --- a/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst +++ b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst @@ -1,6 +1,5 @@ Use :func:`os.copy_file_range` in :func:`shutil.copy`, :func:`shutil.copy2`, and :func:`shutil.copyfile` functions by default. An underlying Linux system call gives filesystems an opportunity to implement the use of copy-on-write -(in case of btrfs and XFS) or server-side copy (in the case of NFS.) The -functions have a new *allow_reflink* argument to control the functionality. +(in case of btrfs and XFS) or server-side copy (in the case of NFS.) Patch by Illia Volochii. 
From e8feaca42f23310cc1f01bcb20b67ab38f05f9ef Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Wed, 5 Jun 2024 22:30:47 +0300 Subject: [PATCH 6/6] Remove duplicate change entries from docs --- Doc/library/shutil.rst | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 730b6aaa2c2e39..98d0c8d3684013 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -83,10 +83,6 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. - .. versionchanged:: 3.14 - Copy-on-write or server-side copy may be used internally on supported - filesystems via :func:`os.copy_file_range`. - .. exception:: SameFileError This exception is raised if source and destination in :func:`copyfile` @@ -191,10 +187,6 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. - .. versionchanged:: 3.14 - Copy-on-write or server-side copy may be used internally on supported - filesystems via :func:`os.copy_file_range`. - .. function:: copy2(src, dst, *, follow_symlinks=True) Identical to :func:`~shutil.copy` except that :func:`copy2` @@ -227,10 +219,6 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. - .. versionchanged:: 3.14 - Copy-on-write or server-side copy may be used internally on supported - filesystems via :func:`os.copy_file_range`. - .. function:: ignore_patterns(*patterns) This factory function creates a function that can be used as a callable for @@ -529,7 +517,7 @@ the use of userspace buffers in Python as in "``outfd.write(infd.read())``". On macOS `fcopyfile`_ is used to copy the file content (not metadata). -On Linux :func:`os.sendfile` is used. +On Linux :func:`os.copy_file_range` or :func:`os.sendfile` is used. 
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB instead of 64 KiB) and a :func:`memoryview`-based variant of @@ -541,6 +529,10 @@ file then shutil will silently fallback on using less efficient .. versionchanged:: 3.8 +.. versionchanged:: 3.14 + Copy-on-write or server-side copy may be used internally via + :func:`os.copy_file_range` on supported Linux filesystems. + .. _shutil-copytree-example: copytree example