Skip to content

Commit 7c38097

Browse files
barneygale and eryksun authored
GH-73991: Add pathlib.Path.copy() (#119058)
Add a `Path.copy()` method that copies the content of one file to another. This method is similar to `shutil.copyfile()` but differs in the following ways:
- Uses `fcntl.FICLONE` where available (see GH-81338)
- Uses `os.copy_file_range` where available (see GH-81340)
- Uses `_winapi.CopyFile2` where available, even though this copies more metadata than the other implementations. This makes `WindowsPath.copy()` more similar to `shutil.copy2()`.
The method is presently _less_ specified than the `shutil` functions to allow OS-specific optimizations that might copy more or less metadata. Incorporates code from GH-81338 and GH-93152. Co-authored-by: Eryk Sun <[email protected]>
1 parent 2bacc23 commit 7c38097

File tree

7 files changed

+271
-2
lines changed

7 files changed

+271
-2
lines changed

Doc/library/pathlib.rst

+16-2
Original file line numberDiff line numberDiff line change
@@ -1429,8 +1429,22 @@ Creating files and directories
14291429
available. In previous versions, :exc:`NotImplementedError` was raised.
14301430

14311431

1432-
Renaming and deleting
1433-
^^^^^^^^^^^^^^^^^^^^^
1432+
Copying, renaming and deleting
1433+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1434+
1435+
.. method:: Path.copy(target)
1436+
1437+
Copy the contents of this file to the *target* file. If *target* specifies
1438+
a file that already exists, it will be replaced.
1439+
1440+
.. note::
1441+
This method uses operating system functionality to copy file content
1442+
efficiently. The OS might also copy some metadata, such as file
1443+
permissions. After the copy is complete, users may wish to call
1444+
:meth:`Path.chmod` to set the permissions of the target file.
1445+
1446+
.. versionadded:: 3.14
1447+
14341448

14351449
.. method:: Path.rename(target)
14361450

Doc/whatsnew/3.14.rst

+7
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,13 @@ os
100100
by :func:`os.unsetenv`, or made outside Python in the same process.
101101
(Contributed by Victor Stinner in :gh:`120057`.)
102102

103+
pathlib
104+
-------
105+
106+
* Add :meth:`pathlib.Path.copy`, which copies the content of one file to
107+
another, like :func:`shutil.copyfile`.
108+
(Contributed by Barney Gale in :gh:`73991`.)
109+
103110
symtable
104111
--------
105112

Lib/pathlib/_abc.py

+30
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import posixpath
1717
from glob import _GlobberBase, _no_recurse_symlinks
1818
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
19+
from ._os import copyfileobj
1920

2021

2122
__all__ = ["UnsupportedOperation"]
@@ -563,6 +564,15 @@ def samefile(self, other_path):
563564
return (st.st_ino == other_st.st_ino and
564565
st.st_dev == other_st.st_dev)
565566

567+
def _samefile_safe(self, other_path):
568+
"""
569+
Like samefile(), but returns False rather than raising OSError.
570+
"""
571+
try:
572+
return self.samefile(other_path)
573+
except (OSError, ValueError):
574+
return False
575+
566576
def open(self, mode='r', buffering=-1, encoding=None,
567577
errors=None, newline=None):
568578
"""
@@ -780,6 +790,26 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False):
780790
"""
781791
raise UnsupportedOperation(self._unsupported_msg('mkdir()'))
782792

793+
def copy(self, target):
    """
    Copy the contents of this file to the given target.

    *target* is converted with with_segments() unless it is already a
    PathBase instance.  Raises OSError when source and target are the
    same file, and FileNotFoundError when opening the target fails with
    IsADirectoryError although the target does not exist.
    """
    if not isinstance(target, PathBase):
        target = self.with_segments(target)
    if self._samefile_safe(target):
        raise OSError(f"{self!r} and {target!r} are the same file")
    with self.open('rb') as source_f:
        try:
            with target.open('wb') as target_f:
                copyfileobj(source_f, target_f)
        except IsADirectoryError as e:
            if target.exists():
                # The target really is a directory: keep the original error.
                raise
            # Raise a less confusing exception.
            raise FileNotFoundError(
                f'Directory does not exist: {target}') from e
812+
783813
def rename(self, target):
784814
"""
785815
Rename this path to the target path.

Lib/pathlib/_local.py

+16
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
grp = None
1919

2020
from ._abc import UnsupportedOperation, PurePathBase, PathBase
21+
from ._os import copyfile
2122

2223

2324
__all__ = [
@@ -780,6 +781,21 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False):
780781
if not exist_ok or not self.is_dir():
781782
raise
782783

784+
if copyfile:
    def copy(self, target):
        """
        Copy the contents of this file to the given target.

        Only defined when the module-level ``copyfile`` helper is
        available.  A target that cannot be converted with os.fspath()
        is handed to PathBase.copy() if it is a PathBase instance;
        otherwise the TypeError propagates.
        """
        try:
            target = os.fspath(target)
        except TypeError:
            if not isinstance(target, PathBase):
                raise
            # Target is an instance of PathBase but not os.PathLike.
            # Use generic implementation from PathBase.
            return PathBase.copy(self, target)
        copyfile(os.fspath(self), target)
798+
783799
def chmod(self, mode, *, follow_symlinks=True):
784800
"""
785801
Change the permissions of the path, like os.chmod().

Lib/pathlib/_os.py

+138
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""
2+
Low-level OS functionality wrappers used by pathlib.
3+
"""
4+
5+
from errno import EBADF, EOPNOTSUPP, ETXTBSY, EXDEV
6+
import os
7+
import sys
8+
try:
9+
import fcntl
10+
except ImportError:
11+
fcntl = None
12+
try:
13+
import posix
14+
except ImportError:
15+
posix = None
16+
try:
17+
import _winapi
18+
except ImportError:
19+
_winapi = None
20+
21+
22+
def get_copy_blocksize(infd):
    """Determine blocksize for fastcopying on Linux.

    The size of the file behind *infd* is used so that, hopefully, the
    whole file is copied in a single call, with a floor of 8 MiB.  When
    the descriptor cannot be stat'ed, 128 MiB is used instead.  Callers
    are expected to copy in a loop until EOF (a 0 return), so a value
    smaller or bigger than the real file size is harmless, also when the
    file content changes while being copied.
    """
    floor = 2 ** 23  # min 8 MiB
    try:
        size = os.fstat(infd).st_size
    except OSError:
        blocksize = 2 ** 27  # 128 MiB
    else:
        blocksize = size if size > floor else floor
    if sys.maxsize < 2 ** 32:
        # On 32-bit architectures truncate to 1 GiB to avoid OverflowError,
        # see gh-82500.
        blocksize = min(blocksize, 2 ** 30)
    return blocksize
39+
40+
41+
# clonefd stays None unless the fcntl module exposes FICLONE.
clonefd = None
if fcntl and hasattr(fcntl, 'FICLONE'):
    def clonefd(source_fd, target_fd):
        """
        Clone the file behind source_fd into target_fd with the FICLONE
        ioctl.  This is a lightweight copy where data blocks are copied
        only when modified, also known as Copy on Write (CoW),
        instantaneous copy or reflink.
        """
        fcntl.ioctl(target_fd, fcntl.FICLONE, source_fd)
51+
52+
53+
# Pick the best available fd-to-fd copy primitive; copyfd is None when the
# platform offers none of them.
if posix and hasattr(posix, '_fcopyfile'):
    def copyfd(source_fd, target_fd):
        """
        Copy regular file content with the high-performance fcopyfile(3)
        call (macOS), requesting the data only.
        """
        posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA)
elif hasattr(os, 'copy_file_range'):
    def copyfd(source_fd, target_fd):
        """
        Copy data between two regular mmap-like fds with the
        high-performance copy_file_range(2) syscall, which gives
        filesystems an opportunity to use reflinks or server-side copy.
        This should work on Linux >= 4.5 only.
        """
        chunk = get_copy_blocksize(source_fd)
        written = 0
        # A return of 0 means EOF; otherwise advance the target offset.
        while count := os.copy_file_range(source_fd, target_fd, chunk,
                                          offset_dst=written):
            written += count
elif hasattr(os, 'sendfile'):
    def copyfd(source_fd, target_fd):
        """Copy data between two regular mmap-like fds with the
        high-performance sendfile(2) syscall.
        This should work on Linux >= 2.6.33 only.
        """
        chunk = get_copy_blocksize(source_fd)
        position = 0
        # A return of 0 means EOF; otherwise advance the source offset.
        while count := os.sendfile(target_fd, source_fd, position, chunk):
            position += count
else:
    copyfd = None
92+
93+
94+
# copyfile stays None unless _winapi provides CopyFile2.
copyfile = None
if _winapi and hasattr(_winapi, 'CopyFile2'):
    def copyfile(source, target):
        """
        Copy from one file to another using CopyFile2 (Windows only).
        """
        # NOTE(review): the third argument (0) is presumably the copy
        # flags value, i.e. no flags -- confirm against _winapi.
        _winapi.CopyFile2(source, target, 0)
102+
103+
104+
def copyfileobj(source_f, target_f):
    """
    Copy data from file-like object source_f to file-like object target_f.

    When both objects provide a file descriptor, clonefd is tried first
    and then copyfd, each only if available.  A clonefd failure with
    EBADF, EOPNOTSUPP, ETXTBSY or EXDEV falls through to copyfd.  Any
    other OSError from these paths is re-raised with ``filename`` and
    ``filename2`` set to the names of the two file objects.  If no
    descriptor can be obtained, or neither helper is available, the data
    is copied with read() and write() in 1 MiB chunks.
    """
    try:
        fds = source_f.fileno(), target_f.fileno()
    except Exception:
        fds = None  # No usable descriptors: use the generic code below.

    if fds is not None:
        try:
            # Use OS copy-on-write where available.
            if clonefd:
                try:
                    clonefd(*fds)
                except OSError as err:
                    if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV):
                        raise err
                else:
                    return

            # Use OS copy where available.
            if copyfd:
                copyfd(*fds)
                return
        except OSError as err:
            # Produce more useful error messages.
            err.filename = source_f.name
            err.filename2 = target_f.name
            raise err

    # Last resort: copy with fileobj read() and write().
    reader = source_f.read
    writer = target_f.write
    while True:
        chunk = reader(1024 * 1024)
        if not chunk:
            break
        writer(chunk)

Lib/test/test_pathlib/test_pathlib_abc.py

+62
Original file line numberDiff line numberDiff line change
@@ -1696,6 +1696,68 @@ def test_write_text_with_newlines(self):
16961696
self.assertEqual((p / 'fileA').read_bytes(),
16971697
b'abcde' + os_linesep_byte + b'fghlk' + os_linesep_byte + b'\rmnopq')
16981698

1699+
def test_copy_file(self):
1700+
base = self.cls(self.base)
1701+
source = base / 'fileA'
1702+
target = base / 'copyA'
1703+
source.copy(target)
1704+
self.assertTrue(target.exists())
1705+
self.assertEqual(source.read_text(), target.read_text())
1706+
1707+
def test_copy_directory(self):
1708+
base = self.cls(self.base)
1709+
source = base / 'dirA'
1710+
target = base / 'copyA'
1711+
with self.assertRaises(OSError):
1712+
source.copy(target)
1713+
1714+
@needs_symlinks
def test_copy_symlink(self):
    """Copying the symlink 'linkA' yields a non-symlink with the same text."""
    root = self.cls(self.base)
    src, dst = root / 'linkA', root / 'copyA'
    src.copy(dst)
    self.assertTrue(dst.exists())
    self.assertFalse(dst.is_symlink())
    self.assertEqual(src.read_text(), dst.read_text())
1723+
1724+
def test_copy_to_existing_file(self):
1725+
base = self.cls(self.base)
1726+
source = base / 'fileA'
1727+
target = base / 'dirB' / 'fileB'
1728+
source.copy(target)
1729+
self.assertTrue(target.exists())
1730+
self.assertEqual(source.read_text(), target.read_text())
1731+
1732+
def test_copy_to_existing_directory(self):
1733+
base = self.cls(self.base)
1734+
source = base / 'fileA'
1735+
target = base / 'dirA'
1736+
with self.assertRaises(OSError):
1737+
source.copy(target)
1738+
1739+
@needs_symlinks
1740+
def test_copy_to_existing_symlink(self):
1741+
base = self.cls(self.base)
1742+
source = base / 'dirB' / 'fileB'
1743+
target = base / 'linkA'
1744+
real_target = base / 'fileA'
1745+
source.copy(target)
1746+
self.assertTrue(target.exists())
1747+
self.assertTrue(target.is_symlink())
1748+
self.assertTrue(real_target.exists())
1749+
self.assertFalse(real_target.is_symlink())
1750+
self.assertEqual(source.read_text(), real_target.read_text())
1751+
1752+
def test_copy_empty(self):
1753+
base = self.cls(self.base)
1754+
source = base / 'empty'
1755+
target = base / 'copyA'
1756+
source.write_bytes(b'')
1757+
source.copy(target)
1758+
self.assertTrue(target.exists())
1759+
self.assertEqual(target.read_bytes(), b'')
1760+
16991761
def test_iterdir(self):
17001762
P = self.cls
17011763
p = P(self.base)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add :meth:`pathlib.Path.copy`, which copies the content of one file to another,
2+
like :func:`shutil.copyfile`.

0 commit comments

Comments
 (0)