Skip to content

Commit aa4df54

Browse files
committed
copy_file_range: use FICLONERANGE when possible
This is a follow-up from #12476: instead of adding a new abstraction to clone files, start with `copy_file_range`. Both use the same mechanism in the kernel.

Benefits of FICLONERANGE vs `copy_file_range(2)`:
- O(1).
- CoW: as long as the files are not modified, the copy takes no additional space.

However, it is more restricted than `copy_file_range(2)`: source and destination must be on the same file system, and only some file systems implement it. As of Linux 5.19 those are btrfs, cifs, nfs, ocfs2, overlayfs and xfs [1].

[1]: https://elixir.bootlin.com/linux/v5.19/A/ident/remap_file_range
1 parent 7d674d5 commit aa4df54

File tree

4 files changed

+64
-23
lines changed

4 files changed

+64
-23
lines changed

lib/std/fs.zig

+1-1
Original file line number | Diff line number | Diff line change
@@ -2805,7 +2805,7 @@ fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileRawError!void {
28052805
// The kernel checks the u64 value `offset+count` for overflow, use
28062806
// a 32 bit value so that the syscall won't return EINVAL except for
28072807
// impossibly large files (> 2^64-1 - 2^32-1).
2808-
const amt = try os.copy_file_range(fd_in, offset, fd_out, offset, math.maxInt(u32), 0);
2808+
const amt = try os.copy_file_range(fd_in, offset, fd_out, offset, math.maxInt(u32));
28092809
// Terminate when no data was copied
28102810
if (amt == 0) break :cfr_loop;
28112811
offset += amt;

lib/std/fs/file.zig

+1-1
Original file line number | Diff line number | Diff line change
@@ -1219,7 +1219,7 @@ pub const File = struct {
12191219

12201220
pub fn copyRange(in: File, in_offset: u64, out: File, out_offset: u64, len: u64) CopyRangeError!u64 {
12211221
const adjusted_len = math.cast(usize, len) orelse math.maxInt(usize);
1222-
const result = try os.copy_file_range(in.handle, in_offset, out.handle, out_offset, adjusted_len, 0);
1222+
const result = try os.copy_file_range(in.handle, in_offset, out.handle, out_offset, adjusted_len);
12231223
return result;
12241224
}
12251225

lib/std/os.zig

+54-21
Original file line number | Diff line number | Diff line change
@@ -6260,32 +6260,65 @@ pub const CopyFileRangeError = error{
62606260

62616261
var has_copy_file_range_syscall = std.atomic.Atomic(bool).init(true);
62626262

6263-
/// Transfer data between file descriptors at specified offsets.
6264-
/// Returns the number of bytes written, which can less than requested.
6263+
/// Transfer data between file descriptors at specified offsets. Returns the
6264+
/// number of bytes written, which can be less than requested.
6265+
/// The `copy_file_range` call copies `len` bytes from one file descriptor to
6266+
/// another. When possible, this is done within the operating system kernel,
6267+
/// which can provide better performance characteristics than transferring data
6268+
/// from kernel to user space and back.
62656269
///
6266-
/// The `copy_file_range` call copies `len` bytes from one file descriptor to another. When possible,
6267-
/// this is done within the operating system kernel, which can provide better performance
6268-
/// characteristics than transferring data from kernel to user space and back, such as with
6269-
/// `pread` and `pwrite` calls.
6270-
///
6271-
/// `fd_in` must be a file descriptor opened for reading, and `fd_out` must be a file descriptor
6272-
/// opened for writing. They may be any kind of file descriptor; however, if `fd_in` is not a regular
6273-
/// file system file, it may cause this function to fall back to calling `pread` and `pwrite`, in which case
6274-
/// atomicity guarantees no longer apply.
6270+
/// `fd_in` must be a file descriptor opened for reading, and `fd_out` must be
6271+
/// a file descriptor opened for writing. They may be any kind of file
6272+
/// descriptor; however, if `fd_in` is not a regular file system file, it may
6273+
/// cause this function to fall back to calling `pread` and `pwrite`, in which
6274+
/// case atomicity guarantees no longer apply.
62756275
///
6276-
/// If `fd_in` and `fd_out` are the same, source and target ranges must not overlap.
6277-
/// The file descriptor seek positions are ignored and not updated.
6278-
/// When `off_in` is past the end of the input file, it successfully reads 0 bytes.
6276+
/// If `fd_in` and `fd_out` are the same, source and target ranges must not
6277+
/// overlap. The file descriptor seek positions are ignored and not updated.
6278+
/// When `off_in` is past the end of the input file, it successfully reads 0
6279+
/// bytes.
62796280
///
6280-
/// `flags` has different meanings per operating system; refer to the respective man pages.
6281-
///
6282-
/// These systems support in-kernel data copying:
6283-
/// * Linux 4.5 (cross-filesystem 5.3)
6281+
/// Depending on the system, a few mechanisms are tried:
62846282
///
6285-
/// Other systems fall back to calling `pread` / `pwrite`.
6283+
/// * Linux 4.5+: ioctl.FICLONERANGE: atomic, O(1), the fastest method. Uses
6284+
/// copy-on-write, therefore saves disk space and time. As of Linux 5.19,
6285+
/// available on btrfs, cifs, nfs, ocfs2, overlayfs and xfs. The source and
6286+
/// destination must be on the same file system.
6287+
/// * Linux 4.5+: `copy_file_range(2)` via a libc wrapper (if libc is linked)
6288+
/// or a syscall. This works at the block layer, so cross-filesystem
6289+
/// in-kernel copying (Linux 5.3+) is possible.
6290+
/// * Everything else: `pread`/`pwrite`.
62866291
///
62876292
/// Maximum offsets on Linux are `math.maxInt(i64)`.
6288-
pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len: usize, flags: u32) CopyFileRangeError!usize {
6293+
///
6294+
pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len: usize) CopyFileRangeError!usize {
6295+
const ficlone_range = comptime builtin.os.isAtLeast(.linux, .{ .major = 4, .minor = 5 }) orelse true;
6296+
6297+
if (ficlone_range) {
6298+
const arg = linux.FICLONERANGE_arg{
6299+
.src_fd = fd_in,
6300+
.src_offset = off_in,
6301+
.src_length = len,
6302+
.dest_offset = off_out,
6303+
};
6304+
while (true) {
6305+
const rc = system.ioctl(fd_out, linux.T.FICLONERANGE, @ptrToInt(&arg));
6306+
switch (system.getErrno(rc)) {
6307+
.SUCCESS => return @intCast(usize, rc),
6308+
.INTR => continue,
6309+
.BADF => return error.FilesOpenedWithWrongFlags,
6310+
// may not be regular files, try fallback
6311+
.INVAL => break,
6312+
.ISDIR => return error.IsDir,
6313+
// not regular files or FS does not support reflinking; fallback worthy
6314+
.OPNOTSUPP => break,
6315+
.PERM => return error.PermissionDenied,
6316+
.TXTBSY => return error.FileBusy,
6317+
else => |err| return unexpectedErrno(err),
6318+
}
6319+
}
6320+
}
6321+
62896322
const call_cfr = comptime if (builtin.os.tag == .wasi)
62906323
// WASI-libc doesn't have copy_file_range.
62916324
false
@@ -6298,7 +6331,7 @@ pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len
62986331
var off_in_copy = @bitCast(i64, off_in);
62996332
var off_out_copy = @bitCast(i64, off_out);
63006333

6301-
const rc = system.copy_file_range(fd_in, &off_in_copy, fd_out, &off_out_copy, len, flags);
6334+
const rc = system.copy_file_range(fd_in, &off_in_copy, fd_out, &off_out_copy, len, 0);
63026335
switch (system.getErrno(rc)) {
63036336
.SUCCESS => return @intCast(usize, rc),
63046337
.BADF => return error.FilesOpenedWithWrongFlags,

lib/std/os/linux.zig

+8
Original file line number | Diff line number | Diff line change
@@ -2760,6 +2760,13 @@ pub const DT = struct {
27602760
pub const WHT = 14;
27612761
};
27622762

2763+
pub const FICLONERANGE_arg = extern struct {
2764+
src_fd: i64,
2765+
src_offset: u64,
2766+
src_length: u64,
2767+
dest_offset: u64,
2768+
};
2769+
27632770
pub const T = struct {
27642771
pub const CGETS = if (is_mips) 0x540D else 0x5401;
27652772
pub const CSETS = 0x5402;
@@ -2794,6 +2801,7 @@ pub const T = struct {
27942801
pub const IOCGSERIAL = 0x541E;
27952802
pub const IOCSSERIAL = 0x541F;
27962803
pub const IOCPKT = 0x5420;
2804+
pub const FICLONERANGE = IOCTL.IOW(0x94, 13, FICLONERANGE_arg);
27972805
pub const FIONBIO = 0x5421;
27982806
pub const IOCNOTTY = 0x5422;
27992807
pub const IOCSETD = 0x5423;

0 commit comments

Comments
 (0)