From: Daniele Di Proietto <daniele.di.proietto@gmail.com>
To: io-uring@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>, Keith Busch <kbusch@kernel.org>,
Pavel Begunkov <asml.silence@gmail.com>,
linux-fsdevel@vger.kernel.org,
Alexander Viro <viro@zeniv.linux.org.uk>,
Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,
Daniele Di Proietto <daniele.di.proietto@gmail.com>
Subject: [PATCH v3 4/4] io_uring: Add IORING_OP_DUP
Date: Sat, 21 Mar 2026 23:21:42 +0000 [thread overview]
Message-ID: <20260321232142.911280-5-daniele.di.proietto@gmail.com> (raw)
In-Reply-To: <20260321232142.911280-1-daniele.di.proietto@gmail.com>
The new operation is like dup3(). The source file can be a regular file
descriptor or a direct descriptor. The destination can be a regular file
descriptor or, with IORING_DUP_NEW_FIXED, a direct descriptor.
The direct descriptor variant is useful to move a descriptor to an fd
and close the existing fd with a single acquisition of the `struct
files_struct` `file_lock`. Combined with IORING_OP_ACCEPT or
IORING_OP_OPENAT2 with direct descriptors, it can reduce lock contention
for multithreaded applications.
Signed-off-by: Daniele Di Proietto <daniele.di.proietto@gmail.com>
---
include/uapi/linux/io_uring.h | 17 ++++
io_uring/opdef.c | 8 ++
io_uring/openclose.c | 180 ++++++++++++++++++++++++++++++++++
io_uring/openclose.h | 4 +
4 files changed, 209 insertions(+)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1ff16141c8a5..1612aa2db846 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -74,6 +74,7 @@ struct io_uring_sqe {
__u32 install_fd_flags;
__u32 nop_flags;
__u32 pipe_flags;
+ __u32 dup_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
@@ -90,6 +91,7 @@ struct io_uring_sqe {
__u32 file_index;
__u32 zcrx_ifq_idx;
__u32 optlen;
+ __s32 dup_new_fd;
struct {
__u16 addr_len;
__u16 __pad3[1];
@@ -316,6 +318,7 @@ enum io_uring_op {
IORING_OP_PIPE,
IORING_OP_NOP128,
IORING_OP_URING_CMD128,
+ IORING_OP_DUP,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -475,6 +478,20 @@ enum io_uring_msg_ring_flags {
*/
#define IORING_FIXED_FD_NO_CLOEXEC (1U << 0)
+/*
+ * IORING_OP_DUP flags (sqe->dup_flags)
+ *
+ * IORING_DUP_NO_CLOEXEC Don't mark the new fd as O_CLOEXEC. Only valid
+ * if IORING_DUP_NEW_FIXED is not set.
+ * IORING_DUP_OLD_FIXED sqe->fd (the source) is a fixed descriptor.
+ * Otherwise it's a regular fd.
+ * IORING_DUP_NEW_FIXED sqe->dup_new_fd (the destination) is a fixed
+ * descriptor. Otherwise it's a regular fd.
+ */
+#define IORING_DUP_NO_CLOEXEC (1U << 0)
+#define IORING_DUP_OLD_FIXED (1U << 1)
+#define IORING_DUP_NEW_FIXED (1U << 2)
+
/*
* IORING_OP_NOP flags (sqe->nop_flags)
*
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 91a23baf415e..62fe566d2cad 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -599,6 +599,10 @@ const struct io_issue_def io_issue_defs[] = {
.prep = io_uring_cmd_prep,
.issue = io_uring_cmd,
},
+ [IORING_OP_DUP] = {
+ .prep = io_dup_prep,
+ .issue = io_dup,
+ },
};
const struct io_cold_def io_cold_defs[] = {
@@ -857,6 +861,10 @@ const struct io_cold_def io_cold_defs[] = {
.sqe_copy = io_uring_cmd_sqe_copy,
.cleanup = io_uring_cmd_cleanup,
},
+ [IORING_OP_DUP] = {
+ .name = "DUP",
+ .cleanup = io_dup_cleanup,
+ },
};
const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index c71242915dad..b3e5ce9e827c 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -39,6 +39,14 @@ struct io_fixed_install {
unsigned int o_flags;
};
+struct io_dup { /* Per-request state for IORING_OP_DUP; old_fd, new_fd, flags and rsrc_node are set by io_dup_prep(). */
+ struct file *file; /* NOTE(review): not referenced by the IORING_OP_DUP code in this patch; presumably needed by io_kiocb_to_cmd() - confirm */
+ int old_fd; /* source, from sqe->fd: a regular fd, or a fixed slot with IORING_DUP_OLD_FIXED */
+ int new_fd; /* destination, from sqe->dup_new_fd: a regular fd, or a fixed slot with IORING_DUP_NEW_FIXED */
+ unsigned int flags; /* IORING_DUP_* bits, from sqe->dup_flags */
+ struct io_rsrc_node *rsrc_node; /* node obtained for a fixed source file; put by io_dup_cleanup() */
+};
+
static bool io_openat_force_async(struct io_open *open)
{
/*
@@ -446,3 +454,175 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
fput(files[1]);
return ret;
}
+
+/*
+ * Cleanup handler for IORING_OP_DUP: put the rsrc node the request may be
+ * holding for a fixed source file and forget the pointer.
+ */
+void io_dup_cleanup(struct io_kiocb *req)
+{
+	struct io_dup *dup = io_kiocb_to_cmd(req, struct io_dup);
+	struct io_rsrc_node *node = dup->rsrc_node;
+
+	/* Clear the cached pointer so the node is put at most once. */
+	dup->rsrc_node = NULL;
+	if (node)
+		io_put_rsrc_node(req->ctx, node);
+}
+
+#define IORING_DUP_FLAGS \
+ (IORING_DUP_NO_CLOEXEC | IORING_DUP_OLD_FIXED | IORING_DUP_NEW_FIXED) /* every bit io_dup_prep() accepts in sqe->dup_flags */
+
+/*
+ * Prepare an IORING_OP_DUP request: validate the SQE and copy the flags and
+ * the source/destination descriptors into the request's struct io_dup.
+ *
+ * Returns 0 on success. Returns -EINVAL when an unused SQE field is set, an
+ * unknown flag is passed, IORING_DUP_NO_CLOEXEC is combined with
+ * IORING_DUP_NEW_FIXED, or source and destination are the same descriptor of
+ * the same kind. Returns -EPERM when the request carries REQ_F_CREDS.
+ */
+int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	bool src_fixed, dst_fixed;
+	struct io_dup *dup;
+
+	if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->addr3)
+		return -EINVAL;
+
+	dup = io_kiocb_to_cmd(req, struct io_dup);
+	dup->flags = READ_ONCE(sqe->dup_flags);
+	if (dup->flags & ~IORING_DUP_FLAGS)
+		return -EINVAL;
+
+	src_fixed = dup->flags & IORING_DUP_OLD_FIXED;
+	dst_fixed = dup->flags & IORING_DUP_NEW_FIXED;
+
+	/* The no-cloexec flag is only accepted for a regular destination fd. */
+	if (dst_fixed && (dup->flags & IORING_DUP_NO_CLOEXEC))
+		return -EINVAL;
+
+	dup->old_fd = READ_ONCE(sqe->fd);
+	dup->new_fd = READ_ONCE(sqe->dup_new_fd);
+
+	/*
+	 * Equal numbers only name the same descriptor when both sides live in
+	 * the same namespace (both fixed, or both regular).
+	 */
+	if (src_fixed == dst_fixed && dup->old_fd == dup->new_fd)
+		return -EINVAL;
+
+	dup->rsrc_node = NULL;
+
+	/* ensure the task's creds are used when installing/receiving fds */
+	if (req->flags & REQ_F_CREDS)
+		return -EPERM;
+
+	return 0;
+}
+
+/*
+ * Resolve the fixed source slot @file_slot of an IORING_OP_DUP request to its
+ * file.
+ *
+ * The rsrc node is looked up once and cached in the request, so a later call
+ * for the same request reuses it. Whenever a node is held, REQ_F_NEED_CLEANUP
+ * is set so that io_dup_cleanup() gets to put it.
+ *
+ * Returns the slot's file, or NULL if no node could be obtained.
+ */
+static struct file *io_dup_get_old_file_fixed(struct io_kiocb *req,
+					      unsigned int issue_flags,
+					      unsigned int file_slot)
+{
+	struct io_dup *dup = io_kiocb_to_cmd(req, struct io_dup);
+	struct file *src;
+
+	if (!dup->rsrc_node) {
+		dup->rsrc_node = io_file_get_fixed_node(req, file_slot,
+							issue_flags);
+		if (!dup->rsrc_node)
+			return NULL;
+	}
+
+	src = io_slot_file(dup->rsrc_node);
+	req->flags |= REQ_F_NEED_CLEANUP;
+	return src;
+}
+
+/*
+ * Install the source file of an IORING_OP_DUP request into a fixed slot.
+ *
+ * @old_fixed:  true if @old_fd names a fixed slot, false if it is a regular fd.
+ * @old_fd:     source fd or source slot, depending on @old_fixed.
+ * @file_slot:  zero-based destination slot, or IORING_FILE_INDEX_ALLOC to
+ *              request allocation of a slot.
+ *
+ * Returns -EINVAL for a destination slot that cannot be encoded, -EBADF if
+ * the source is missing or is an io_uring file, the io_fixed_fd_install()
+ * result when allocating or on failure, and @file_slot otherwise.
+ */
+static int io_dup_to_fixed(struct io_kiocb *req, unsigned int issue_flags,
+			   bool old_fixed, int old_fd, unsigned int file_slot)
+{
+	struct file *old_file = NULL;
+	int ret;
+
+	/*
+	 * The slot is handed on as file_slot + 1 below. A value of
+	 * IORING_FILE_INDEX_ALLOC - 1 would become IORING_FILE_INDEX_ALLOC
+	 * after that increment and be treated as an allocation request, so
+	 * refuse it before taking any file reference.
+	 * NOTE(review): -EINVAL is assumed to match what an out-of-range
+	 * explicit slot yields from io_fixed_fd_install() - confirm.
+	 */
+	if (file_slot == IORING_FILE_INDEX_ALLOC - 1)
+		return -EINVAL;
+
+	if (!old_fixed) {
+		old_file = io_file_get_normal(req, old_fd);
+		/* An io_uring file is not accepted as the source here. */
+		if (old_file && io_is_uring_fops(old_file)) {
+			fput(old_file);
+			old_file = NULL;
+		}
+	} else {
+		old_file = io_dup_get_old_file_fixed(req, issue_flags, old_fd);
+		/* Take our own reference for the install below. */
+		if (old_file)
+			get_file(old_file);
+	}
+	if (!old_file)
+		return -EBADF;
+
+	/* Explicit slots are passed on one-based; the ALLOC value is kept. */
+	if (file_slot != IORING_FILE_INDEX_ALLOC)
+		file_slot++;
+
+	/*
+	 * NOTE(review): no fput() follows, so io_fixed_fd_install() presumably
+	 * consumes the file reference on success and on failure - confirm.
+	 */
+	ret = io_fixed_fd_install(req, issue_flags, old_file, file_slot);
+	if (file_slot == IORING_FILE_INDEX_ALLOC || ret < 0)
+		return ret;
+	return file_slot - 1;
+}
+
+/*
+ * Finish an IORING_OP_DUP request with result @ret: flag the request as
+ * failed when @ret is negative, record @ret as the completion result and
+ * return IOU_COMPLETE.
+ */
+static int io_dup_complete(struct io_kiocb *req, int ret)
+{
+	const bool failed = ret < 0;
+
+	if (failed)
+		req_set_fail(req);
+
+	io_req_set_res(req, ret, 0);
+	return IOU_COMPLETE;
+}
+
+static int io_dup_to_fd(struct io_kiocb *req, unsigned int issue_flags, /* Install the source file at regular fd new_fd; returns IOU_COMPLETE via io_dup_complete(), or a bare -EBADF / -EAGAIN. */
+ bool old_fixed, int old_fd, int new_fd, int o_flags)
+{
+ bool non_block = issue_flags & IO_URING_F_NONBLOCK;
+ struct files_struct *files = current->files;
+ struct file *old_file, *to_close = NULL; /* old_file is assigned below on both the fixed and the regular path */
+ int err;
+
+ if (new_fd >= rlimit(RLIMIT_NOFILE)) /* destination beyond the fd limit: returned directly, not through io_dup_complete() */
+ return -EBADF;
+
+ if (old_fixed) /* a fixed source is resolved before file_lock is taken; NULL is checked under the lock below */
+ old_file = io_dup_get_old_file_fixed(req, issue_flags, old_fd);
+
+ { /* scope for the file_lock guard below */
+ guard(spinlock)(&files->file_lock); /* NOTE(review): presumably dropped automatically when this brace scope is left - confirm */
+
+ /* Do we need to expand? If so, be safe and punt to async */
+ if (new_fd >= files_fdtable(files)->max_fds && non_block)
+ return -EAGAIN;
+ err = expand_files(files, new_fd);
+ if (err < 0)
+ return io_dup_complete(req, err);
+
+ if (!old_fixed) /* a regular source fd is looked up with file_lock held */
+ old_file = files_lookup_fd_locked(files, old_fd);
+
+ if (!old_file) /* covers both a missing regular fd and an unresolved fixed slot */
+ return io_dup_complete(req, -EBADF);
+
+ to_close = files_lookup_fd_locked(files, new_fd); /* file currently installed at the destination, if any */
+ if (to_close) {
+ if (io_is_uring_fops(to_close)) /* refuse to replace an io_uring file */
+ return io_dup_complete(req, -EBADF);
+
+ /* if the file has a flush method, be safe and punt to async */
+ if (to_close->f_op->flush && non_block)
+ return -EAGAIN;
+ }
+ to_close = do_replace_fd_locked(files, old_file, new_fd, o_flags); /* result is an ERR_PTR on failure, else the file to close (may be NULL) */
+ if (IS_ERR(to_close))
+ return io_dup_complete(req, PTR_ERR(to_close));
+ }
+
+ if (to_close) /* the replaced file is closed after the locked scope above */
+ filp_close(to_close, files);
+
+ return io_dup_complete(req, new_fd); /* success: the completion result is the destination fd */
+}
+
+/*
+ * Issue handler for IORING_OP_DUP.
+ *
+ * With IORING_DUP_NEW_FIXED the source is installed into a fixed slot and the
+ * request is completed with that result. Otherwise the source is installed at
+ * a regular fd, which is O_CLOEXEC unless IORING_DUP_NO_CLOEXEC was passed.
+ */
+int io_dup(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_dup *dup = io_kiocb_to_cmd(req, struct io_dup);
+	bool src_fixed = dup->flags & IORING_DUP_OLD_FIXED;
+	int o_flags;
+
+	if (dup->flags & IORING_DUP_NEW_FIXED) {
+		int ret = io_dup_to_fixed(req, issue_flags, src_fixed,
+					  dup->old_fd, dup->new_fd);
+
+		return io_dup_complete(req, ret);
+	}
+
+	o_flags = (dup->flags & IORING_DUP_NO_CLOEXEC) ? 0 : O_CLOEXEC;
+	return io_dup_to_fd(req, issue_flags, src_fixed, dup->old_fd,
+			    dup->new_fd, o_flags);
+}
diff --git a/io_uring/openclose.h b/io_uring/openclose.h
index 566739920658..95d6a338ac66 100644
--- a/io_uring/openclose.h
+++ b/io_uring/openclose.h
@@ -21,3 +21,7 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags);
int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags);
+
+void io_dup_cleanup(struct io_kiocb *req); /* IORING_OP_DUP .cleanup handler */
+int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); /* IORING_OP_DUP .prep handler */
+int io_dup(struct io_kiocb *req, unsigned int issue_flags); /* IORING_OP_DUP .issue handler */
--
2.43.0
next prev parent reply other threads:[~2026-03-21 23:22 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-21 23:21 [PATCH v3 0/4] New IORING_OP_DUP Daniele Di Proietto
2026-03-21 23:21 ` [PATCH v3 1/4] io_uring: Extract io_file_get_fixed_node() helper Daniele Di Proietto
2026-03-21 23:21 ` [PATCH v3 2/4] fs: Export expand_files() Daniele Di Proietto
2026-03-21 23:21 ` [PATCH v3 3/4] fs: Export new helper do_replace_fd_locked() Daniele Di Proietto
2026-03-23 12:00 ` Christian Brauner
2026-03-23 14:20 ` Jens Axboe
2026-03-21 23:21 ` Daniele Di Proietto [this message]
2026-03-23 14:23 ` [PATCH v3 4/4] io_uring: Add IORING_OP_DUP Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260321232142.911280-5-daniele.di.proietto@gmail.com \
--to=daniele.di.proietto@gmail.com \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=brauner@kernel.org \
--cc=io-uring@vger.kernel.org \
--cc=jack@suse.cz \
--cc=kbusch@kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox