From: Jens Axboe <axboe@kernel.dk>
To: Daniele Di Proietto <daniele.di.proietto@gmail.com>,
io-uring@vger.kernel.org
Cc: Keith Busch <kbusch@kernel.org>,
Pavel Begunkov <asml.silence@gmail.com>,
linux-fsdevel@vger.kernel.org,
Alexander Viro <viro@zeniv.linux.org.uk>,
Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>
Subject: Re: [PATCH v2 2/2] io_uring: Add IORING_OP_DUP
Date: Fri, 20 Mar 2026 13:34:29 -0600 [thread overview]
Message-ID: <ff60b19b-ecd8-44cd-b7d3-f7755fe49934@kernel.dk> (raw)
In-Reply-To: <20260320182341.780295-3-daniele.di.proietto@gmail.com>
On 3/20/26 12:23 PM, Daniele Di Proietto wrote:
> diff --git a/fs/file.c b/fs/file.c
> index 384c83ce768d..64d712ef89b5 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -285,9 +285,8 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
> * Return <0 error code on error; 0 on success.
> * The files->file_lock should be held on entry, and will be held on exit.
> */
> -static int expand_files(struct files_struct *files, unsigned int nr)
> - __releases(files->file_lock)
> - __acquires(files->file_lock)
> +int expand_files(struct files_struct *files, unsigned int nr)
> + __releases(files->file_lock) __acquires(files->file_lock)
> {
> struct fdtable *fdt;
> int error;
> @@ -1291,13 +1290,33 @@ bool get_close_on_exec(unsigned int fd)
> return res;
> }
>
> -static int do_dup2(struct files_struct *files,
> - struct file *file, unsigned fd, unsigned flags)
> -__releases(&files->file_lock)
> +/**
> + * do_replace_fd_locked() - Installs a file on a specific fd number.
> + * @files: struct files_struct to install the file on.
> + * @file: struct file to be installed.
> + * @fd: number in the files table to replace
> + * @flags: the O_* flags to apply to the new fd entry
> + *
> + * Installs a @file on the specific @fd number on the @files table.
> + *
> + * The caller must makes sure that @fd fits inside the @files table, likely via
> + * expand_files().
> + *
> + * Requires holding @files->file_lock.
> + *
> + * This helper handles its own reference counting of the incoming
> + * struct file.
> + *
> + * Returns a preexisting file in @fd, if any, NULL, if none or an error.
> + */
> +struct file *do_replace_fd_locked(struct files_struct *files, struct file *file,
> + unsigned int fd, unsigned int flags)
> {
> struct file *tofree;
> struct fdtable *fdt;
>
> + lockdep_assert_held(&files->file_lock);
> +
> /*
> * dup2() is expected to close the file installed in the target fd slot
> * (if any). However, userspace hand-picking a fd may be racing against
> @@ -1328,26 +1347,19 @@ __releases(&files->file_lock)
> fd = array_index_nospec(fd, fdt->max_fds);
> tofree = rcu_dereference_raw(fdt->fd[fd]);
> if (!tofree && fd_is_open(fd, fdt))
> - goto Ebusy;
> + return ERR_PTR(-EBUSY);
> get_file(file);
> rcu_assign_pointer(fdt->fd[fd], file);
> __set_open_fd(fd, fdt, flags & O_CLOEXEC);
> - spin_unlock(&files->file_lock);
> -
> - if (tofree)
> - filp_close(tofree, files);
> -
> - return fd;
>
> -Ebusy:
> - spin_unlock(&files->file_lock);
> - return -EBUSY;
> + return tofree;
> }
>
> int replace_fd(unsigned fd, struct file *file, unsigned flags)
> {
> - int err;
> struct files_struct *files = current->files;
> + struct file *tofree;
> + int err;
>
> if (!file)
> return close_fd(fd);
> @@ -1359,9 +1371,14 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
> err = expand_files(files, fd);
> if (unlikely(err < 0))
> goto out_unlock;
> - err = do_dup2(files, file, fd, flags);
> - if (err < 0)
> - return err;
> + tofree = do_replace_fd_locked(files, file, fd, flags);
> + spin_unlock(&files->file_lock);
> +
> + if (IS_ERR(tofree))
> + return PTR_ERR(tofree);
> +
> + if (tofree)
> + filp_close(tofree, files);
> return 0;
>
> out_unlock:
> @@ -1422,11 +1439,29 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
> return new_fd;
> }
>
> -static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
> +static struct file *do_dup3(struct files_struct *files, unsigned int oldfd,
> + unsigned int newfd, int flags)
> + __releases(files->file_lock) __acquires(files->file_lock)
> {
> - int err = -EBADF;
> struct file *file;
> + int err = 0;
> +
> + err = expand_files(files, newfd);
> + file = files_lookup_fd_locked(files, oldfd);
> + if (unlikely(!file))
> + return ERR_PTR(-EBADF);
> + if (err < 0) {
> + if (err == -EMFILE)
> + err = -EBADF;
> + return ERR_PTR(err);
> + }
> + return do_replace_fd_locked(files, file, newfd, flags);
> +}
> +
> +static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
> +{
> struct files_struct *files = current->files;
> + struct file *to_free;
>
> if ((flags & ~O_CLOEXEC) != 0)
> return -EINVAL;
> @@ -1438,22 +1473,15 @@ static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
> return -EBADF;
>
> spin_lock(&files->file_lock);
> - err = expand_files(files, newfd);
> - file = files_lookup_fd_locked(files, oldfd);
> - if (unlikely(!file))
> - goto Ebadf;
> - if (unlikely(err < 0)) {
> - if (err == -EMFILE)
> - goto Ebadf;
> - goto out_unlock;
> - }
> - return do_dup2(files, file, newfd, flags);
> -
> -Ebadf:
> - err = -EBADF;
> -out_unlock:
> + to_free = do_dup3(files, oldfd, newfd, flags);
> spin_unlock(&files->file_lock);
> - return err;
> +
> + if (IS_ERR(to_free))
> + return PTR_ERR(to_free);
> + if (to_free)
> + filp_close(to_free, files);
> +
> + return newfd;
> }
>
> SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
> diff --git a/fs/internal.h b/fs/internal.h
> index cbc384a1aa09..c3d1eaf65328 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -197,6 +197,11 @@ extern struct file *do_file_open_root(const struct path *,
> extern struct open_how build_open_how(int flags, umode_t mode);
> extern int build_open_flags(const struct open_how *how, struct open_flags *op);
> struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
> +struct file *do_replace_fd_locked(struct files_struct *files, struct file *file,
> + unsigned int fd, unsigned int flags)
> + __must_hold(files->file_lock);
> +int expand_files(struct files_struct *files, unsigned int nr)
> + __releases(files->file_lock) __acquires(files->file_lock);
>
> int do_ftruncate(struct file *file, loff_t length, int small);
> int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
All of the above should be a separate prep patch, not part of the
io_uring patch adding IORING_OP_DUP.
> diff --git a/io_uring/openclose.c b/io_uring/openclose.c
> index c71242915dad..2658adbfd17a 100644
> --- a/io_uring/openclose.c
> +++ b/io_uring/openclose.c
> @@ -446,3 +454,175 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
> fput(files[1]);
> return ret;
> }
> +
> +void io_dup_cleanup(struct io_kiocb *req)
> +{
> + struct io_dup *id = io_kiocb_to_cmd(req, struct io_dup);
> +
> + if (id->rsrc_node)
> + io_put_rsrc_node(req->ctx, id->rsrc_node);
> +}
Probably doesn't hurt to be defensive and clear ->rsrc_node when put.
> +int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> + struct io_dup *id;
> +
> + if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->addr3)
> + return -EINVAL;
> +
> + id = io_kiocb_to_cmd(req, struct io_dup);
> + id->flags = READ_ONCE(sqe->dup_flags);
> + if (id->flags & ~(IORING_DUP_NO_CLOEXEC | IORING_DUP_OLD_FIXED |
> + IORING_DUP_NEW_FIXED))
> + return -EINVAL;
Would be cleaner with an IORING_DUP_FLAGS mask above io_dup_prep().
> +static struct file *io_dup_get_old_file_fixed(struct io_kiocb *req,
> + unsigned int issue_flags,
> + unsigned int file_slot)
> +{
> + struct io_dup *id = io_kiocb_to_cmd(req, struct io_dup);
> + struct file *file = NULL;
> +
> + if (!id->rsrc_node)
> + id->rsrc_node =
> + io_file_get_fixed_node(req, file_slot, issue_flags);
Just use the full line length, io_uring isn't a stickler on 80 chars and
it'd read better as:
if (!id->rsrc_node)
id->rsrc_node = io_file_get_fixed_node(req, file_slot, issue_flags);
Need to check further, but I'm assuming the difference here is for retry
where the node could already be assigned?
> +static int io_dup_to_fd(struct io_kiocb *req, unsigned int issue_flags,
> + bool old_fixed, int old_fd, int new_fd, int o_flags)
> +{
> + struct file *old_file, *to_replace, *to_close = NULL;
> + bool non_block = issue_flags & IO_URING_F_NONBLOCK;
> + struct files_struct *files = current->files;
> + int ret;
> +
> + if (new_fd >= rlimit(RLIMIT_NOFILE))
> + return -EBADF;
> +
> + if (old_fixed)
> + old_file = io_dup_get_old_file_fixed(req, issue_flags, old_fd);
> +
> + spin_lock(&files->file_lock);
If you use:
guard(spinlock)(&files->file_lock);
and move:
> +
> + /* Do we need to expand? If so, be safe and punt to async */
> + if (new_fd >= files_fdtable(files)->max_fds && non_block)
> + goto out_again;
> + ret = expand_files(files, new_fd);
> + if (ret < 0)
> + goto out_unlock;
> +
> + if (!old_fixed)
> + old_file = files_lookup_fd_locked(files, old_fd);
> +
> + ret = -EBADF;
> + if (!old_file)
> + goto out_unlock;
> +
> + to_replace = files_lookup_fd_locked(files, new_fd);
> + if (to_replace) {
> + if (io_is_uring_fops(to_replace))
> + goto out_unlock;
> +
> + /* if the file has a flush method, be safe and punt to async */
> + if (to_replace->f_op->flush && non_block)
> + goto out_again;
> + }
> + to_close = do_replace_fd_locked(files, old_file, new_fd, o_flags);
> + ret = new_fd;
into a helper, all of this somewhat messy goto error_handling stuff can
go away.
--
Jens Axboe
next prev parent reply other threads:[~2026-03-20 19:34 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-20 18:23 [PATCH v2 0/2] New IORING_OP_DUP Daniele Di Proietto
2026-03-20 18:23 ` [PATCH v2 1/2] io_uring: Extract io_file_get_fixed_node helper Daniele Di Proietto
2026-03-20 18:23 ` [PATCH v2 2/2] io_uring: Add IORING_OP_DUP Daniele Di Proietto
2026-03-20 19:34 ` Jens Axboe [this message]
2026-03-21 23:26 ` Daniele Di Proietto
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ff60b19b-ecd8-44cd-b7d3-f7755fe49934@kernel.dk \
--to=axboe@kernel.dk \
--cc=asml.silence@gmail.com \
--cc=brauner@kernel.org \
--cc=daniele.di.proietto@gmail.com \
--cc=io-uring@vger.kernel.org \
--cc=jack@suse.cz \
--cc=kbusch@kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.