public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
From: Daniele Di Proietto <daniele.di.proietto@gmail.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: io-uring@vger.kernel.org, Keith Busch <kbusch@kernel.org>,
	 Pavel Begunkov <asml.silence@gmail.com>,
	linux-fsdevel@vger.kernel.org,
	 Alexander Viro <viro@zeniv.linux.org.uk>,
	Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>
Subject: Re: [PATCH v2 2/2] io_uring: Add IORING_OP_DUP
Date: Sat, 21 Mar 2026 23:26:57 +0000	[thread overview]
Message-ID: <CAExiqTJ5JKOeb-9rKy15i4hQw341EvpSc0aQNt1iMxxurHbZVQ@mail.gmail.com> (raw)
In-Reply-To: <ff60b19b-ecd8-44cd-b7d3-f7755fe49934@kernel.dk>

On Fri, Mar 20, 2026 at 7:34 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 3/20/26 12:23 PM, Daniele Di Proietto wrote:
> > diff --git a/fs/file.c b/fs/file.c
> > index 384c83ce768d..64d712ef89b5 100644
> > --- a/fs/file.c
> > +++ b/fs/file.c
> > @@ -285,9 +285,8 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
> >   * Return <0 error code on error; 0 on success.
> >   * The files->file_lock should be held on entry, and will be held on exit.
> >   */
> > -static int expand_files(struct files_struct *files, unsigned int nr)
> > -     __releases(files->file_lock)
> > -     __acquires(files->file_lock)
> > +int expand_files(struct files_struct *files, unsigned int nr)
> > +     __releases(files->file_lock) __acquires(files->file_lock)
> >  {
> >       struct fdtable *fdt;
> >       int error;
> > @@ -1291,13 +1290,33 @@ bool get_close_on_exec(unsigned int fd)
> >       return res;
> >  }
> >
> > -static int do_dup2(struct files_struct *files,
> > -     struct file *file, unsigned fd, unsigned flags)
> > -__releases(&files->file_lock)
> > +/**
> > + * do_replace_fd_locked() - Installs a file on a specific fd number.
> > + * @files: struct files_struct to install the file on.
> > + * @file: struct file to be installed.
> > + * @fd: number in the files table to replace
> > + * @flags: the O_* flags to apply to the new fd entry
> > + *
> > + * Installs a @file on the specific @fd number on the @files table.
> > + *
> > + * The caller must makes sure that @fd fits inside the @files table, likely via
> > + * expand_files().
> > + *
> > + * Requires holding @files->file_lock.
> > + *
> > + * This helper handles its own reference counting of the incoming
> > + * struct file.
> > + *
> > + * Returns a preexisting file in @fd, if any, NULL, if none or an error.
> > + */
> > +struct file *do_replace_fd_locked(struct files_struct *files, struct file *file,
> > +                               unsigned int fd, unsigned int flags)
> >  {
> >       struct file *tofree;
> >       struct fdtable *fdt;
> >
> > +     lockdep_assert_held(&files->file_lock);
> > +
> >       /*
> >        * dup2() is expected to close the file installed in the target fd slot
> >        * (if any). However, userspace hand-picking a fd may be racing against
> > @@ -1328,26 +1347,19 @@ __releases(&files->file_lock)
> >       fd = array_index_nospec(fd, fdt->max_fds);
> >       tofree = rcu_dereference_raw(fdt->fd[fd]);
> >       if (!tofree && fd_is_open(fd, fdt))
> > -             goto Ebusy;
> > +             return ERR_PTR(-EBUSY);
> >       get_file(file);
> >       rcu_assign_pointer(fdt->fd[fd], file);
> >       __set_open_fd(fd, fdt, flags & O_CLOEXEC);
> > -     spin_unlock(&files->file_lock);
> > -
> > -     if (tofree)
> > -             filp_close(tofree, files);
> > -
> > -     return fd;
> >
> > -Ebusy:
> > -     spin_unlock(&files->file_lock);
> > -     return -EBUSY;
> > +     return tofree;
> >  }
> >
> >  int replace_fd(unsigned fd, struct file *file, unsigned flags)
> >  {
> > -     int err;
> >       struct files_struct *files = current->files;
> > +     struct file *tofree;
> > +     int err;
> >
> >       if (!file)
> >               return close_fd(fd);
> > @@ -1359,9 +1371,14 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
> >       err = expand_files(files, fd);
> >       if (unlikely(err < 0))
> >               goto out_unlock;
> > -     err = do_dup2(files, file, fd, flags);
> > -     if (err < 0)
> > -             return err;
> > +     tofree = do_replace_fd_locked(files, file, fd, flags);
> > +     spin_unlock(&files->file_lock);
> > +
> > +     if (IS_ERR(tofree))
> > +             return PTR_ERR(tofree);
> > +
> > +     if (tofree)
> > +             filp_close(tofree, files);
> >       return 0;
> >
> >  out_unlock:
> > @@ -1422,11 +1439,29 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
> >       return new_fd;
> >  }
> >
> > -static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
> > +static struct file *do_dup3(struct files_struct *files, unsigned int oldfd,
> > +                         unsigned int newfd, int flags)
> > +     __releases(files->file_lock) __acquires(files->file_lock)
> >  {
> > -     int err = -EBADF;
> >       struct file *file;
> > +     int err = 0;
> > +
> > +     err = expand_files(files, newfd);
> > +     file = files_lookup_fd_locked(files, oldfd);
> > +     if (unlikely(!file))
> > +             return ERR_PTR(-EBADF);
> > +     if (err < 0) {
> > +             if (err == -EMFILE)
> > +                     err = -EBADF;
> > +             return ERR_PTR(err);
> > +     }
> > +     return do_replace_fd_locked(files, file, newfd, flags);
> > +}
> > +
> > +static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
> > +{
> >       struct files_struct *files = current->files;
> > +     struct file *to_free;
> >
> >       if ((flags & ~O_CLOEXEC) != 0)
> >               return -EINVAL;
> > @@ -1438,22 +1473,15 @@ static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
> >               return -EBADF;
> >
> >       spin_lock(&files->file_lock);
> > -     err = expand_files(files, newfd);
> > -     file = files_lookup_fd_locked(files, oldfd);
> > -     if (unlikely(!file))
> > -             goto Ebadf;
> > -     if (unlikely(err < 0)) {
> > -             if (err == -EMFILE)
> > -                     goto Ebadf;
> > -             goto out_unlock;
> > -     }
> > -     return do_dup2(files, file, newfd, flags);
> > -
> > -Ebadf:
> > -     err = -EBADF;
> > -out_unlock:
> > +     to_free = do_dup3(files, oldfd, newfd, flags);
> >       spin_unlock(&files->file_lock);
> > -     return err;
> > +
> > +     if (IS_ERR(to_free))
> > +             return PTR_ERR(to_free);
> > +     if (to_free)
> > +             filp_close(to_free, files);
> > +
> > +     return newfd;
> >  }
> >
> >  SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
> > diff --git a/fs/internal.h b/fs/internal.h
> > index cbc384a1aa09..c3d1eaf65328 100644
> > --- a/fs/internal.h
> > +++ b/fs/internal.h
> > @@ -197,6 +197,11 @@ extern struct file *do_file_open_root(const struct path *,
> >  extern struct open_how build_open_how(int flags, umode_t mode);
> >  extern int build_open_flags(const struct open_how *how, struct open_flags *op);
> >  struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
> > +struct file *do_replace_fd_locked(struct files_struct *files, struct file *file,
> > +                               unsigned int fd, unsigned int flags)
> > +     __must_hold(files->file_lock);
> > +int expand_files(struct files_struct *files, unsigned int nr)
> > +     __releases(files->file_lock) __acquires(files->file_lock);
> >
> >  int do_ftruncate(struct file *file, loff_t length, int small);
> >  int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
>
> All of the above should be a separate prep patch, not part of the
> io_uring patch adding IORING_OP_DUP.

Done, thanks!

>
> > diff --git a/io_uring/openclose.c b/io_uring/openclose.c
> > index c71242915dad..2658adbfd17a 100644
> > --- a/io_uring/openclose.c
> > +++ b/io_uring/openclose.c
> > @@ -446,3 +454,175 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
> >               fput(files[1]);
> >       return ret;
> >  }
> > +
> > +void io_dup_cleanup(struct io_kiocb *req)
> > +{
> > +     struct io_dup *id = io_kiocb_to_cmd(req, struct io_dup);
> > +
> > +     if (id->rsrc_node)
> > +             io_put_rsrc_node(req->ctx, id->rsrc_node);
> > +}
>
> Probably doesn't hurt to be defensive and clear ->rsrc_node when put.

Done, thanks!

>
> > +int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> > +{
> > +     struct io_dup *id;
> > +
> > +     if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->addr3)
> > +             return -EINVAL;
> > +
> > +     id = io_kiocb_to_cmd(req, struct io_dup);
> > +     id->flags = READ_ONCE(sqe->dup_flags);
> > +     if (id->flags & ~(IORING_DUP_NO_CLOEXEC | IORING_DUP_OLD_FIXED |
> > +                       IORING_DUP_NEW_FIXED))
> > +             return -EINVAL;
>
> Would be cleaner with an IORING_DUP_FLAGS mask above io_dup_prep().

Done, thanks!

>
> > +static struct file *io_dup_get_old_file_fixed(struct io_kiocb *req,
> > +                                           unsigned int issue_flags,
> > +                                           unsigned int file_slot)
> > +{
> > +     struct io_dup *id = io_kiocb_to_cmd(req, struct io_dup);
> > +     struct file *file = NULL;
> > +
> > +     if (!id->rsrc_node)
> > +             id->rsrc_node =
> > +                     io_file_get_fixed_node(req, file_slot, issue_flags);
>
> Just use the full line length, io_uring isn't a stickler on 80 chars and
> it'd read better as:

Done, thanks!

>
>         if (!id->rsrc_node)
>                 id->rsrc_node = io_file_get_fixed_node(req, file_slot, issue_flags);
>
> Need to check further, but I'm assuming the difference here is for retry
> where the node could already be assigned?

That's right

>
>
> > +static int io_dup_to_fd(struct io_kiocb *req, unsigned int issue_flags,
> > +                     bool old_fixed, int old_fd, int new_fd, int o_flags)
> > +{
> > +     struct file *old_file, *to_replace, *to_close = NULL;
> > +     bool non_block = issue_flags & IO_URING_F_NONBLOCK;
> > +     struct files_struct *files = current->files;
> > +     int ret;
> > +
> > +     if (new_fd >= rlimit(RLIMIT_NOFILE))
> > +             return -EBADF;
> > +
> > +     if (old_fixed)
> > +             old_file = io_dup_get_old_file_fixed(req, issue_flags, old_fd);
> > +
> > +     spin_lock(&files->file_lock);
>
> If you use:
>
>         guard(spinlock)(&files->file_lock);
>
> and move:
>
> > +
> > +     /* Do we need to expand? If so, be safe and punt to async */
> > +     if (new_fd >= files_fdtable(files)->max_fds && non_block)
> > +             goto out_again;
> > +     ret = expand_files(files, new_fd);
> > +     if (ret < 0)
> > +             goto out_unlock;
> > +
> > +     if (!old_fixed)
> > +             old_file = files_lookup_fd_locked(files, old_fd);
> > +
> > +     ret = -EBADF;
> > +     if (!old_file)
> > +             goto out_unlock;
> > +
> > +     to_replace = files_lookup_fd_locked(files, new_fd);
> > +     if (to_replace) {
> > +             if (io_is_uring_fops(to_replace))
> > +                     goto out_unlock;
> > +
> > +             /* if the file has a flush method, be safe and punt to async */
> > +             if (to_replace->f_op->flush && non_block)
> > +                     goto out_again;
> > +     }
> > +     to_close = do_replace_fd_locked(files, old_file, new_fd, o_flags);
> > +     ret = new_fd;
>
> into a helper, all of this somewhat messy goto error_handling stuff can
> go away.

Neat!

I didn't move it into a separate helper, because it has to return three things:
* The completeness of the operation (IOU_COMPLETE or -EAGAIN)
* An eventual error
* The file to close

Using a guard in a block and another small helper I managed to avoid
the gotos, as suggested.

Thanks!

>
> --
> Jens Axboe

      reply	other threads:[~2026-03-21 23:27 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-20 18:23 [PATCH v2 0/2] New IORING_OP_DUP Daniele Di Proietto
2026-03-20 18:23 ` [PATCH v2 1/2] io_uring: Extract io_file_get_fixed_node helper Daniele Di Proietto
2026-03-20 18:23 ` [PATCH v2 2/2] io_uring: Add IORING_OP_DUP Daniele Di Proietto
2026-03-20 19:34   ` Jens Axboe
2026-03-21 23:26     ` Daniele Di Proietto [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAExiqTJ5JKOeb-9rKy15i4hQw341EvpSc0aQNt1iMxxurHbZVQ@mail.gmail.com \
    --to=daniele.di.proietto@gmail.com \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=io-uring@vger.kernel.org \
    --cc=jack@suse.cz \
    --cc=kbusch@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox