public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Christian Brauner <brauner@kernel.org>
To: Mateusz Guzik <mjguzik@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
	viro@zeniv.linux.org.uk, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, oleg@redhat.com,
	Matthew Wilcox <willy@infradead.org>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: Re: [PATCH v2 (kindof)] fs: use __fput_sync in close(2)
Date: Tue, 8 Aug 2023 18:30:01 +0200	[thread overview]
Message-ID: <20230808-lebst-vorgibt-75c3010b4e54@brauner> (raw)
In-Reply-To: <CAGudoHGqRr_WNz86pmgK9Kmnwsox+_XXqqbp+rLW53e5t8higg@mail.gmail.com>

On Tue, Aug 08, 2023 at 05:07:22PM +0200, Mateusz Guzik wrote:
> I slapped the following variant just for illustration purposes.
> 
> - adds __close_fd which returns a struct file
> - adds __filp_close with a flag whether to fput
> - makes close(2) use both
> - transparent to everyone else
> 
> Downside is that __fput_sync still loses the assert. Instead of
> losing it, the assert could perhaps be extended with a hack to check syscall
> number -- pass if either this is close (or binary compat close) or a
> kthread, BUG out otherwise. Alternatively perhaps deref could be
> opencoded along with a comment about real fput that this is taking
> place. Or maybe some other cosmetic choice.
> 
> I cannot compile-test right now, so down below is a rough copy to make
> sure it is clear what I mean.
> 
> I feel compelled to note that simple patches get microbenchmarked all
> the time, with these results being the only justification provided.
> I'm confused why this patch is supposed to be an exception given its
> simplicity.
> 
> Serious justification should be expected from tough calls --
> complicated, invasive changes, maybe with numerous tradeoffs.
> 
> In contrast close(2) doing __fput_sync looks a clear cut thing to do,
> at worst one can argue which way to do it.
> 
> diff --git a/fs/file.c b/fs/file.c
> index 3fd003a8604f..c341b07533b0 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -651,20 +651,30 @@ static struct file *pick_file(struct
> files_struct *files, unsigned fd)
>         return file;
>  }
> 
> -int close_fd(unsigned fd)
> +struct file *__close_fd(unsigned fd, struct file_struct *files)
>  {
> -       struct files_struct *files = current->files;
>         struct file *file;
> 
>         spin_lock(&files->file_lock);
>         file = pick_file(files, fd);
>         spin_unlock(&files->file_lock);
> +
> +       return file;
> +}
> +EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
> +
> +int close_fd(unsigned fd)
> +{
> +       struct files_struct *files = current->files;
> +       struct file *file;
> +
> +       file = __close_fd(fd, files);
>         if (!file)
>                 return -EBADF;
> 
>         return filp_close(file, files);
>  }
> -EXPORT_SYMBOL(close_fd); /* for ksys_close() */
> +EXPORT_SYMBOL(close_fd);
> 
>  /**
>   * last_fd - return last valid index into fd table
> diff --git a/fs/file_table.c b/fs/file_table.c
> index fc7d677ff5ad..b7461f0b73f4 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -463,6 +463,11 @@ void __fput_sync(struct file *file)
>  {
>         if (atomic_long_dec_and_test(&file->f_count)) {
>                 struct task_struct *task = current;
> +               /*
> +                * I see 2 basic options
> +                * 1. just remove the assert
> +                * 2. demand the flag *or* that the caller is close(2)
> +                */
>                 BUG_ON(!(task->flags & PF_KTHREAD));
>                 __fput(file);
>         }
> diff --git a/fs/open.c b/fs/open.c
> index e6ead0f19964..b1602307c1c3 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -1533,7 +1533,16 @@ EXPORT_SYMBOL(filp_close);
>   */
>  SYSCALL_DEFINE1(close, unsigned int, fd)
>  {
> -       int retval = close_fd(fd);
> +       struct files_struct *files = current->files;
> +       struct file *file;
> +       int retval;
> +
> +       file = __close_fd(fd);
> +       if (!file)
> +               return -EBADF;
> +
> +       retval = __filp_close(file, files, false);
> +       __fput_sync(file);
> 
>         /* can't restart close syscall because file table entry was cleared */
>         if (unlikely(retval == -ERESTARTSYS ||
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 562f2623c9c9..e64c0238a65f 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2388,7 +2388,11 @@ static inline struct file
> *file_clone_open(struct file *file)
>  {
>         return dentry_open(&file->f_path, file->f_flags, file->f_cred);
>  }
> -extern int filp_close(struct file *, fl_owner_t id);
> +extern int __filp_close(struct file *file, fl_owner_t id, bool dofput);
> +static inline int filp_close(struct file *file, fl_owner_t id)
> +{
> +       return __filp_close(file, id, true);
> +}
> 
>  extern struct filename *getname_flags(const char __user *, int, int *);
>  extern struct filename *getname_uflags(const char __user *, int);

At least make this really dumb and obvious, and keep the ugliness confined
to internal.h and open.c.

---
 fs/file_table.c | 10 ++++++++++
 fs/internal.h   |  1 +
 fs/open.c       | 21 +++++++++++++++------
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index fc7d677ff5ad..18f8adaba972 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -471,6 +471,16 @@ void __fput_sync(struct file *file)
 EXPORT_SYMBOL(fput);
 EXPORT_SYMBOL(__fput_sync);
 
+/*
+ * Same as __fput_sync() but for regular close.
+ * Not exported, not for general use.
+ */
+void fput_sync(struct file *file)
+{
+	if (atomic_long_dec_and_test(&file->f_count))
+		__fput(file);
+}
+
 void __init files_init(void)
 {
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
diff --git a/fs/internal.h b/fs/internal.h
index f7a3dc111026..1ad2a4ce728b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -297,6 +297,7 @@ static inline ssize_t do_get_acl(struct mnt_idmap *idmap,
 #endif
 
 ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos);
+void fput_sync(struct file *file);
 
 /*
  * fs/attr.c
diff --git a/fs/open.c b/fs/open.c
index e6ead0f19964..2540b22fb114 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1499,11 +1499,7 @@ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
 }
 #endif
 
-/*
- * "id" is the POSIX thread ID. We use the
- * files pointer for this..
- */
-int filp_close(struct file *filp, fl_owner_t id)
+static int __filp_close(struct file *filp, fl_owner_t id, bool may_delay)
 {
 	int retval = 0;
 
@@ -1520,10 +1516,23 @@ int filp_close(struct file *filp, fl_owner_t id)
 		dnotify_flush(filp, id);
 		locks_remove_posix(filp, id);
 	}
-	fput(filp);
+
+	if (may_delay)
+		fput(filp);
+	else
+		fput_sync(filp);
 	return retval;
 }
 
+/*
+ * "id" is the POSIX thread ID. We use the
+ * files pointer for this..
+ */
+int filp_close(struct file *filp, fl_owner_t id)
+{
+	return __filp_close(filp, id, true);
+}
+
 EXPORT_SYMBOL(filp_close);
 
 /*
-- 
2.34.1


  reply	other threads:[~2023-08-08 19:09 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-06 23:06 [PATCH] fs: use __fput_sync in close(2) Mateusz Guzik
2023-08-07  3:18 ` Matthew Wilcox
2023-08-08  5:56 ` Eric W. Biederman
2023-08-08  7:32   ` Mateusz Guzik
2023-08-08  8:13   ` Christian Brauner
2023-08-08  8:23     ` Mateusz Guzik
2023-08-08  8:40       ` Christian Brauner
2023-08-08  9:21         ` Mateusz Guzik
2023-08-08 15:07           ` [PATCH v2 (kindof)] " Mateusz Guzik
2023-08-08 16:30             ` Christian Brauner [this message]
2023-08-08 17:00               ` Christian Brauner
2023-08-08 17:05               ` Linus Torvalds
2023-08-08 17:06                 ` Christian Brauner
2023-08-09  9:03                 ` David Laight
2023-08-08 16:57   ` [PATCH] " Linus Torvalds
2023-08-08 17:10     ` Mateusz Guzik
2023-08-08 17:18       ` Linus Torvalds
2023-08-08 17:24         ` Mateusz Guzik
2023-08-08 17:35           ` Christian Brauner
2023-08-08 17:48             ` Linus Torvalds
2023-08-08 17:15     ` Christian Brauner
2023-08-08 17:22       ` Linus Torvalds
2023-08-08 17:48         ` Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230808-lebst-vorgibt-75c3010b4e54@brauner \
    --to=brauner@kernel.org \
    --cc=ebiederm@xmission.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mjguzik@gmail.com \
    --cc=oleg@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox