From: Al Viro <viro@zeniv.linux.org.uk>
To: linux-fsdevel@vger.kernel.org
Cc: Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>
Subject: [PATCH][RFC] kernel/acct.c: saner struct file treatment
Date: Sun, 6 Jul 2025 20:58:44 +0100 [thread overview]
Message-ID: <20250706195844.GD1880847@ZenIV> (raw)
Instead of switching ->f_path.mnt of an opened file to internal
clone, resolve the pathname, get a struct path with ->mnt set to internal
clone, then dentry_open() that to get the file with right ->f_path.mnt
from the very beginning.
The only subtle part here is that on failure exits we need to
close the file with __fput_sync() and make sure we do that *before*
dropping the original mount.
With that done, only fs/{file_table,open,namei}.c ever store
anything to file->f_path and only prior to file->f_mode & FMODE_OPENED
becoming true. Analysis of mount write count handling also becomes
less brittle and convoluted...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/kernel/acct.c b/kernel/acct.c
index 6520baa13669..30ae403ee322 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -44,19 +44,14 @@
* a struct file opened for write. Fixed. 2/6/2000, AV.
*/
-#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/acct.h>
#include <linux/capability.h>
-#include <linux/file.h>
#include <linux/tty.h>
-#include <linux/security.h>
-#include <linux/vfs.h>
+#include <linux/statfs.h>
#include <linux/jiffies.h>
-#include <linux/times.h>
#include <linux/syscalls.h>
-#include <linux/mount.h>
-#include <linux/uaccess.h>
+#include <linux/namei.h>
#include <linux/sched/cputime.h>
#include <asm/div64.h>
@@ -217,84 +212,68 @@ static void close_work(struct work_struct *work)
complete(&acct->done);
}
-static int acct_on(struct filename *pathname)
+DEFINE_FREE(fput_sync, struct file *, if (!IS_ERR_OR_NULL(_T)) __fput_sync(_T))
+static int acct_on(const char __user *name)
{
- struct file *file;
- struct vfsmount *mnt, *internal;
+ /* Difference from BSD - they don't do O_APPEND */
+ const int open_flags = O_WRONLY|O_APPEND|O_LARGEFILE;
struct pid_namespace *ns = task_active_pid_ns(current);
+ struct path path __free(path_put) = {}; // in that order
+ struct path internal __free(path_put) = {}; // in that order
+ struct file *file __free(fput_sync) = NULL; // in that order
struct bsd_acct_struct *acct;
+ struct vfsmount *mnt;
struct fs_pin *old;
int err;
- acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
- if (!acct)
- return -ENOMEM;
+ err = user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, &path);
+ if (err)
+ return err;
- /* Difference from BSD - they don't do O_APPEND */
- file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
- if (IS_ERR(file)) {
- kfree(acct);
+ mnt = mnt_clone_internal(&path);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
+ internal.mnt = mnt;
+ internal.dentry = dget(mnt->mnt_root);
+
+ file = dentry_open(&internal, open_flags, current_cred());
+ if (IS_ERR(file))
return PTR_ERR(file);
- }
- if (!S_ISREG(file_inode(file)->i_mode)) {
- kfree(acct);
- filp_close(file, NULL);
+ if (!S_ISREG(file_inode(file)->i_mode))
return -EACCES;
- }
/* Exclude kernel kernel internal filesystems. */
- if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) {
- kfree(acct);
- filp_close(file, NULL);
+ if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT))
return -EINVAL;
- }
/* Exclude procfs and sysfs. */
- if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) {
- kfree(acct);
- filp_close(file, NULL);
+ if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE)
return -EINVAL;
- }
- if (!(file->f_mode & FMODE_CAN_WRITE)) {
- kfree(acct);
- filp_close(file, NULL);
+ if (!(file->f_mode & FMODE_CAN_WRITE))
return -EIO;
- }
- internal = mnt_clone_internal(&file->f_path);
- if (IS_ERR(internal)) {
- kfree(acct);
- filp_close(file, NULL);
- return PTR_ERR(internal);
- }
- err = mnt_get_write_access(internal);
- if (err) {
- mntput(internal);
- kfree(acct);
- filp_close(file, NULL);
- return err;
- }
- mnt = file->f_path.mnt;
- file->f_path.mnt = internal;
+
+ acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+ if (!acct)
+ return -ENOMEM;
atomic_long_set(&acct->count, 1);
init_fs_pin(&acct->pin, acct_pin_kill);
- acct->file = file;
+ acct->file = no_free_ptr(file);
acct->needcheck = jiffies;
acct->ns = ns;
mutex_init(&acct->lock);
INIT_WORK(&acct->work, close_work);
init_completion(&acct->done);
mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
- pin_insert(&acct->pin, mnt);
+ pin_insert(&acct->pin, path.mnt);
rcu_read_lock();
old = xchg(&ns->bacct, &acct->pin);
mutex_unlock(&acct->lock);
pin_kill(old);
- mnt_put_write_access(mnt);
- mntput(mnt);
return 0;
}
@@ -319,14 +298,9 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
return -EPERM;
if (name) {
- struct filename *tmp = getname(name);
-
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
mutex_lock(&acct_on_mutex);
- error = acct_on(tmp);
+ error = acct_on(name);
mutex_unlock(&acct_on_mutex);
- putname(tmp);
} else {
rcu_read_lock();
pin_kill(task_active_pid_ns(current)->bacct);
next reply other threads:[~2025-07-06 19:58 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-06 19:58 Al Viro [this message]
2025-07-07 8:27 ` [PATCH][RFC] kernel/acct.c: saner struct file treatment Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250706195844.GD1880847@ZenIV \
--to=viro@zeniv.linux.org.uk \
--cc=brauner@kernel.org \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.