public inbox for audit@vger.kernel.org
 help / color / mirror / Atom feed
From: Richard Guy Briggs <rgb@redhat.com>
To: Waiman Long <longman@redhat.com>
Cc: Paul Moore <paul@paul-moore.com>, Eric Paris <eparis@redhat.com>,
	Christian Brauner <brauner@kernel.org>,
	Al Viro <viro@zeniv.linux.org.uk>,
	linux-kernel@vger.kernel.org, audit@vger.kernel.org,
	Ricardo Robaina <rrobaina@redhat.com>
Subject: Re: [PATCH v3 1/2] fs: Add a pool of extra fs->pwd references to fs_struct
Date: Fri, 27 Feb 2026 11:37:18 -0500	[thread overview]
Message-ID: <aaHIPoPomC4PRImW@madcap2.tricolour.ca> (raw)
In-Reply-To: <20260206201918.1988344-2-longman@redhat.com>

On 2026-02-06 15:19, Waiman Long wrote:
> When the audit subsystem is enabled, it can do a lot of get_fs_pwd()
> calls to get references to fs->pwd and then releasing those references
> back with path_put() later. That may cause a lot of spinlock contention
> on a single pwd's dentry lock because of the constant changes to the
> reference count when there are many processes on the same working
> directory actively doing open/close system calls. This can cause
> noticeable performance regression when compared with the case where
> the audit subsystem is turned off especially on systems with a lot of
> CPUs which is becoming more common these days.
> 
> A simple and elegant solution to avoid this kind of performance
> regression is to add a common pool of extra fs->pwd references inside
> the fs_struct. When a caller needs a pwd reference, it can borrow one
> from the pool, if available, to avoid an explicit path_get(). When it is
> time to release the reference, it can put it back into the common pool
> without doing a path_put(), provided fs->pwd has not changed since. We still
> need to acquire the fs's spinlock, but fs_struct is more distributed
> and it is less common to have many tasks sharing a single fs_struct.
> 
> A new set of get_fs_pwd_pool()/put_fs_pwd_pool() APIs is introduced
> with this patch to enable other subsystems to acquire and release
> a pwd reference from the common pool without doing unnecessary
> path_get/path_put().
> 
> Besides fs/fs_struct.c, the copy_mnt_ns() function of fs/namespace.c is
> also modified to properly handle the extra pwd references, if available.
> 
> Signed-off-by: Waiman Long <longman@redhat.com>

Reviewed-by: Richard Guy Briggs <rgb@redhat.com>

> ---
>  fs/fs_struct.c            | 26 +++++++++++++++++++++-----
>  fs/namespace.c            |  8 ++++++++
>  include/linux/fs_struct.h | 30 +++++++++++++++++++++++++++++-
>  3 files changed, 58 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/fs_struct.c b/fs/fs_struct.c
> index b8c46c5a38a0..621fe1677913 100644
> --- a/fs/fs_struct.c
> +++ b/fs/fs_struct.c
> @@ -32,15 +32,19 @@ void set_fs_root(struct fs_struct *fs, const struct path *path)
>  void set_fs_pwd(struct fs_struct *fs, const struct path *path)
>  {
>  	struct path old_pwd;
> +	int count;
>  
>  	path_get(path);
>  	write_seqlock(&fs->seq);
>  	old_pwd = fs->pwd;
>  	fs->pwd = *path;
> +	count = fs->pwd_refs + 1;
> +	fs->pwd_refs = 0;
>  	write_sequnlock(&fs->seq);
>  
>  	if (old_pwd.dentry)
> -		path_put(&old_pwd);
> +		while (count--)
> +			path_put(&old_pwd);
>  }
>  
>  static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
> @@ -62,10 +66,15 @@ void chroot_fs_refs(const struct path *old_root, const struct path *new_root)
>  		task_lock(p);
>  		fs = p->fs;
>  		if (fs) {
> -			int hits = 0;
> +			int hits;
> +
>  			write_seqlock(&fs->seq);
> +			hits = replace_path(&fs->pwd, old_root, new_root);
> +			if (hits && fs->pwd_refs) {
> +				count += fs->pwd_refs;
> +				fs->pwd_refs = 0;
> +			}
>  			hits += replace_path(&fs->root, old_root, new_root);
> -			hits += replace_path(&fs->pwd, old_root, new_root);
>  			while (hits--) {
>  				count++;
>  				path_get(new_root);
> @@ -81,8 +90,11 @@ void chroot_fs_refs(const struct path *old_root, const struct path *new_root)
>  
>  void free_fs_struct(struct fs_struct *fs)
>  {
> +	int count = fs->pwd_refs + 1;
> +
>  	path_put(&fs->root);
> -	path_put(&fs->pwd);
> +	while (count--)
> +		path_put(&fs->pwd);
>  	kmem_cache_free(fs_cachep, fs);
>  }
>  
> @@ -110,6 +122,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
>  	if (fs) {
>  		fs->users = 1;
>  		fs->in_exec = 0;
> +		fs->pwd_refs = 0;
>  		seqlock_init(&fs->seq);
>  		fs->umask = old->umask;
>  
> @@ -117,7 +130,10 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
>  		fs->root = old->root;
>  		path_get(&fs->root);
>  		fs->pwd = old->pwd;
> -		path_get(&fs->pwd);
> +		if (old->pwd_refs)
> +			old->pwd_refs--;
> +		else
> +			path_get(&fs->pwd);
>  		read_sequnlock_excl(&old->seq);
>  	}
>  	return fs;
> diff --git a/fs/namespace.c b/fs/namespace.c
> index c58674a20cad..a2323ba84d76 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -4135,6 +4135,14 @@ struct mnt_namespace *copy_mnt_ns(u64 flags, struct mnt_namespace *ns,
>  	 * as belonging to new namespace.  We have already acquired a private
>  	 * fs_struct, so tsk->fs->lock is not needed.
>  	 */
> +	if (new_fs)
> +		WARN_ON_ONCE(new_fs->users != 1);
> +
> +	/* Release the extra pwd references of new_fs, if present. */
> +	while (new_fs && new_fs->pwd_refs) {
> +		path_put(&new_fs->pwd);
> +		new_fs->pwd_refs--;
> +	}
>  	p = old;
>  	q = new;
>  	while (p) {
> diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
> index 0070764b790a..093648e65c20 100644
> --- a/include/linux/fs_struct.h
> +++ b/include/linux/fs_struct.h
> @@ -8,10 +8,11 @@
>  #include <linux/seqlock.h>
>  
>  struct fs_struct {
> -	int users;
>  	seqlock_t seq;
> +	int users;
>  	int umask;
>  	int in_exec;
> +	int pwd_refs;	/* A pool of extra pwd references */
>  	struct path root, pwd;
>  } __randomize_layout;
>  
> @@ -40,6 +41,33 @@ static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd)
>  	read_sequnlock_excl(&fs->seq);
>  }
>  
> +/* Acquire a pwd reference from the pwd_refs pool, if available */
> +static inline void get_fs_pwd_pool(struct fs_struct *fs, struct path *pwd)
> +{
> +	read_seqlock_excl(&fs->seq);
> +	*pwd = fs->pwd;
> +	if (fs->pwd_refs)
> +		fs->pwd_refs--;
> +	else
> +		path_get(pwd);
> +	read_sequnlock_excl(&fs->seq);
> +}
> +
> +/* Release a pwd reference back to the pwd_refs pool, if appropriate */
> +static inline void put_fs_pwd_pool(struct fs_struct *fs, struct path *pwd)
> +{
> +	bool put = false;
> +
> +	read_seqlock_excl(&fs->seq);
> +	if ((fs->pwd.dentry == pwd->dentry) && (fs->pwd.mnt == pwd->mnt))
> +		fs->pwd_refs++;
> +	else
> +		put = true;
> +	read_sequnlock_excl(&fs->seq);
> +	if (put)
> +		path_put(pwd);
> +}
> +
>  extern bool current_chrooted(void);
>  
>  static inline int current_umask(void)
> -- 
> 2.52.0
> 

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
Upstream IRC: SunRaycer
Voice: +1.613.860 2354 SMS: +1.613.518.6570


  reply	other threads:[~2026-02-27 16:37 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-06 20:19 [PATCH v3 0/2] fs, audit: Avoid excessive dput/dget in audit_context setup and reset paths Waiman Long
2026-02-06 20:19 ` [PATCH v3 1/2] fs: Add a pool of extra fs->pwd references to fs_struct Waiman Long
2026-02-27 16:37   ` Richard Guy Briggs [this message]
2026-02-06 20:19 ` [PATCH v3 2/2] audit: Use the new {get,put}_fs_pwd_pool() APIs to get/put pwd references Waiman Long
2026-02-27 16:37   ` Richard Guy Briggs
2026-02-12 16:36 ` [PATCH v3 0/2] fs, audit: Avoid excessive dput/dget in audit_context setup and reset paths Ricardo Robaina
2026-02-12 18:08 ` [RESEND PATCH " Waiman Long
2026-02-12 18:08 ` [RESEND PATCH v3 1/2] fs: Add a pool of extra fs->pwd references to fs_struct Waiman Long
2026-02-19 22:20   ` Paul Moore
2026-02-28 18:42     ` Waiman Long
2026-02-12 18:08 ` [RESEND PATCH v3 2/2] audit: Use the new {get,put}_fs_pwd_pool() APIs to get/put pwd references Waiman Long
2026-02-19 22:14   ` Paul Moore

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aaHIPoPomC4PRImW@madcap2.tricolour.ca \
    --to=rgb@redhat.com \
    --cc=audit@vger.kernel.org \
    --cc=brauner@kernel.org \
    --cc=eparis@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=paul@paul-moore.com \
    --cc=rrobaina@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox