linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Nick Piggin <npiggin@suse.de>
To: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Subject: Re: [patch 05/33] fs: scale mntget/mntput
Date: Mon, 7 Sep 2009 11:41:10 +0200	[thread overview]
Message-ID: <20090907094110.GI1875@wotan.suse.de> (raw)
In-Reply-To: <20090904065534.636133158@nick.local0.net>

On Fri, Sep 04, 2009 at 04:51:46PM +1000, npiggin@suse.de wrote:
> Improve scalability of mntget/mntput by using per-cpu counters protected
> by the reader side of the brlock vfsmount_lock. mnt_mounted keeps track of
> whether the vfsmount is actually attached to the tree so we can shortcut
> expensive checks in mntput.

Ah, I have a problem with count_mnt_count here...

count_mnt_count probably needs the write lock; otherwise you could count
CPU0, then the counter is incremented on CPU0 and decremented on CPUN, and
then you count the decrement on CPUN but have missed the increment.
count_mnt_count should be a slowpath anyway, I think. may_umount_tree and
do_refcount_check I think need the write lock.

I think I was mis-remembering my mnt writers count per-cpu patches here,
but that's got a much more complex scheme to avoid atomic ops in the
fastpath... potentially we could use the same scheme to speed
up mntget. Or if speed is not a problem then perhaps simplify mnt_want_write
with the use of the brlock vfsmount lock...

Anyway, I'll fix this one up the simple way for now, and I need to look
closely at single-threaded performance of this patchset in some areas anyway
so I'll revisit then.

> ---
>  fs/libfs.c            |    1 
>  fs/namespace.c        |  122 +++++++++++++++++++++++++++++++++++++++++++-------
>  fs/pnode.c            |    2 
>  include/linux/mount.h |   33 ++++---------
>  4 files changed, 121 insertions(+), 37 deletions(-)
> 
> Index: linux-2.6/fs/namespace.c
> ===================================================================
> --- linux-2.6.orig/fs/namespace.c
> +++ linux-2.6/fs/namespace.c
> @@ -177,6 +177,49 @@ void mnt_release_group_id(struct vfsmoun
>  	mnt->mnt_group_id = 0;
>  }
>  
> +static inline void add_mnt_count(struct vfsmount *mnt, int n)
> +{
> +#ifdef CONFIG_SMP
> +	(*per_cpu_ptr(mnt->mnt_count, smp_processor_id())) += n;
> +#else
> +	mnt->mnt_count += n;
> +#endif
> +}
> +
> +static inline void inc_mnt_count(struct vfsmount *mnt)
> +{
> +#ifdef CONFIG_SMP
> +	(*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))++;
> +#else
> +	mnt->mnt_count++;
> +#endif
> +}
> +
> +static inline void dec_mnt_count(struct vfsmount *mnt)
> +{
> +#ifdef CONFIG_SMP
> +	(*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))--;
> +#else
> +	mnt->mnt_count--;
> +#endif
> +}
> +
> +unsigned int count_mnt_count(struct vfsmount *mnt)
> +{
> +#ifdef CONFIG_SMP
> +	unsigned int count = 0;
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu) {
> +		count += *per_cpu_ptr(mnt->mnt_count, cpu);
> +	}
> +
> +	return count;
> +#else
> +	return mnt->mnt_count;
> +#endif
> +}
> +
>  struct vfsmount *alloc_vfsmnt(const char *name)
>  {
>  	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
> @@ -193,7 +236,13 @@ struct vfsmount *alloc_vfsmnt(const char
>  				goto out_free_id;
>  		}
>  
> -		atomic_set(&mnt->mnt_count, 1);
> +#ifdef CONFIG_SMP
> +		mnt->mnt_count = alloc_percpu(int);
> +		if (!mnt->mnt_count)
> +			goto out_free_devname;
> +#else
> +		mnt->mnt_count = 0;
> +#endif
>  		INIT_LIST_HEAD(&mnt->mnt_hash);
>  		INIT_LIST_HEAD(&mnt->mnt_child);
>  		INIT_LIST_HEAD(&mnt->mnt_mounts);
> @@ -205,14 +254,19 @@ struct vfsmount *alloc_vfsmnt(const char
>  #ifdef CONFIG_SMP
>  		mnt->mnt_writers = alloc_percpu(int);
>  		if (!mnt->mnt_writers)
> -			goto out_free_devname;
> +			goto out_free_mntcount;
>  #else
>  		mnt->mnt_writers = 0;
>  #endif
> +		preempt_disable();
> +		inc_mnt_count(mnt);
> +		preempt_enable();
>  	}
>  	return mnt;
>  
>  #ifdef CONFIG_SMP
> +out_free_mntcount:
> +	free_percpu(mnt->mnt_count);
>  out_free_devname:
>  	kfree(mnt->mnt_devname);
>  #endif
> @@ -526,9 +580,11 @@ static void detach_mnt(struct vfsmount *
>  	old_path->mnt = mnt->mnt_parent;
>  	mnt->mnt_parent = mnt;
>  	mnt->mnt_mountpoint = mnt->mnt_root;
> -	list_del_init(&mnt->mnt_child);
>  	list_del_init(&mnt->mnt_hash);
> +	list_del_init(&mnt->mnt_child);
>  	old_path->dentry->d_mounted--;
> +	WARN_ON(mnt->mnt_mounted != 1);
> +	mnt->mnt_mounted--;
>  }
>  
>  void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
> @@ -545,6 +601,8 @@ static void attach_mnt(struct vfsmount *
>  	list_add_tail(&mnt->mnt_hash, mount_hashtable +
>  			hash(path->mnt, path->dentry));
>  	list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
> +	WARN_ON(mnt->mnt_mounted != 0);
> +	mnt->mnt_mounted++;
>  }
>  
>  /*
> @@ -567,6 +625,8 @@ static void commit_tree(struct vfsmount
>  	list_add_tail(&mnt->mnt_hash, mount_hashtable +
>  				hash(parent, mnt->mnt_mountpoint));
>  	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
> +	WARN_ON(mnt->mnt_mounted != 0);
> +	mnt->mnt_mounted++;
>  	touch_mnt_namespace(n);
>  }
>  
> @@ -670,50 +730,80 @@ static inline void __mntput(struct vfsmo
>  
>  void mntput_no_expire(struct vfsmount *mnt)
>  {
> -repeat:
> -	/* open-code atomic_dec_and_lock for the vfsmount lock */
> -	if (atomic_add_unless(&mnt->mnt_count, -1, 1))
> +	if (likely(mnt->mnt_mounted)) {
> +		vfsmount_read_lock();
> +		if (unlikely(!mnt->mnt_mounted)) {
> +			vfsmount_read_unlock();
> +			goto repeat;
> +		}
> +		dec_mnt_count(mnt);
> +		BUG_ON(count_mnt_count(mnt) == 0);
> +		vfsmount_read_unlock();
> +
>  		return;
> +	}
> +
> +repeat:
>  	vfsmount_write_lock();
> -	if (!atomic_dec_and_test(&mnt->mnt_count)) {
> +	BUG_ON(mnt->mnt_mounted);
> +	dec_mnt_count(mnt);
> +	if (count_mnt_count(mnt)) {
>  		vfsmount_write_unlock();
>  		return;
>  	}
> -
>  	if (likely(!mnt->mnt_pinned)) {
>  		vfsmount_write_unlock();
>  		__mntput(mnt);
>  		return;
>  	}
> -	atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
> +	add_mnt_count(mnt, mnt->mnt_pinned + 1);
>  	mnt->mnt_pinned = 0;
>  	vfsmount_write_unlock();
>  	acct_auto_close_mnt(mnt);
>  	security_sb_umount_close(mnt);
>  	goto repeat;
>  }
> -
>  EXPORT_SYMBOL(mntput_no_expire);
>  
> +void mntput(struct vfsmount *mnt)
> +{
> +	if (mnt) {
> +		/* avoid cacheline pingpong */
> +		if (unlikely(mnt->mnt_expiry_mark))
> +			mnt->mnt_expiry_mark = 0;
> +		mntput_no_expire(mnt);
> +	}
> +}
> +EXPORT_SYMBOL(mntput);
> +
> +struct vfsmount *mntget(struct vfsmount *mnt)
> +{
> +	if (mnt) {
> +		preempt_disable();
> +		inc_mnt_count(mnt);
> +		preempt_enable();
> +	}
> +	return mnt;
> +}
> +EXPORT_SYMBOL(mntget);
> +
>  void mnt_pin(struct vfsmount *mnt)
>  {
>  	vfsmount_write_lock();
>  	mnt->mnt_pinned++;
>  	vfsmount_write_unlock();
>  }
> -
>  EXPORT_SYMBOL(mnt_pin);
>  
>  void mnt_unpin(struct vfsmount *mnt)
>  {
>  	vfsmount_write_lock();
>  	if (mnt->mnt_pinned) {
> -		atomic_inc(&mnt->mnt_count);
> +		inc_mnt_count(mnt);
>  		mnt->mnt_pinned--;
>  	}
>  	vfsmount_write_unlock();
>  }
> -
>  EXPORT_SYMBOL(mnt_unpin);
>  
>  static inline void mangle(struct seq_file *m, const char *s)
> @@ -996,7 +1086,7 @@ int may_umount_tree(struct vfsmount *mnt
>  
>  	vfsmount_read_lock();
>  	for (p = mnt; p; p = next_mnt(p, mnt)) {
> -		actual_refs += atomic_read(&p->mnt_count);
> +		actual_refs += count_mnt_count(p);
>  		minimum_refs += 2;
>  	}
>  	vfsmount_read_unlock();
> @@ -1076,6 +1166,8 @@ void umount_tree(struct vfsmount *mnt, i
>  		__touch_mnt_namespace(p->mnt_ns);
>  		p->mnt_ns = NULL;
>  		list_del_init(&p->mnt_child);
> +		WARN_ON(p->mnt_mounted != 1);
> +		p->mnt_mounted--;
>  		if (p->mnt_parent != p) {
>  			p->mnt_parent->mnt_ghosts++;
>  			p->mnt_mountpoint->d_mounted--;
> @@ -1107,7 +1199,7 @@ static int do_umount(struct vfsmount *mn
>  		    flags & (MNT_FORCE | MNT_DETACH))
>  			return -EINVAL;
>  
> -		if (atomic_read(&mnt->mnt_count) != 2)
> +		if (count_mnt_count(mnt) != 2)
>  			return -EBUSY;
>  
>  		if (!xchg(&mnt->mnt_expiry_mark, 1))
> Index: linux-2.6/include/linux/mount.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mount.h
> +++ linux-2.6/include/linux/mount.h
> @@ -56,20 +56,20 @@ struct vfsmount {
>  	struct mnt_namespace *mnt_ns;	/* containing namespace */
>  	int mnt_id;			/* mount identifier */
>  	int mnt_group_id;		/* peer group identifier */
> -	/*
> -	 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
> -	 * to let these frequently modified fields in a separate cache line
> -	 * (so that reads of mnt_flags wont ping-pong on SMP machines)
> -	 */
> -	atomic_t mnt_count;
>  	int mnt_expiry_mark;		/* true if marked for expiry */
>  	int mnt_pinned;
>  	int mnt_ghosts;
> +	int mnt_mounted;
>  #ifdef CONFIG_SMP
>  	int *mnt_writers;
>  #else
>  	int mnt_writers;
>  #endif
> +#ifdef CONFIG_SMP
> +	int *mnt_count;
> +#else
> +	int mnt_count;
> +#endif
>  };
>  
>  static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
> @@ -81,13 +81,6 @@ static inline int *get_mnt_writers_ptr(s
>  #endif
>  }
>  
> -static inline struct vfsmount *mntget(struct vfsmount *mnt)
> -{
> -	if (mnt)
> -		atomic_inc(&mnt->mnt_count);
> -	return mnt;
> -}
> -
>  struct file; /* forward dec */
>  
>  extern void vfsmount_read_lock(void);
> @@ -95,23 +88,21 @@ extern void vfsmount_read_unlock(void);
>  extern void vfsmount_write_lock(void);
>  extern void vfsmount_write_unlock(void);
>  
> +extern unsigned int count_mnt_count(struct vfsmount *mnt);
> +
>  extern int mnt_want_write(struct vfsmount *mnt);
>  extern int mnt_want_write_file(struct file *file);
>  extern int mnt_clone_write(struct vfsmount *mnt);
>  extern void mnt_drop_write(struct vfsmount *mnt);
> +
>  extern void mntput_no_expire(struct vfsmount *mnt);
> +extern struct vfsmount *mntget(struct vfsmount *mnt);
> +extern void mntput(struct vfsmount *mnt);
> +
>  extern void mnt_pin(struct vfsmount *mnt);
>  extern void mnt_unpin(struct vfsmount *mnt);
>  extern int __mnt_is_readonly(struct vfsmount *mnt);
>  
> -static inline void mntput(struct vfsmount *mnt)
> -{
> -	if (mnt) {
> -		mnt->mnt_expiry_mark = 0;
> -		mntput_no_expire(mnt);
> -	}
> -}
> -
>  extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
>  				      const char *name, void *data);
>  
> Index: linux-2.6/fs/pnode.c
> ===================================================================
> --- linux-2.6.orig/fs/pnode.c
> +++ linux-2.6/fs/pnode.c
> @@ -279,7 +279,7 @@ out:
>   */
>  static inline int do_refcount_check(struct vfsmount *mnt, int count)
>  {
> -	int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts;
> +	int mycount = count_mnt_count(mnt) - mnt->mnt_ghosts;
>  	return (mycount > count);
>  }
>  
> Index: linux-2.6/fs/libfs.c
> ===================================================================
> --- linux-2.6.orig/fs/libfs.c
> +++ linux-2.6/fs/libfs.c
> @@ -244,6 +244,7 @@ int get_sb_pseudo(struct file_system_typ
>  	d_instantiate(dentry, root);
>  	s->s_root = dentry;
>  	s->s_flags |= MS_ACTIVE;
> +	mnt->mnt_mounted++; /* never unmounted, shortcut mntget (XXX: OK?) */
>  	simple_set_mnt(mnt, s);
>  	return 0;
>  
> 

  reply	other threads:[~2009-09-07  9:41 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-04  6:51 [patch 00/33] my current vfs scalability patch queue npiggin
2009-09-04  6:51 ` [patch 01/33] fs: no games with DCACHE_UNHASHED npiggin
2009-09-04  6:51 ` [patch 02/33] fs: cleanup files_lock npiggin
2009-09-04  6:51 ` [patch 03/33] fs: scale files_lock npiggin
2009-09-28 13:22   ` Peter Zijlstra
2009-09-28 13:24   ` Peter Zijlstra
2009-10-01  2:16     ` Nick Piggin
     [not found]       ` <r2i3282373b1004011751j440635b3n484018db2e2bc50c@mail.gmail.com>
2010-04-02  2:24         ` [patch 1/2] fs: cleanup files_lock tim
2009-09-04  6:51 ` [patch 04/33] fs: brlock vfsmount_lock npiggin
2009-09-04 15:19   ` Jens Axboe
2009-09-07  7:39     ` Nick Piggin
2009-09-22 15:17   ` Al Viro
2009-09-27 19:56     ` Nick Piggin
2009-09-28 13:21       ` Peter Zijlstra
2009-10-01  2:10         ` Nick Piggin
2009-09-04  6:51 ` [patch 05/33] fs: scale mntget/mntput npiggin
2009-09-07  9:41   ` Nick Piggin [this message]
2009-09-04  6:51 ` [patch 06/33] fs: dcache scale hash npiggin
2009-09-04  6:51 ` [patch 07/33] fs: dcache scale lru npiggin
2009-09-04  6:51 ` [patch 08/33] fs: dcache scale nr_dentry npiggin
2009-09-04 14:41   ` Daniel Walker
2009-09-07  7:36     ` Nick Piggin
2009-09-04  6:51 ` [patch 09/33] fs: dcache scale dentry refcount npiggin
2009-09-06 18:01   ` Eric Paris
2009-09-07  7:44     ` Nick Piggin
2009-09-07 11:21       ` Eric Paris
2009-09-07 11:35         ` Nick Piggin
2009-09-04  6:51 ` [patch 10/33] fs: dcache scale d_unhashed npiggin
2009-09-04  6:51 ` [patch 11/33] fs: dcache scale subdirs npiggin
2010-06-17 15:13   ` Peter Zijlstra
2010-06-17 16:53     ` Nick Piggin
2010-06-21 13:35       ` Peter Zijlstra
2010-06-21 14:48         ` Nick Piggin
2010-06-21 14:55           ` Peter Zijlstra
2010-06-22  6:02             ` john stultz
2010-06-22  6:06               ` Nick Piggin
2010-06-22  7:27               ` Peter Zijlstra
2010-06-23  2:03                 ` john stultz
2010-06-23  7:23                   ` Peter Zijlstra
2009-09-04  6:51 ` [patch 12/33] fs: scale inode alias list npiggin
2009-09-04  6:51 ` [patch 13/33] fs: use RCU / seqlock logic for reverse and multi-step operations npiggin
2009-09-04  6:51 ` [patch 14/33] fs: dcache remove dcache_lock npiggin
2009-09-04  6:51 ` [patch 15/33] fs: dcache reduce dput locking npiggin
2009-09-04  6:51 ` [patch 16/33] fs: dcache per-bucket dcache hash locking npiggin
2009-09-04 14:51   ` Daniel Walker
2009-09-07  7:38     ` Nick Piggin
2009-09-04  6:51 ` [patch 17/33] fs: dcache reduce dcache_inode_lock npiggin
2009-09-04  6:51 ` [patch 18/33] fs: dcache per-inode inode alias locking npiggin
2009-09-04  6:52 ` [patch 19/33] fs: icache lock s_inodes list npiggin
2009-09-04  6:52 ` [patch 20/33] fs: icache lock inode hash npiggin
2009-09-04  6:52 ` [patch 21/33] fs: icache lock i_state npiggin
2009-09-04  6:52 ` [patch 22/33] fs: icache lock i_count npiggin
2009-09-04  6:52 ` [patch 23/33] fs: icache atomic inodes_stat npiggin
2009-09-04  6:52 ` [patch 24/33] fs: icache lock lru/writeback lists npiggin
2009-09-04  6:52 ` [patch 25/33] fs: icache protect inode state npiggin
2009-09-04  6:52 ` [patch 26/33] fs: inode atomic last_ino, iunique lock npiggin
2009-09-04  6:52 ` [patch 27/33] fs: icache remove inode_lock npiggin
2009-09-04  6:52 ` [patch 28/33] fs: inode factor hash lock into functions npiggin
2009-09-04  6:52 ` [patch 29/33] Remove the global inode_hash_lock and replace it with per-hash-bucket locks. fs: inode per-bucket inode hash locks npiggin
2009-09-04  7:05   ` Nick Piggin
2009-09-04  6:52 ` [patch 30/33] fs: inode lazy lru npiggin
2009-09-04  6:52 ` [patch 31/33] fs: RCU free inodes npiggin
2009-09-04  6:52 ` [patch 32/33] fs: rcu walk for i_sb_list npiggin
2009-09-04  6:52 ` [patch 33/33] fs: improve scalability of pseudo filesystems npiggin
2009-09-04  7:05 ` [patch 00/33] my current vfs scalability patch queue Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090907094110.GI1875@wotan.suse.de \
    --to=npiggin@suse.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).