From: Dave Hansen <dave@linux.vnet.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, viro@zeniv.linux.org.uk,
	hch@infradead.org, trond.myklebust@fys.uio.no,
	Dave Hansen <dave@linux.vnet.ibm.com>
Subject: [RFC][PATCH 1/5] r/o bind mounts: change spinlock to mutex
Date: Tue, 29 Apr 2008 11:59:44 -0700	[thread overview]
Message-ID: <20080429185944.15C04C04@kernel> (raw)
In-Reply-To: <20080429185943.3BA6A050@kernel>


In order to lock out new writers during remount operations, we need
to hold all of the per-cpu cpu_writer->lock locks at once.  But
remounts can sleep, so we cannot use spinlocks.  Convert them to
mutexes.
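
For context, a rough, hypothetical sketch (not code from this series;
the real remount-side check comes later in the series) of the pattern
that needs these locks to be sleepable: hold every cpu_writer->lock
while checking for outstanding writers, and that whole section can
sleep.

	static int sketch_remount_ro(struct vfsmount *mnt)
	{
		int ret = 0;

		/* takes all NR_CPUS per-cpu writer locks; may sleep */
		lock_mnt_writers();
		if (atomic_read(&mnt->__mnt_writers) > 0)
			ret = -EBUSY;	/* outstanding write references */
		else
			mnt->mnt_sb->s_flags |= MS_RDONLY;
		unlock_mnt_writers();
		return ret;
	}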

We do the extra fiddling in mnt_drop_write() because we used to rely
on spin_lock() disabling preemption.  Since a mutex does not disable
preemption, we have to bound the potential underflow of
__mnt_writers another way.
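
As a minimal illustration (using the per-cpu helpers of that era, no
new code beyond what the diff below already contains): the old
pattern pinned the task to a CPU via get_cpu_var(), which disables
preemption and therefore cannot be held across a sleeping
mutex_lock().

	/* old: preemption disabled, sleeping not allowed */
	cpu_writer = &get_cpu_var(mnt_writers);
	spin_lock(&cpu_writer->lock);
	/* ... adjust cpu_writer->count ... */
	spin_unlock(&cpu_writer->lock);
	put_cpu_var(mnt_writers);

	/* new: preemption stays on; mutex_lock() may sleep and the
	 * task may migrate to another CPU before taking the lock */
	cpu_writer = &__get_cpu_var(mnt_writers);
	mutex_lock(&cpu_writer->lock);
	/* ... adjust cpu_writer->count ... */
	mutex_unlock(&cpu_writer->lock);

Because the mutex version can be preempted and migrated, the
underflow of __mnt_writers is instead bounded by the retry-under-mutex
check in the mnt_drop_write() hunk below: at most NR_CPUS tasks can
decrement past the limit check at once.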

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---

 linux-2.6.git-dave/fs/namespace.c |   52 +++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff -puN fs/namespace.c~make-cpu_writer-lock-a-mutex fs/namespace.c
--- linux-2.6.git/fs/namespace.c~make-cpu_writer-lock-a-mutex	2008-04-29 11:56:39.000000000 -0700
+++ linux-2.6.git-dave/fs/namespace.c	2008-04-29 11:56:39.000000000 -0700
@@ -173,7 +173,7 @@ struct mnt_writer {
 	 * If holding multiple instances of this lock, they
 	 * must be ordered by cpu number.
 	 */
-	spinlock_t lock;
+	struct mutex lock;
 	struct lock_class_key lock_class; /* compiles out with !lockdep */
 	unsigned long count;
 	struct vfsmount *mnt;
@@ -185,7 +185,7 @@ static int __init init_mnt_writers(void)
 	int cpu;
 	for_each_possible_cpu(cpu) {
 		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
-		spin_lock_init(&writer->lock);
+		mutex_init(&writer->lock);
 		lockdep_set_class(&writer->lock, &writer->lock_class);
 		writer->count = 0;
 	}
@@ -200,7 +200,7 @@ static void unlock_mnt_writers(void)
 
 	for_each_possible_cpu(cpu) {
 		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_unlock(&cpu_writer->lock);
+		mutex_unlock(&cpu_writer->lock);
 	}
 }
 
@@ -252,8 +252,8 @@ int mnt_want_write(struct vfsmount *mnt)
 	int ret = 0;
 	struct mnt_writer *cpu_writer;
 
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+	cpu_writer = &__get_cpu_var(mnt_writers);
+	mutex_lock(&cpu_writer->lock);
 	if (__mnt_is_readonly(mnt)) {
 		ret = -EROFS;
 		goto out;
@@ -261,8 +261,7 @@ int mnt_want_write(struct vfsmount *mnt)
 	use_cpu_writer_for_mount(cpu_writer, mnt);
 	cpu_writer->count++;
 out:
-	spin_unlock(&cpu_writer->lock);
-	put_cpu_var(mnt_writers);
+	mutex_unlock(&cpu_writer->lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -274,7 +273,7 @@ static void lock_mnt_writers(void)
 
 	for_each_possible_cpu(cpu) {
 		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
+		mutex_lock(&cpu_writer->lock);
 		__clear_mnt_count(cpu_writer);
 		cpu_writer->mnt = NULL;
 	}
@@ -333,18 +332,34 @@ void mnt_drop_write(struct vfsmount *mnt
 	int must_check_underflow = 0;
 	struct mnt_writer *cpu_writer;
 
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+retry:
+	cpu_writer = &__get_cpu_var(mnt_writers);
+	mutex_lock(&cpu_writer->lock);
 
 	use_cpu_writer_for_mount(cpu_writer, mnt);
 	if (cpu_writer->count > 0) {
 		cpu_writer->count--;
 	} else {
-		must_check_underflow = 1;
+		/*
+		 * Without this check, it is theoretically
+		 * possible for large number of processes
+		 * to atomic_dec(__mnt_writers) and cause
+		 * it to underflow.  Because we are under
+		 * the mutex (and there are NR_CPUS
+		 * mutexes), this limits the number of
+		 * concurrent decrements and underflow
+		 * level to NR_CPUS.
+		 */
+		if (atomic_read(&mnt->__mnt_writers) <
+			MNT_WRITER_UNDERFLOW_LIMIT) {
+			mutex_unlock(&cpu_writer->lock);
+			goto retry;
+		}
 		atomic_dec(&mnt->__mnt_writers);
+		must_check_underflow = 1;
 	}
 
-	spin_unlock(&cpu_writer->lock);
+	mutex_unlock(&cpu_writer->lock);
 	/*
 	 * Logically, we could call this each time,
 	 * but the __mnt_writers cacheline tends to
@@ -352,15 +367,6 @@ void mnt_drop_write(struct vfsmount *mnt
 	 */
 	if (must_check_underflow)
 		handle_write_count_underflow(mnt);
-	/*
-	 * This could be done right after the spinlock
-	 * is taken because the spinlock keeps us on
-	 * the cpu, and disables preemption.  However,
-	 * putting it here bounds the amount that
-	 * __mnt_writers can underflow.  Without it,
-	 * we could theoretically wrap __mnt_writers.
-	 */
-	put_cpu_var(mnt_writers);
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
@@ -615,7 +621,7 @@ static inline void __mntput(struct vfsmo
 		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
 		if (cpu_writer->mnt != mnt)
 			continue;
-		spin_lock(&cpu_writer->lock);
+		mutex_lock(&cpu_writer->lock);
 		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
 		cpu_writer->count = 0;
 		/*
@@ -624,7 +630,7 @@ static inline void __mntput(struct vfsmo
 		 * it to be valid.
 		 */
 		cpu_writer->mnt = NULL;
-		spin_unlock(&cpu_writer->lock);
+		mutex_unlock(&cpu_writer->lock);
 	}
 	/*
 	 * This probably indicates that somebody messed
_

Thread overview: 11+ messages
2008-04-29 18:59 [RFC][PATCH 0/5] do remounts without consulting sb->s_files list Dave Hansen
2008-04-29 18:59 ` Dave Hansen [this message]
2008-04-29 18:59 ` [RFC][PATCH 2/5] introduce simple_set_mnt_no_get() helper for NFS Dave Hansen
2008-04-29 21:34   ` Trond Myklebust
2008-04-30  1:46     ` Dave Hansen
2008-05-01 16:58       ` Matthew Wilcox
2008-04-29 18:59 ` [RFC][PATCH 3/5] reintroduce list of vfsmounts over superblock Dave Hansen
2008-04-29 18:59 ` [RFC][PATCH 4/5] must hold lock_super() to set initial mount writer Dave Hansen
2008-04-30  9:25   ` Miklos Szeredi
2008-05-01 16:26     ` Dave Hansen
2008-04-29 18:59 ` [RFC][PATCH 5/5] check mount writers at superblock remount Dave Hansen
