All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Hansen <dave@linux.vnet.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, viro@zeniv.linux.org.uk,
	hch@infradead.org, trond.myklebust@fys.uio.no,
	Dave Hansen <dave@linux.vnet.ibm.com>
Subject: [RFC][PATCH 1/5] r/o bind mounts: change spinlock to mutex
Date: Tue, 29 Apr 2008 11:59:44 -0700	[thread overview]
Message-ID: <20080429185944.15C04C04@kernel> (raw)
In-Reply-To: <20080429185943.3BA6A050@kernel>


In order to lock out new writers during remount operations,
we need to hold the cpu_writer->lock's.  But, remounts can
sleep, so we can not use spinlocks.  Make them mutexes.

We do the fiddling in mnt_drop_write() because we used to
rely on the spin_lock() disabling preemption.  Since the
mutex does not do that, we have to use another method
to avoid underflow.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---

 linux-2.6.git-dave/fs/namespace.c |   52 +++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff -puN fs/namespace.c~make-cpu_writer-lock-a-mutex fs/namespace.c
--- linux-2.6.git/fs/namespace.c~make-cpu_writer-lock-a-mutex	2008-04-29 11:56:39.000000000 -0700
+++ linux-2.6.git-dave/fs/namespace.c	2008-04-29 11:56:39.000000000 -0700
@@ -173,7 +173,7 @@ struct mnt_writer {
 	 * If holding multiple instances of this lock, they
 	 * must be ordered by cpu number.
 	 */
-	spinlock_t lock;
+	struct mutex lock;
 	struct lock_class_key lock_class; /* compiles out with !lockdep */
 	unsigned long count;
 	struct vfsmount *mnt;
@@ -185,7 +185,7 @@ static int __init init_mnt_writers(void)
 	int cpu;
 	for_each_possible_cpu(cpu) {
 		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
-		spin_lock_init(&writer->lock);
+		mutex_init(&writer->lock);
 		lockdep_set_class(&writer->lock, &writer->lock_class);
 		writer->count = 0;
 	}
@@ -200,7 +200,7 @@ static void unlock_mnt_writers(void)
 
 	for_each_possible_cpu(cpu) {
 		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_unlock(&cpu_writer->lock);
+		mutex_unlock(&cpu_writer->lock);
 	}
 }
 
@@ -252,8 +252,8 @@ int mnt_want_write(struct vfsmount *mnt)
 	int ret = 0;
 	struct mnt_writer *cpu_writer;
 
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+	cpu_writer = &__get_cpu_var(mnt_writers);
+	mutex_lock(&cpu_writer->lock);
 	if (__mnt_is_readonly(mnt)) {
 		ret = -EROFS;
 		goto out;
@@ -261,8 +261,7 @@ int mnt_want_write(struct vfsmount *mnt)
 	use_cpu_writer_for_mount(cpu_writer, mnt);
 	cpu_writer->count++;
 out:
-	spin_unlock(&cpu_writer->lock);
-	put_cpu_var(mnt_writers);
+	mutex_unlock(&cpu_writer->lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -274,7 +273,7 @@ static void lock_mnt_writers(void)
 
 	for_each_possible_cpu(cpu) {
 		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
+		mutex_lock(&cpu_writer->lock);
 		__clear_mnt_count(cpu_writer);
 		cpu_writer->mnt = NULL;
 	}
@@ -333,18 +332,34 @@ void mnt_drop_write(struct vfsmount *mnt
 	int must_check_underflow = 0;
 	struct mnt_writer *cpu_writer;
 
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+retry:
+	cpu_writer = &__get_cpu_var(mnt_writers);
+	mutex_lock(&cpu_writer->lock);
 
 	use_cpu_writer_for_mount(cpu_writer, mnt);
 	if (cpu_writer->count > 0) {
 		cpu_writer->count--;
 	} else {
-		must_check_underflow = 1;
+		/*
+		 * Without this check, it is theoretically
+		 * possible for large number of processes
+		 * to atomic_dec(__mnt_writers) and cause
+		 * it to underflow.  Because we are under
+		 * the mutex (and there are NR_CPUS
+		 * mutexes), this limits the number of
+		 * concurrent decrements and underflow
+		 * level to NR_CPUS.
+		 */
+		if (atomic_read(&mnt->__mnt_writers) <
+			MNT_WRITER_UNDERFLOW_LIMIT) {
+			mutex_unlock(&cpu_writer->lock);
+			goto retry;
+		}
 		atomic_dec(&mnt->__mnt_writers);
+		must_check_underflow = 1;
 	}
 
-	spin_unlock(&cpu_writer->lock);
+	mutex_unlock(&cpu_writer->lock);
 	/*
 	 * Logically, we could call this each time,
 	 * but the __mnt_writers cacheline tends to
@@ -352,15 +367,6 @@ void mnt_drop_write(struct vfsmount *mnt
 	 */
 	if (must_check_underflow)
 		handle_write_count_underflow(mnt);
-	/*
-	 * This could be done right after the spinlock
-	 * is taken because the spinlock keeps us on
-	 * the cpu, and disables preemption.  However,
-	 * putting it here bounds the amount that
-	 * __mnt_writers can underflow.  Without it,
-	 * we could theoretically wrap __mnt_writers.
-	 */
-	put_cpu_var(mnt_writers);
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
@@ -615,7 +621,7 @@ static inline void __mntput(struct vfsmo
 		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
 		if (cpu_writer->mnt != mnt)
 			continue;
-		spin_lock(&cpu_writer->lock);
+		mutex_lock(&cpu_writer->lock);
 		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
 		cpu_writer->count = 0;
 		/*
@@ -624,7 +630,7 @@ static inline void __mntput(struct vfsmo
 		 * it to be valid.
 		 */
 		cpu_writer->mnt = NULL;
-		spin_unlock(&cpu_writer->lock);
+		mutex_unlock(&cpu_writer->lock);
 	}
 	/*
 	 * This probably indicates that somebody messed
_

  reply	other threads:[~2008-04-29 19:00 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-29 18:59 [RFC][PATCH 0/5] do remounts without consulting sb->s_files list Dave Hansen
2008-04-29 18:59 ` Dave Hansen [this message]
2008-04-29 18:59 ` [RFC][PATCH 2/5] introduce simple_set_mnt_no_get() helper for NFS Dave Hansen
2008-04-29 21:34   ` Trond Myklebust
2008-04-30  1:46     ` Dave Hansen
2008-05-01 16:58       ` Matthew Wilcox
2008-04-29 18:59 ` [RFC][PATCH 3/5] reintroduce list of vfsmounts over superblock Dave Hansen
2008-04-29 18:59 ` [RFC][PATCH 4/5] must hold lock_super() to set initial mount writer Dave Hansen
2008-04-30  9:25   ` Miklos Szeredi
2008-05-01 16:26     ` Dave Hansen
2008-04-29 18:59 ` [RFC][PATCH 5/5] check mount writers at superblock remount Dave Hansen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080429185944.15C04C04@kernel \
    --to=dave@linux.vnet.ibm.com \
    --cc=hch@infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=trond.myklebust@fys.uio.no \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.