From: Dave Hansen <dave@linux.vnet.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, viro@zeniv.linux.org.uk,
hch@infradead.org, trond.myklebust@fys.uio.no,
Dave Hansen <dave@linux.vnet.ibm.com>
Subject: [RFC][PATCH 1/5] r/o bind mounts: change spinlock to mutex
Date: Tue, 29 Apr 2008 11:59:44 -0700
Message-ID: <20080429185944.15C04C04@kernel>
In-Reply-To: <20080429185943.3BA6A050@kernel>
In order to lock out new writers during remount operations,
we need to hold every per-cpu cpu_writer->lock. But remounts
can sleep, so we cannot use spinlocks. Make them mutexes.

The extra fiddling in mnt_drop_write() is needed because we
used to rely on spin_lock() disabling preemption to keep us
on one CPU. Since a mutex does not do that, we have to bound
the potential underflow of __mnt_writers another way.
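To make the constraint concrete, here is a rough sketch (an
illustration only, not code from this series) of the kind of
remount-side caller that motivates the change: it takes every
per-cpu writer lock via lock_mnt_writers() and then calls into
code that can sleep, which is only legal once these locks are
mutexes. The deny_writers_and_remount() and make_sb_readonly()
names are made up for the example.

static int deny_writers_and_remount(struct super_block *sb)
{
	int err;

	/* takes mutex_lock() on each per-cpu mnt_writer */
	lock_mnt_writers();
	/*
	 * Anything in here may sleep, for example waiting on I/O
	 * while the filesystem flushes dirty data for the remount.
	 * Holding spinlocks across this would be a bug.
	 */
	err = make_sb_readonly(sb);	/* hypothetical helper */
	unlock_mnt_writers();

	return err;
}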
Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
---
linux-2.6.git-dave/fs/namespace.c | 52 +++++++++++++++++++++-----------------
1 file changed, 29 insertions(+), 23 deletions(-)
diff -puN fs/namespace.c~make-cpu_writer-lock-a-mutex fs/namespace.c
--- linux-2.6.git/fs/namespace.c~make-cpu_writer-lock-a-mutex 2008-04-29 11:56:39.000000000 -0700
+++ linux-2.6.git-dave/fs/namespace.c 2008-04-29 11:56:39.000000000 -0700
@@ -173,7 +173,7 @@ struct mnt_writer {
* If holding multiple instances of this lock, they
* must be ordered by cpu number.
*/
- spinlock_t lock;
+ struct mutex lock;
struct lock_class_key lock_class; /* compiles out with !lockdep */
unsigned long count;
struct vfsmount *mnt;
@@ -185,7 +185,7 @@ static int __init init_mnt_writers(void)
int cpu;
for_each_possible_cpu(cpu) {
struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
- spin_lock_init(&writer->lock);
+ mutex_init(&writer->lock);
lockdep_set_class(&writer->lock, &writer->lock_class);
writer->count = 0;
}
@@ -200,7 +200,7 @@ static void unlock_mnt_writers(void)
for_each_possible_cpu(cpu) {
cpu_writer = &per_cpu(mnt_writers, cpu);
- spin_unlock(&cpu_writer->lock);
+ mutex_unlock(&cpu_writer->lock);
}
}
@@ -252,8 +252,8 @@ int mnt_want_write(struct vfsmount *mnt)
int ret = 0;
struct mnt_writer *cpu_writer;
- cpu_writer = &get_cpu_var(mnt_writers);
- spin_lock(&cpu_writer->lock);
+ cpu_writer = &__get_cpu_var(mnt_writers);
+ mutex_lock(&cpu_writer->lock);
if (__mnt_is_readonly(mnt)) {
ret = -EROFS;
goto out;
@@ -261,8 +261,7 @@ int mnt_want_write(struct vfsmount *mnt)
use_cpu_writer_for_mount(cpu_writer, mnt);
cpu_writer->count++;
out:
- spin_unlock(&cpu_writer->lock);
- put_cpu_var(mnt_writers);
+ mutex_unlock(&cpu_writer->lock);
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -274,7 +273,7 @@ static void lock_mnt_writers(void)
for_each_possible_cpu(cpu) {
cpu_writer = &per_cpu(mnt_writers, cpu);
- spin_lock(&cpu_writer->lock);
+ mutex_lock(&cpu_writer->lock);
__clear_mnt_count(cpu_writer);
cpu_writer->mnt = NULL;
}
@@ -333,18 +332,34 @@ void mnt_drop_write(struct vfsmount *mnt
int must_check_underflow = 0;
struct mnt_writer *cpu_writer;
- cpu_writer = &get_cpu_var(mnt_writers);
- spin_lock(&cpu_writer->lock);
+retry:
+ cpu_writer = &__get_cpu_var(mnt_writers);
+ mutex_lock(&cpu_writer->lock);
use_cpu_writer_for_mount(cpu_writer, mnt);
if (cpu_writer->count > 0) {
cpu_writer->count--;
} else {
- must_check_underflow = 1;
+ /*
+ * Without this check, it is theoretically
+ * possible for a large number of processes
+ * to atomic_dec(__mnt_writers) and cause
+ * it to underflow. Because we are under
+ * the mutex (and there are NR_CPUS
+ * mutexes), this limits the number of
+ * concurrent decrements and underflow
+ * level to NR_CPUS.
+ */
+ if (atomic_read(&mnt->__mnt_writers) <
+ MNT_WRITER_UNDERFLOW_LIMIT) {
+ mutex_unlock(&cpu_writer->lock);
+ goto retry;
+ }
atomic_dec(&mnt->__mnt_writers);
+ must_check_underflow = 1;
}
- spin_unlock(&cpu_writer->lock);
+ mutex_unlock(&cpu_writer->lock);
/*
* Logically, we could call this each time,
* but the __mnt_writers cacheline tends to
@@ -352,15 +367,6 @@ void mnt_drop_write(struct vfsmount *mnt
*/
if (must_check_underflow)
handle_write_count_underflow(mnt);
- /*
- * This could be done right after the spinlock
- * is taken because the spinlock keeps us on
- * the cpu, and disables preemption. However,
- * putting it here bounds the amount that
- * __mnt_writers can underflow. Without it,
- * we could theoretically wrap __mnt_writers.
- */
- put_cpu_var(mnt_writers);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);
@@ -615,7 +621,7 @@ static inline void __mntput(struct vfsmo
struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
if (cpu_writer->mnt != mnt)
continue;
- spin_lock(&cpu_writer->lock);
+ mutex_lock(&cpu_writer->lock);
atomic_add(cpu_writer->count, &mnt->__mnt_writers);
cpu_writer->count = 0;
/*
@@ -624,7 +630,7 @@ static inline void __mntput(struct vfsmo
* it to be valid.
*/
cpu_writer->mnt = NULL;
- spin_unlock(&cpu_writer->lock);
+ mutex_unlock(&cpu_writer->lock);
}
/*
* This probably indicates that somebody messed
_
2008-04-29 18:59 [RFC][PATCH 0/5] do remounts without consulting sb->s_files list Dave Hansen
2008-04-29 18:59 ` Dave Hansen [this message]
2008-04-29 18:59 ` [RFC][PATCH 2/5] introduce simple_set_mnt_no_get() helper for NFS Dave Hansen
2008-04-29 21:34 ` Trond Myklebust
2008-04-30 1:46 ` Dave Hansen
2008-05-01 16:58 ` Matthew Wilcox
2008-04-29 18:59 ` [RFC][PATCH 3/5] reintroduce list of vfsmounts over superblock Dave Hansen
2008-04-29 18:59 ` [RFC][PATCH 4/5] must hold lock_super() to set initial mount writer Dave Hansen
2008-04-30 9:25 ` Miklos Szeredi
2008-05-01 16:26 ` Dave Hansen
2008-04-29 18:59 ` [RFC][PATCH 5/5] check mount writers at superblock remount Dave Hansen