From: Oleg Nesterov <oleg@redhat.com>
To: Al Viro <viro@zeniv.linux.org.uk>, Jan Kara <jack@suse.cz>,
Linus Torvalds <torvalds@linux-foundation.org>,
Paul McKenney <paulmck@linux.vnet.ibm.com>,
Peter Zijlstra <peterz@infradead.org>
Cc: Daniel Wagner <daniel.wagner@bmw-carit.de>,
Davidlohr Bueso <dave@stgolabs.net>,
Ingo Molnar <mingo@redhat.com>, Tejun Heo <tj@kernel.org>,
linux-kernel@vger.kernel.org
Subject: [PATCH 4/4] change sb_writers to use percpu_rw_semaphore
Date: Mon, 13 Jul 2015 23:25:59 +0200 [thread overview]
Message-ID: <20150713212559.GA13886@redhat.com> (raw)
In-Reply-To: <20150713212536.GA13855@redhat.com>
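__sb_start_write() and __sb_end_write() can use percpu_down_read() and percpu_up_read() on a per-level percpu_rw_semaphore instead of the open-coded per-cpu counters and wait queues.
Note: the non-blocking (!wait) case of __sb_start_write() simply returns 0 for now, because percpu_down_read_trylock() has not been merged yet.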
---
fs/super.c | 135 +++++++++++++++++-----------------------------------
include/linux/fs.h | 14 +----
2 files changed, 47 insertions(+), 102 deletions(-)
diff --git a/fs/super.c b/fs/super.c
index 94303fc..6e336b8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -147,7 +147,7 @@ static void destroy_super(struct super_block *s)
list_lru_destroy(&s->s_dentry_lru);
list_lru_destroy(&s->s_inode_lru);
for (i = 0; i < SB_FREEZE_LEVELS; i++)
- percpu_counter_destroy(&s->s_writers.counter[i]);
+ percpu_free_rwsem(s->s_writers.rw_sem + i);
security_sb_free(s);
WARN_ON(!list_empty(&s->s_mounts));
kfree(s->s_subtype);
@@ -178,14 +178,10 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
goto fail;
for (i = 0; i < SB_FREEZE_LEVELS; i++) {
- if (percpu_counter_init(&s->s_writers.counter[i], 0,
- GFP_KERNEL) < 0)
- goto fail;
- lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
- &type->s_writers_key[i], 0);
+ __percpu_init_rwsem(&s->s_writers.rw_sem[i],
+ sb_writers_name[i],
+ &type->s_writers_key[i]);
}
- init_waitqueue_head(&s->s_writers.wait);
- init_waitqueue_head(&s->s_writers.wait_unfrozen);
s->s_bdi = &noop_backing_dev_info;
s->s_flags = flags;
INIT_HLIST_NODE(&s->s_instances);
@@ -1146,43 +1142,10 @@ out:
*/
void __sb_end_write(struct super_block *sb, int level)
{
- percpu_counter_dec(&sb->s_writers.counter[level-1]);
- /*
- * Make sure s_writers are updated before we wake up waiters in
- * freeze_super().
- */
- smp_mb();
- if (waitqueue_active(&sb->s_writers.wait))
- wake_up(&sb->s_writers.wait);
- rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+ percpu_up_read(sb->s_writers.rw_sem + level-1);
}
EXPORT_SYMBOL(__sb_end_write);
-#ifdef CONFIG_LOCKDEP
-/*
- * We want lockdep to tell us about possible deadlocks with freezing but
- * it's it bit tricky to properly instrument it. Getting a freeze protection
- * works as getting a read lock but there are subtle problems. XFS for example
- * gets freeze protection on internal level twice in some cases, which is OK
- * only because we already hold a freeze protection also on higher level. Due
- * to these cases we have to tell lockdep we are doing trylock when we
- * already hold a freeze protection for a higher freeze level.
- */
-static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
- unsigned long ip)
-{
- int i;
-
- if (!trylock) {
- for (i = 0; i < level - 1; i++)
- if (lock_is_held(&sb->s_writers.lock_map[i])) {
- trylock = true;
- break;
- }
- }
- rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
-}
-#endif
/*
* This is an internal function, please use sb_start_{write,pagefault,intwrite}
@@ -1190,27 +1153,15 @@ static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
*/
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
-retry:
- if (unlikely(sb->s_writers.frozen >= level)) {
- if (!wait)
- return 0;
- wait_event(sb->s_writers.wait_unfrozen,
- sb->s_writers.frozen < level);
- }
-
-#ifdef CONFIG_LOCKDEP
- acquire_freeze_lock(sb, level, !wait, _RET_IP_);
-#endif
- percpu_counter_inc(&sb->s_writers.counter[level-1]);
/*
- * Make sure counter is updated before we check for frozen.
- * freeze_super() first sets frozen and then checks the counter.
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * !!!!!!!!! percpu_down_read_trylock() wasn't merged yet !!!!!!!!!!!!!
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
*/
- smp_mb();
- if (unlikely(sb->s_writers.frozen >= level)) {
- __sb_end_write(sb, level);
- goto retry;
- }
+ if (!wait)
+ return 0;
+
+ percpu_down_read(sb->s_writers.rw_sem + level-1);
return 1;
}
EXPORT_SYMBOL(__sb_start_write);
@@ -1227,42 +1178,46 @@ EXPORT_SYMBOL(__sb_start_write);
*/
static void sb_wait_write(struct super_block *sb, int level)
{
- s64 writers;
-
- rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
-
- do {
- DEFINE_WAIT(wait);
-
- /*
- * We use a barrier in prepare_to_wait() to separate setting
- * of frozen and checking of the counter
- */
- prepare_to_wait(&sb->s_writers.wait, &wait,
- TASK_UNINTERRUPTIBLE);
-
- writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
- if (writers)
- schedule();
-
- finish_wait(&sb->s_writers.wait, &wait);
- } while (writers);
+ percpu_down_write(sb->s_writers.rw_sem + level-1);
}
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * !!!!!!!!!!! Move this code into kernel/locking/percpu-rwsem.c !!!!!!!!!!!!!!
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
+#include "../kernel/locking/rwsem.h"
/* Avoid the warning from lockdep_sys_exit() */
static void sb_lockdep_release(struct super_block *sb)
{
int level;
+ struct percpu_rw_semaphore *sem;
+
+ for (level = 0; level < SB_FREEZE_LEVELS; ++level) {
+ sem = sb->s_writers.rw_sem + level;
+ rwsem_clear_owner(&sem->rw_sem);
+ rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
+ }
+}
+
+static void sb_lockdep_acquire(struct super_block *sb)
+{
+ int level;
+ struct percpu_rw_semaphore *sem;
for (level = 0; level < SB_FREEZE_LEVELS; ++level) {
- rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
+ sem = sb->s_writers.rw_sem + level;
+ rwsem_set_owner(&sem->rw_sem); /* unneeded */
+ rwsem_acquire(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
}
}
static void sb_unlock_frozen(struct super_block *sb)
{
- smp_wmb();
- wake_up(&sb->s_writers.wait_unfrozen);
+ int level;
+
+ for (level = 0; level < SB_FREEZE_LEVELS; ++level)
+ percpu_up_write(sb->s_writers.rw_sem + level);
}
/**
@@ -1323,8 +1278,6 @@ int freeze_super(struct super_block *sb)
/* From now on, no new normal writers can start */
sb->s_writers.frozen = SB_FREEZE_WRITE;
- smp_wmb();
-
/* Release s_umount to preserve sb_start_write -> s_umount ordering */
up_write(&sb->s_umount);
@@ -1332,9 +1285,8 @@ int freeze_super(struct super_block *sb)
/* Now we go and block page faults... */
down_write(&sb->s_umount);
- sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
- smp_wmb();
+ sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
/* All writers are done so after syncing there won't be dirty data */
@@ -1342,7 +1294,6 @@ int freeze_super(struct super_block *sb)
/* Now wait for internal filesystem counter */
sb->s_writers.frozen = SB_FREEZE_FS;
- smp_wmb();
sb_wait_write(sb, SB_FREEZE_FS);
if (sb->s_op->freeze_fs) {
@@ -1350,8 +1301,8 @@ int freeze_super(struct super_block *sb)
if (ret) {
printk(KERN_ERR
"VFS:Filesystem freeze failed\n");
- sb->s_writers.frozen = SB_UNFROZEN;
sb_unlock_frozen(sb);
+ sb->s_writers.frozen = SB_UNFROZEN;
deactivate_locked_super(sb);
return ret;
}
@@ -1386,6 +1337,8 @@ int thaw_super(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
goto out;
+ sb_lockdep_acquire(sb);
+
if (sb->s_op->unfreeze_fs) {
error = sb->s_op->unfreeze_fs(sb);
if (error) {
@@ -1396,9 +1349,9 @@ int thaw_super(struct super_block *sb)
}
}
+ sb_unlock_frozen(sb);
out:
sb->s_writers.frozen = SB_UNFROZEN;
- sb_unlock_frozen(sb);
deactivate_locked_super(sb);
return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e..314e2d0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1,7 +1,6 @@
#ifndef _LINUX_FS_H
#define _LINUX_FS_H
-
#include <linux/linkage.h>
#include <linux/wait.h>
#include <linux/kdev_t.h>
@@ -30,6 +29,7 @@
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/blk_types.h>
+#include <linux/percpu-rwsem.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -1246,16 +1246,8 @@ enum {
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
struct sb_writers {
- /* Counters for counting writers at each level */
- struct percpu_counter counter[SB_FREEZE_LEVELS];
- wait_queue_head_t wait; /* queue for waiting for
- writers / faults to finish */
- int frozen; /* Is sb frozen? */
- wait_queue_head_t wait_unfrozen; /* queue for waiting for
- sb to be thawed */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map lock_map[SB_FREEZE_LEVELS];
-#endif
+ int frozen; /* Is sb frozen? */
+ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
struct super_block {
--
1.5.5.1
next prev parent reply other threads:[~2015-07-13 21:27 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-13 21:25 [PATCH RFC 0/4] change sb_writers to use percpu_rw_semaphore Oleg Nesterov
2015-07-13 21:25 ` [PATCH 1/4] change get_super_thawed() to use sb_start/end_write() Oleg Nesterov
2015-07-14 10:49 ` Jan Kara
2015-07-14 13:38 ` Oleg Nesterov
2015-07-13 21:25 ` [PATCH 2/4] introduce sb_unlock_frozen() Oleg Nesterov
2015-07-13 21:25 ` [PATCH 3/4] introduce sb_lockdep_release() Oleg Nesterov
2015-07-13 21:25 ` Oleg Nesterov [this message]
2015-07-13 22:23 ` [PATCH RFC 0/4] change sb_writers to use percpu_rw_semaphore Dave Chinner
2015-07-13 22:42 ` Oleg Nesterov
2015-07-13 23:14 ` Dave Chinner
2015-07-14 10:48 ` Jan Kara
2015-07-14 13:37 ` Oleg Nesterov
2015-07-14 21:17 ` Dave Hansen
2015-07-14 21:22 ` Oleg Nesterov
2015-07-14 21:41 ` Dave Hansen
2015-07-15 6:47 ` Jan Kara
2015-07-15 18:19 ` Oleg Nesterov
2015-07-16 7:26 ` Jan Kara
2015-07-16 7:30 ` Dave Hansen
2015-07-16 8:55 ` Jan Kara
2015-07-16 17:32 ` Oleg Nesterov
2015-07-17 1:27 ` Dave Chinner
2015-07-17 17:31 ` Oleg Nesterov
2015-07-17 22:40 ` Dave Chinner
2015-07-20 8:26 ` Jan Kara
2015-07-22 21:09 ` Oleg Nesterov
2015-07-20 16:23 ` Oleg Nesterov
-- strict thread matches above, loose matches on Subject: below --
2015-07-22 21:15 [PATCH " Oleg Nesterov
2015-07-22 21:15 ` [PATCH 4/4] " Oleg Nesterov
2015-07-22 21:34 ` Oleg Nesterov
2015-07-28 8:34 ` Jan Kara
2015-08-03 17:30 ` Oleg Nesterov
2015-08-07 19:54 ` Oleg Nesterov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150713212559.GA13886@redhat.com \
--to=oleg@redhat.com \
--cc=daniel.wagner@bmw-carit.de \
--cc=dave@stgolabs.net \
--cc=jack@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=tj@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.