Re: [patch] fs: fix superblock iteration race

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Nick Piggin <npiggin@suse.de>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>, linux-fsdevel@vger.kernel.org
Subject: Re: [patch] fs: fix superblock iteration race
Date: Sat, 12 Jun 2010 13:57:52 +1000	[thread overview]
Message-ID: <20100612035751.GG16436@laptop> (raw)
In-Reply-To: <AANLkTilJ748JdsfCTOrMcdkwWjB3HhA1mmhfETc1kyzL@mail.gmail.com>

On Fri, Jun 11, 2010 at 09:06:01AM -0700, Linus Torvalds wrote:
> On Fri, Jun 11, 2010 at 7:50 AM, Nick Piggin <npiggin@suse.de> wrote:
> > Not sure if this is really the _cleanest_ way to fix it. But open coding
> > the list walking is a bit annoying too. And I couldn't see any real way to
> > make the list macro safe. Better ideas?
> 
> I really think we should open-code the list walking instead. You
> basically already are doing that, and in a very non-obvious way too
> (ie you are mixing the non-open-coded list walker with also explicitly
> playing with the internal variable for that magic walker).
> 
> So I would get rid of the 'list_for_each_entry_safe' entirely, and
> replace it with something like
> 
>    struct list_head *list;
> 
>    spin_lock(&sb_lock);
>    list = super_blocks.next;
>    while (list != &super_blocks) {
>       struct super_block *sb = list_entry(list, struct super_block, s_list);
>       list = list->next;
> 
>       if (list_empty(&sb->s_instances))
>          continue;
> 
>       if (!sb->s_nr_dentry_unused)
>          continue;
> 
>       sb->s_count++;
>       spin_unlock(&sb_lock);
> 
>       .... whatever ...
> 
>       spin_lock(&sb_lock);
>       /* We dropped the lock, need to re-load the next list entry */
>       list = sb->s_list.next;
>       __put_super(sb);
>    }
> 
> which isn't that much more complicated, now is it? Sure, it's
> open-coded, but at least it doesn't play games. And being open-coded,
> it's a lot more honest about the issue. Maybe even add a comment
> saying "we can't use the list_for_each[_safe]() macro, because we
> don't hold the lock and we're not the only ones that may delete
> things" explaining _why_ it's open-coded.
> 
> I dunno. Maybe Al disagrees. I just don't like using the "simple
> helpers" and then changing subtly how they work by knowing their
> internals.

Something like this
--

list_for_each_entry_safe is not suitable to protect against concurrent
modification of the list. 6754af6 introduced a race in sb walking.

list_for_each_entry can use the trick of pinning the current entry while we
drop and retake the lock because the iteration subsequently follows cur->next.
However list_for_each_entry_safe saves n=cur->next before entering the loop
body, so when the lock is dropped, n may be deleted.

Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 fs/dcache.c |   14 ++++++++++++--
 fs/super.c  |   51 +++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 57 insertions(+), 8 deletions(-)

Index: linux-2.6/fs/dcache.c
===================================================================
--- linux-2.6.orig/fs/dcache.c
+++ linux-2.6/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
  */
 static void prune_dcache(int count)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 	int w_count;
 	int unused = dentry_stat.nr_unused;
 	int prune_ratio;
@@ -549,8 +549,16 @@ static void prune_dcache(int count)
 		prune_ratio = 1;
 	else
 		prune_ratio = unused / count;
+
+	/* see iterate_supers for super_blocks iteration comments */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_nr_dentry_unused == 0)
@@ -590,6 +598,8 @@ static void prune_dcache(int count)
 			up_read(&sb->s_umount);
 		}
 		spin_lock(&sb_lock);
+		/* sb_lock dropped, must reload next */
+		list = sb->s_list.next;
 		count -= pruned;
 		__put_super(sb);
 		/* more work left to do? */
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c
+++ linux-2.6/fs/super.c
@@ -358,10 +358,17 @@ EXPORT_SYMBOL(drop_super);
  */
 void sync_supers(void)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 
+	/* see iterate_supers for super_blocks iteration comments */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_op->write_super && sb->s_dirt) {
@@ -374,6 +381,8 @@ void sync_supers(void)
 			up_read(&sb->s_umount);
 
 			spin_lock(&sb_lock);
+			/* sb_lock dropped, must reload next */
+			list = sb->s_list.next;
 			__put_super(sb);
 		}
 	}
@@ -390,10 +399,25 @@ void sync_supers(void)
  */
 void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 
+	/*
+	 * Walk the list of super_blocks:
+	 * Cannot use list_for_each_entry because __put_super may delete
+	 * sb from the list.
+	 * Cannot use list_for_each_entry_safe because it loads both the
+	 * current and next list entries before the loop body. When dropping
+	 * the lock we have only pinned the current entry in the list, next
+	 * may be deleted.
+	 */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -405,6 +429,12 @@ void iterate_supers(void (*f)(struct sup
 		up_read(&sb->s_umount);
 
 		spin_lock(&sb_lock);
+		/*
+		 * sb_lock dropped, we must reload next entry. We can reload it
+		 * from sb because we have that element pinned in the list with
+		 * s_count.
+		 */
+		list = sb->s_list.next;
 		__put_super(sb);
 	}
 	spin_unlock(&sb_lock);
@@ -568,10 +598,17 @@ int do_remount_sb(struct super_block *sb
 
 static void do_emergency_remount(struct work_struct *work)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 
+	/* see iterate_supers for super_blocks iteration comments */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -585,6 +622,8 @@ static void do_emergency_remount(struct
 		}
 		up_write(&sb->s_umount);
 		spin_lock(&sb_lock);
+		/* sb_lock dropped, must reload next */
+		list = sb->s_list.next;
 		__put_super(sb);
 	}
 	spin_unlock(&sb_lock);

next prev parent reply	other threads:[~2010-06-12  3:58 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-11 14:50 [patch] fs: fix superblock iteration race Nick Piggin
2010-06-11 16:06 ` Linus Torvalds
2010-06-12  3:37   ` Nick Piggin
2010-06-12  3:57   ` Nick Piggin [this message]
2010-06-12  4:15     ` Linus Torvalds
2010-06-12  4:38       ` Nick Piggin
2010-06-12  4:46         ` Linus Torvalds
2010-06-14 15:07           ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100612035751.GG16436@laptop \
    --to=npiggin@suse.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).