All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nick Piggin <npiggin@suse.de>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>, linux-fsdevel@vger.kernel.org
Subject: Re: [patch] fs: fix superblock iteration race
Date: Sat, 12 Jun 2010 13:57:52 +1000	[thread overview]
Message-ID: <20100612035751.GG16436@laptop> (raw)
In-Reply-To: <AANLkTilJ748JdsfCTOrMcdkwWjB3HhA1mmhfETc1kyzL@mail.gmail.com>

On Fri, Jun 11, 2010 at 09:06:01AM -0700, Linus Torvalds wrote:
> On Fri, Jun 11, 2010 at 7:50 AM, Nick Piggin <npiggin@suse.de> wrote:
> > Not sure if this is really the _cleanest_ way to fix it. But open coding
> > the list walking is a bit annoying too. And I couldn't see any real way to
> > make the list macro safe. Better ideas?
> 
> I really think we should open-code the list walking instead. You
> basically already are doing that, and in a very non-obvious way too
> (ie you are mixing the non-open-coded list walker with also explicitly
> playing with the internal variable for that magic walker.
> 
> So I would get rid of the 'list_for_each_entry_safe' entirely, and
> replace it with something like
> 
>    struct list_head *list;
> 
>    spin_lock(&sb_lock);
>    list = super_blocks->next;
>    while (list != &super_blocks) {
>       struct super_block *sb = list_entry(next, struct super_block, s_list);
>       list = list->next;
> 
>       if (list_empty(&sb->s_instances))
>          continue;
> 
>       if (!sb->s_nr_dentry_unused)
>          continue;
> 
>       sb->s_count++;
>       spin_unlock(&sb_lock);
> 
>       .... whatever ...
> 
>       spin_lock(&sb_lock);
>       /* We dropped the lock, need to re-load the next list entry */
>       list = sb->s_list.next;
>       __put_super(sb);
>    }
> 
> which isn't that much more complicated, now is it? Sure, it's
> open-coded, but at least it doesn't play games. And being open-coded,
> it's a lot more honest about the issue. Maybe even add a comment
> saying "we can't use the list_for_each[_safe]() macro, because we
> don't hold the lock and we're not the only ones that may delete
> things" explaining _why_ it's open-coded.
> 
> I dunno. Maybe Al disagrees. I just don't like using the "simple
> helpers" and then changing subtly how they work by knowing their
> internals.

Something like this
--

list_for_each_entry_safe is not suitable to protect against concurrent
modification of the list. 6754af6 introduced a race in sb walking.

list_for_each_entry can use the trick of pinning the current entry while we
drop and retake the lock because the iteration subsequently follows cur->next.
However list_for_each_entry_safe saves n=cur->next before entering the loop
body, so when the lock is dropped, n may be deleted.

Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 fs/dcache.c |   14 ++++++++++++--
 fs/super.c  |   51 +++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 57 insertions(+), 8 deletions(-)

Index: linux-2.6/fs/dcache.c
===================================================================
--- linux-2.6.orig/fs/dcache.c
+++ linux-2.6/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
  */
 static void prune_dcache(int count)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 	int w_count;
 	int unused = dentry_stat.nr_unused;
 	int prune_ratio;
@@ -549,8 +549,16 @@ static void prune_dcache(int count)
 		prune_ratio = 1;
 	else
 		prune_ratio = unused / count;
+
+	/* see iterate_supers for super_blocks iteration comments */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_nr_dentry_unused == 0)
@@ -590,6 +598,8 @@ static void prune_dcache(int count)
 			up_read(&sb->s_umount);
 		}
 		spin_lock(&sb_lock);
+		/* sb_lock dropped, must reload next */
+		list = sb->s_list.next;
 		count -= pruned;
 		__put_super(sb);
 		/* more work left to do? */
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c
+++ linux-2.6/fs/super.c
@@ -358,10 +358,17 @@ EXPORT_SYMBOL(drop_super);
  */
 void sync_supers(void)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 
+	/* see iterate_supers for super_blocks iteration comments */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_op->write_super && sb->s_dirt) {
@@ -374,6 +381,8 @@ void sync_supers(void)
 			up_read(&sb->s_umount);
 
 			spin_lock(&sb_lock);
+			/* sb_lock dropped, must reload next */
+			list = sb->s_list.next;
 			__put_super(sb);
 		}
 	}
@@ -390,10 +399,25 @@ void sync_supers(void)
  */
 void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 
+	/*
+	 * Walk the list of super_blocks:
+	 * Cannot use list_for_each_entry because __put_super may delete
+	 * sb from the list.
+	 * Cannot use list_for_each_entry_safe because it loads both the
+	 * current and next list entries before the loop body. When dropping
+	 * the lock we have only pinned the current entry in the list, next
+	 * may be deleted.
+	 */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -405,6 +429,12 @@ void iterate_supers(void (*f)(struct sup
 		up_read(&sb->s_umount);
 
 		spin_lock(&sb_lock);
+		/*
+		 * sb_lock dropped, we must reload next entry. We can reload it
+		 * from sb because we have that element pinned in the list with
+		 * s_count.
+		 */
+		list = sb->s_list.next;
 		__put_super(sb);
 	}
 	spin_unlock(&sb_lock);
@@ -568,10 +598,17 @@ int do_remount_sb(struct super_block *sb
 
 static void do_emergency_remount(struct work_struct *work)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 
+	/* see iterate_supers for super_blocks iteration comments */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -585,6 +622,8 @@ static void do_emergency_remount(struct
 		}
 		up_write(&sb->s_umount);
 		spin_lock(&sb_lock);
+		/* sb_lock dropped, must reload next */
+		list = sb->s_list.next;
 		__put_super(sb);
 	}
 	spin_unlock(&sb_lock);

  parent reply	other threads:[~2010-06-12  3:58 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-11 14:50 [patch] fs: fix superblock iteration race Nick Piggin
2010-06-11 16:06 ` Linus Torvalds
2010-06-12  3:37   ` Nick Piggin
2010-06-12  3:57   ` Nick Piggin [this message]
2010-06-12  4:15     ` Linus Torvalds
2010-06-12  4:38       ` Nick Piggin
2010-06-12  4:46         ` Linus Torvalds
2010-06-14 15:07           ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100612035751.GG16436@laptop \
    --to=npiggin@suse.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.