From: Nick Piggin <npiggin@suse.de>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>, linux-fsdevel@vger.kernel.org
Subject: Re: [patch] fs: fix superblock iteration race
Date: Sat, 12 Jun 2010 13:57:52 +1000 [thread overview]
Message-ID: <20100612035751.GG16436@laptop> (raw)
In-Reply-To: <AANLkTilJ748JdsfCTOrMcdkwWjB3HhA1mmhfETc1kyzL@mail.gmail.com>
On Fri, Jun 11, 2010 at 09:06:01AM -0700, Linus Torvalds wrote:
> On Fri, Jun 11, 2010 at 7:50 AM, Nick Piggin <npiggin@suse.de> wrote:
> > Not sure if this is really the _cleanest_ way to fix it. But open coding
> > the list walking is a bit annoying too. And I couldn't see any real way to
> > make the list macro safe. Better ideas?
>
> I really think we should open-code the list walking instead. You
> basically already are doing that, and in a very non-obvious way too
> (i.e. you are mixing the non-open-coded list walker with also explicitly
> playing with the internal variable for that magic walker).
>
> So I would get rid of the 'list_for_each_entry_safe' entirely, and
> replace it with something like
>
> struct list_head *list;
>
> spin_lock(&sb_lock);
> list = super_blocks->next;
> while (list != &super_blocks) {
> struct super_block *sb = list_entry(next, struct super_block, s_list);
> list = list->next;
>
> if (list_empty(&sb->s_instances))
> continue;
>
> if (!sb->s_nr_dentry_unused)
> continue;
>
> sb->s_count++;
> spin_unlock(&sb_lock);
>
> .... whatever ...
>
> spin_lock(&sb_lock);
> /* We dropped the lock, need to re-load the next list entry */
> list = sb->s_list.next;
> __put_super(sb);
> }
>
> which isn't that much more complicated, now is it? Sure, it's
> open-coded, but at least it doesn't play games. And being open-coded,
> it's a lot more honest about the issue. Maybe even add a comment
> saying "we can't use the list_for_each[_safe]() macro, because we
> don't hold the lock and we're not the only ones that may delete
> things" explaining _why_ it's open-coded.
>
> I dunno. Maybe Al disagrees. I just don't like using the "simple
> helpers" and then changing subtly how they work by knowing their
> internals.
Something like this:
--
list_for_each_entry_safe is not suitable to protect against concurrent
modification of the list. Commit 6754af6 introduced a race in sb walking.
list_for_each_entry can use the trick of pinning the current entry while we
drop and retake the lock, because the iteration subsequently follows cur->next.
However, list_for_each_entry_safe saves n=cur->next before entering the loop
body, so when the lock is dropped, n may be deleted.
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
fs/dcache.c | 14 ++++++++++++--
fs/super.c | 51 +++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 57 insertions(+), 8 deletions(-)
Index: linux-2.6/fs/dcache.c
===================================================================
--- linux-2.6.orig/fs/dcache.c
+++ linux-2.6/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
*/
static void prune_dcache(int count)
{
- struct super_block *sb, *n;
+ struct list_head *list;
int w_count;
int unused = dentry_stat.nr_unused;
int prune_ratio;
@@ -549,8 +549,16 @@ static void prune_dcache(int count)
prune_ratio = 1;
else
prune_ratio = unused / count;
+
+ /* see iterate_supers for super_blocks iteration comments */
spin_lock(&sb_lock);
- list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+ list = super_blocks.next;
+ while (list != &super_blocks) {
+ struct super_block *sb;
+
+ sb = list_entry(list, struct super_block, s_list);
+ list = list->next;
+
if (list_empty(&sb->s_instances))
continue;
if (sb->s_nr_dentry_unused == 0)
@@ -590,6 +598,8 @@ static void prune_dcache(int count)
up_read(&sb->s_umount);
}
spin_lock(&sb_lock);
+ /* sb_lock dropped, must reload next */
+ list = sb->s_list.next;
count -= pruned;
__put_super(sb);
/* more work left to do? */
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c
+++ linux-2.6/fs/super.c
@@ -358,10 +358,17 @@ EXPORT_SYMBOL(drop_super);
*/
void sync_supers(void)
{
- struct super_block *sb, *n;
+ struct list_head *list;
+ /* see iterate_supers for super_blocks iteration comments */
spin_lock(&sb_lock);
- list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+ list = super_blocks.next;
+ while (list != &super_blocks) {
+ struct super_block *sb;
+
+ sb = list_entry(list, struct super_block, s_list);
+ list = list->next;
+
if (list_empty(&sb->s_instances))
continue;
if (sb->s_op->write_super && sb->s_dirt) {
@@ -374,6 +381,8 @@ void sync_supers(void)
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /* sb_lock dropped, must reload next */
+ list = sb->s_list.next;
__put_super(sb);
}
}
@@ -390,10 +399,25 @@ void sync_supers(void)
*/
void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
{
- struct super_block *sb, *n;
+ struct list_head *list;
+ /*
+ * Walk the list of super_blocks:
+ * Cannot use list_for_each_entry because __put_super may delete
+ * sb from the list.
+ * Cannot use list_for_each_entry_safe because it loads both the
+ * current and next list entries before the loop body. When dropping
+ * the lock we have only pinned the current entry in the list, next
+ * may be deleted.
+ */
spin_lock(&sb_lock);
- list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+ list = super_blocks.next;
+ while (list != &super_blocks) {
+ struct super_block *sb;
+
+ sb = list_entry(list, struct super_block, s_list);
+ list = list->next;
+
if (list_empty(&sb->s_instances))
continue;
sb->s_count++;
@@ -405,6 +429,12 @@ void iterate_supers(void (*f)(struct sup
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /*
+ * sb_lock dropped, we must reload next entry. We can reload it
+ * from sb because we have that element pinned in the list with
+ * s_count.
+ */
+ list = sb->s_list.next;
__put_super(sb);
}
spin_unlock(&sb_lock);
@@ -568,10 +598,17 @@ int do_remount_sb(struct super_block *sb
static void do_emergency_remount(struct work_struct *work)
{
- struct super_block *sb, *n;
+ struct list_head *list;
+ /* see iterate_supers for super_blocks iteration comments */
spin_lock(&sb_lock);
- list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+ list = super_blocks.next;
+ while (list != &super_blocks) {
+ struct super_block *sb;
+
+ sb = list_entry(list, struct super_block, s_list);
+ list = list->next;
+
if (list_empty(&sb->s_instances))
continue;
sb->s_count++;
@@ -585,6 +622,8 @@ static void do_emergency_remount(struct
}
up_write(&sb->s_umount);
spin_lock(&sb_lock);
+ /* sb_lock dropped, must reload next */
+ list = sb->s_list.next;
__put_super(sb);
}
spin_unlock(&sb_lock);
next prev parent reply other threads:[~2010-06-12 3:58 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-06-11 14:50 [patch] fs: fix superblock iteration race Nick Piggin
2010-06-11 16:06 ` Linus Torvalds
2010-06-12 3:37 ` Nick Piggin
2010-06-12 3:57 ` Nick Piggin [this message]
2010-06-12 4:15 ` Linus Torvalds
2010-06-12 4:38 ` Nick Piggin
2010-06-12 4:46 ` Linus Torvalds
2010-06-14 15:07 ` Nick Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100612035751.GG16436@laptop \
--to=npiggin@suse.de \
--cc=linux-fsdevel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).