From: Al Viro <viro@ZenIV.linux.org.uk>
To: Neil Brown <neilb@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
Kevin Winchester <kjwinchester@gmail.com>,
"J. Bruce Fields" <bfields@fieldses.org>,
Arjan van de Ven <arjan@linux.intel.com>,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
NetDev <netdev@vger.kernel.org>,
gregkh@suse.de
Subject: Re: Top 10 kernel oopses for the week ending January 5th, 2008
Date: Thu, 10 Jan 2008 05:53:59 +0000 [thread overview]
Message-ID: <20080110055359.GA27894@ZenIV.linux.org.uk> (raw)
In-Reply-To: <18309.39804.31974.851666@notabene.brown>
On Thu, Jan 10, 2008 at 03:13:48PM +1100, Neil Brown wrote:
> > What guarantees that it doesn't happen before we get to callback? AFAICS,
> > nothing whatsoever...
>
> Yes, that's bad isn't it :-)
>
> I think I should be using sysfs_schedule_callback here. That makes the
> required 'get' and 'put' calls.... but it can fail with -ENOMEM. I
> wonder what I do if -ENOMEM??? Maybe I'll just continue to roll my
> one :-(
How about this instead (completely untested)
* split failure exits
* switch to kick_rdev_from_array()
* fold unbind_rdev_from_array() into it (no other callers anymore)
* take export_rdev() into failure case in bind_rdev_to_array()
* in kick_rdev_from_array() do what export_rdev() does sans
kobject_put() and do that before schedule_work(). Take kobject_put() into
delayed_delete().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/drivers/md/md.c b/drivers/md/md.c
index cef9ebd..116cc5a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1344,6 +1344,39 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
static LIST_HEAD(pending_raid_disks);
+static void unlock_rdev(mdk_rdev_t *rdev)
+{
+ struct block_device *bdev = rdev->bdev;
+ rdev->bdev = NULL;
+ if (!bdev)
+ MD_BUG();
+ bd_release(bdev);
+ blkdev_put(bdev);
+}
+
+void md_autodetect_dev(dev_t dev);
+
+static void __export_rdev(mdk_rdev_t * rdev)
+{
+ char b[BDEVNAME_SIZE];
+ printk(KERN_INFO "md: export_rdev(%s)\n",
+ bdevname(rdev->bdev,b));
+ if (rdev->mddev)
+ MD_BUG();
+ free_disk_sb(rdev);
+ list_del_init(&rdev->same_set);
+#ifndef MODULE
+ md_autodetect_dev(rdev->bdev->bd_dev);
+#endif
+ unlock_rdev(rdev);
+}
+
+static void export_rdev(mdk_rdev_t * rdev)
+{
+ __export_rdev(rdev);
+ kobject_put(&rdev->kobj);
+}
+
static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
{
char b[BDEVNAME_SIZE];
@@ -1353,7 +1386,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
if (rdev->mddev) {
MD_BUG();
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
/* make sure rdev->size exceeds mddev->size */
if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
@@ -1362,8 +1396,10 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
* If mddev->level <= 0, then we don't care
* about aligning sizes (e.g. linear)
*/
- if (mddev->level > 0)
- return -ENOSPC;
+ if (mddev->level > 0) {
+ err = -ENOSPC;
+ goto out;
+ }
} else
mddev->size = rdev->size;
}
@@ -1379,12 +1415,16 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
choice++;
rdev->desc_nr = choice;
} else {
- if (find_rdev_nr(mddev, rdev->desc_nr))
- return -EBUSY;
+ if (find_rdev_nr(mddev, rdev->desc_nr)) {
+ err = -EBUSY;
+ goto out;
+ }
}
bdevname(rdev->bdev,b);
- if (kobject_set_name(&rdev->kobj, "dev-%s", b) < 0)
- return -ENOMEM;
+ if (kobject_set_name(&rdev->kobj, "dev-%s", b) < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
while ( (s=strchr(rdev->kobj.k_name, '/')) != NULL)
*s = '!';
@@ -1407,9 +1447,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
return 0;
- fail:
+fail:
printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
b, mdname(mddev));
+out:
+ export_rdev(rdev);
return err;
}
@@ -1417,19 +1459,22 @@ static void delayed_delete(struct work_struct *ws)
{
mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
kobject_del(&rdev->kobj);
+ kobject_put(&rdev->kobj);
}
-static void unbind_rdev_from_array(mdk_rdev_t * rdev)
+static void kick_rdev_from_array(mdk_rdev_t * rdev)
{
char b[BDEVNAME_SIZE];
if (!rdev->mddev) {
MD_BUG();
+ export_rdev(rdev);
return;
}
bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
list_del_init(&rdev->same_set);
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
+ __export_rdev(rdev);
sysfs_remove_link(&rdev->kobj, "block");
/* We need to delay this, otherwise we can deadlock when
@@ -1467,40 +1512,6 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
return err;
}
-static void unlock_rdev(mdk_rdev_t *rdev)
-{
- struct block_device *bdev = rdev->bdev;
- rdev->bdev = NULL;
- if (!bdev)
- MD_BUG();
- bd_release(bdev);
- blkdev_put(bdev);
-}
-
-void md_autodetect_dev(dev_t dev);
-
-static void export_rdev(mdk_rdev_t * rdev)
-{
- char b[BDEVNAME_SIZE];
- printk(KERN_INFO "md: export_rdev(%s)\n",
- bdevname(rdev->bdev,b));
- if (rdev->mddev)
- MD_BUG();
- free_disk_sb(rdev);
- list_del_init(&rdev->same_set);
-#ifndef MODULE
- md_autodetect_dev(rdev->bdev->bd_dev);
-#endif
- unlock_rdev(rdev);
- kobject_put(&rdev->kobj);
-}
-
-static void kick_rdev_from_array(mdk_rdev_t * rdev)
-{
- unbind_rdev_from_array(rdev);
- export_rdev(rdev);
-}
-
static void export_array(mddev_t *mddev)
{
struct list_head *tmp;
@@ -2576,8 +2587,10 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len)
mdk_rdev_t, same_set);
err = super_types[mddev->major_version]
.load_super(rdev, rdev0, mddev->minor_version);
- if (err < 0)
- goto out;
+ if (err < 0) {
+ export_rdev(rdev);
+ return err;
+ }
}
} else
rdev = md_import_device(dev, -1, -1);
@@ -2585,9 +2598,6 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len)
if (IS_ERR(rdev))
return PTR_ERR(rdev);
err = bind_rdev_to_array(rdev, mddev);
- out:
- if (err)
- export_rdev(rdev);
return err ? err : len;
}
@@ -3637,8 +3647,7 @@ static void autorun_devices(int part)
printk(KERN_INFO "md: created %s\n", mdname(mddev));
ITERATE_RDEV_GENERIC(candidates,rdev,tmp) {
list_del_init(&rdev->same_set);
- if (bind_rdev_to_array(rdev, mddev))
- export_rdev(rdev);
+ bind_rdev_to_array(rdev, mddev);
}
autorun_array(mddev);
mddev_unlock(mddev);
@@ -3807,7 +3816,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
return -EOVERFLOW;
if (!mddev->raid_disks) {
- int err;
/* expecting a device which has a superblock */
rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
if (IS_ERR(rdev)) {
@@ -3830,10 +3838,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
return -EINVAL;
}
}
- err = bind_rdev_to_array(rdev, mddev);
- if (err)
- export_rdev(rdev);
- return err;
+ return bind_rdev_to_array(rdev, mddev);
}
/*
@@ -3887,10 +3892,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
validate_super(mddev, rdev);
err = mddev->pers->hot_add_disk(mddev, rdev);
if (err)
- unbind_rdev_from_array(rdev);
+ kick_rdev_from_array(rdev);
}
- if (err)
- export_rdev(rdev);
md_update_sb(mddev, 1);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -3908,7 +3911,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
}
if (!(info->state & (1<<MD_DISK_FAULTY))) {
- int err;
rdev = md_import_device (dev, -1, 0);
if (IS_ERR(rdev)) {
printk(KERN_WARNING
@@ -3938,11 +3940,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
rdev->size = calc_dev_size(rdev, mddev->chunk_size);
- err = bind_rdev_to_array(rdev, mddev);
- if (err) {
- export_rdev(rdev);
- return err;
- }
+ return bind_rdev_to_array(rdev, mddev);
}
return 0;
@@ -4018,15 +4016,15 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
printk(KERN_WARNING
"md: can not hot-add faulty %s disk to %s!\n",
bdevname(rdev->bdev,b), mdname(mddev));
- err = -EINVAL;
- goto abort_export;
+ export_rdev(rdev);
+ return -EINVAL;
}
clear_bit(In_sync, &rdev->flags);
rdev->desc_nr = -1;
rdev->saved_raid_disk = -1;
err = bind_rdev_to_array(rdev, mddev);
if (err)
- goto abort_export;
+ return err;
/*
* The rest should better be atomic, we can have disk failures
@@ -4036,8 +4034,8 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
if (rdev->desc_nr == mddev->max_disks) {
printk(KERN_WARNING "%s: can not hot-add to full array!\n",
mdname(mddev));
- err = -EBUSY;
- goto abort_unbind_export;
+ kick_rdev_from_array(rdev);
+ return -EBUSY;
}
rdev->raid_disk = -1;
@@ -4052,13 +4050,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
md_wakeup_thread(mddev->thread);
md_new_event(mddev);
return 0;
-
-abort_unbind_export:
- unbind_rdev_from_array(rdev);
next prev parent reply other threads:[~2008-01-10 5:53 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-05 21:06 Top 10 kernel oopses for the week ending January 5th, 2008 Arjan van de Ven
2008-01-05 21:26 ` Al Viro
2008-01-05 21:39 ` Al Viro
2008-01-07 17:44 ` J. Bruce Fields
2008-01-08 1:19 ` Kevin Winchester
2008-01-08 3:26 ` Linus Torvalds
2008-01-08 5:59 ` Al Viro
2008-01-08 7:33 ` Jarek Poplawski
2008-01-10 4:13 ` Neil Brown
2008-01-10 5:53 ` Al Viro [this message]
2008-01-14 1:36 ` Neil Brown
2008-01-08 16:14 ` Randy Dunlap
2008-01-08 17:42 ` Arjan van de Ven
2008-01-08 18:08 ` Linus Torvalds
2008-01-08 18:16 ` Arjan van de Ven
2008-01-08 18:27 ` Linus Torvalds
2008-01-08 19:05 ` Arjan van de Ven
2008-01-08 19:31 ` Linus Torvalds
2008-01-08 22:56 ` Rafael J. Wysocki
2008-01-08 17:08 ` Andi Kleen
2008-01-06 3:30 ` Andi Kleen
2008-01-06 3:31 ` Arjan van de Ven
2008-01-06 3:50 ` Andi Kleen
2008-01-09 14:12 ` Johannes Berg
2008-01-09 15:28 ` Arjan van de Ven
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080110055359.GA27894@ZenIV.linux.org.uk \
--to=viro@zeniv.linux.org.uk \
--cc=akpm@linux-foundation.org \
--cc=arjan@linux.intel.com \
--cc=bfields@fieldses.org \
--cc=gregkh@suse.de \
--cc=kjwinchester@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=neilb@suse.de \
--cc=netdev@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).