netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Al Viro <viro@ZenIV.linux.org.uk>
To: Neil Brown <neilb@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Kevin Winchester <kjwinchester@gmail.com>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	Arjan van de Ven <arjan@linux.intel.com>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	NetDev <netdev@vger.kernel.org>,
	gregkh@suse.de
Subject: Re: Top 10 kernel oopses for the week ending January 5th, 2008
Date: Thu, 10 Jan 2008 05:53:59 +0000	[thread overview]
Message-ID: <20080110055359.GA27894@ZenIV.linux.org.uk> (raw)
In-Reply-To: <18309.39804.31974.851666@notabene.brown>

On Thu, Jan 10, 2008 at 03:13:48PM +1100, Neil Brown wrote:
> > What guarantees that it doesn't happen before we get to callback?  AFAICS,
> > nothing whatsoever...
> 
> Yes, that's bad isn't it :-)
> 
> I think I should be using sysfs_schedule_callback here.  That makes the 
> required 'get' and 'put' calls.... but it can fail with -ENOMEM.  I
> wonder what I do if -ENOMEM???  Maybe I'll just continue to roll my
> one :-( 

How about this instead (completely untested)

	* split failure exits
	* switch to kick_rdev_from_array()
	* fold unbind_rdev_from_array() into it (no other callers anymore)
	* take export_rdev() into failure case in bind_rdev_to_array()
	* in kick_rdev_from_array() do what export_rdev() does sans
kobject_put() and do that before schedule_work().  Take kobject_put() into
delayed_delete().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/drivers/md/md.c b/drivers/md/md.c
index cef9ebd..116cc5a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1344,6 +1344,39 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
 
 static LIST_HEAD(pending_raid_disks);
 
+static void unlock_rdev(mdk_rdev_t *rdev)
+{
+	struct block_device *bdev = rdev->bdev;
+	rdev->bdev = NULL;
+	if (!bdev)
+		MD_BUG();
+	bd_release(bdev);
+	blkdev_put(bdev);
+}
+
+void md_autodetect_dev(dev_t dev);
+
+static void __export_rdev(mdk_rdev_t * rdev)
+{
+	char b[BDEVNAME_SIZE];
+	printk(KERN_INFO "md: export_rdev(%s)\n",
+		bdevname(rdev->bdev,b));
+	if (rdev->mddev)
+		MD_BUG();
+	free_disk_sb(rdev);
+	list_del_init(&rdev->same_set);
+#ifndef MODULE
+	md_autodetect_dev(rdev->bdev->bd_dev);
+#endif
+	unlock_rdev(rdev);
+}
+
+static void export_rdev(mdk_rdev_t * rdev)
+{
+	__export_rdev(rdev);
+	kobject_put(&rdev->kobj);
+}
+
 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 {
 	char b[BDEVNAME_SIZE];
@@ -1353,7 +1386,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 
 	if (rdev->mddev) {
 		MD_BUG();
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 	/* make sure rdev->size exceeds mddev->size */
 	if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
@@ -1362,8 +1396,10 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 			 * If mddev->level <= 0, then we don't care
 			 * about aligning sizes (e.g. linear)
 			 */
-			if (mddev->level > 0)
-				return -ENOSPC;
+			if (mddev->level > 0) {
+				err = -ENOSPC;
+				goto out;
+			}
 		} else
 			mddev->size = rdev->size;
 	}
@@ -1379,12 +1415,16 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 			choice++;
 		rdev->desc_nr = choice;
 	} else {
-		if (find_rdev_nr(mddev, rdev->desc_nr))
-			return -EBUSY;
+		if (find_rdev_nr(mddev, rdev->desc_nr)) {
+			err = -EBUSY;
+			goto out;
+		}
 	}
 	bdevname(rdev->bdev,b);
-	if (kobject_set_name(&rdev->kobj, "dev-%s", b) < 0)
-		return -ENOMEM;
+	if (kobject_set_name(&rdev->kobj, "dev-%s", b) < 0) {
+		err = -ENOMEM;
+		goto out;
+	}
 	while ( (s=strchr(rdev->kobj.k_name, '/')) != NULL)
 		*s = '!';
 			
@@ -1407,9 +1447,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 	bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
 	return 0;
 
- fail:
+fail:
 	printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
 	       b, mdname(mddev));
+out:
+	export_rdev(rdev);
 	return err;
 }
 
@@ -1417,19 +1459,22 @@ static void delayed_delete(struct work_struct *ws)
 {
 	mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
 	kobject_del(&rdev->kobj);
+	kobject_put(&rdev->kobj);
 }
 
-static void unbind_rdev_from_array(mdk_rdev_t * rdev)
+static void kick_rdev_from_array(mdk_rdev_t * rdev)
 {
 	char b[BDEVNAME_SIZE];
 	if (!rdev->mddev) {
 		MD_BUG();
+		export_rdev(rdev);
 		return;
 	}
 	bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
 	list_del_init(&rdev->same_set);
 	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
 	rdev->mddev = NULL;
+	__export_rdev(rdev);
 	sysfs_remove_link(&rdev->kobj, "block");
 
 	/* We need to delay this, otherwise we can deadlock when
@@ -1467,40 +1512,6 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 	return err;
 }
 
-static void unlock_rdev(mdk_rdev_t *rdev)
-{
-	struct block_device *bdev = rdev->bdev;
-	rdev->bdev = NULL;
-	if (!bdev)
-		MD_BUG();
-	bd_release(bdev);
-	blkdev_put(bdev);
-}
-
-void md_autodetect_dev(dev_t dev);
-
-static void export_rdev(mdk_rdev_t * rdev)
-{
-	char b[BDEVNAME_SIZE];
-	printk(KERN_INFO "md: export_rdev(%s)\n",
-		bdevname(rdev->bdev,b));
-	if (rdev->mddev)
-		MD_BUG();
-	free_disk_sb(rdev);
-	list_del_init(&rdev->same_set);
-#ifndef MODULE
-	md_autodetect_dev(rdev->bdev->bd_dev);
-#endif
-	unlock_rdev(rdev);
-	kobject_put(&rdev->kobj);
-}
-
-static void kick_rdev_from_array(mdk_rdev_t * rdev)
-{
-	unbind_rdev_from_array(rdev);
-	export_rdev(rdev);
-}
-
 static void export_array(mddev_t *mddev)
 {
 	struct list_head *tmp;
@@ -2576,8 +2587,10 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len)
 						       mdk_rdev_t, same_set);
 			err = super_types[mddev->major_version]
 				.load_super(rdev, rdev0, mddev->minor_version);
-			if (err < 0)
-				goto out;
+			if (err < 0) {
+				export_rdev(rdev);
+				return err;
+			}
 		}
 	} else
 		rdev = md_import_device(dev, -1, -1);
@@ -2585,9 +2598,6 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len)
 	if (IS_ERR(rdev))
 		return PTR_ERR(rdev);
 	err = bind_rdev_to_array(rdev, mddev);
- out:
-	if (err)
-		export_rdev(rdev);
 	return err ? err : len;
 }
 
@@ -3637,8 +3647,7 @@ static void autorun_devices(int part)
 			printk(KERN_INFO "md: created %s\n", mdname(mddev));
 			ITERATE_RDEV_GENERIC(candidates,rdev,tmp) {
 				list_del_init(&rdev->same_set);
-				if (bind_rdev_to_array(rdev, mddev))
-					export_rdev(rdev);
+				bind_rdev_to_array(rdev, mddev);
 			}
 			autorun_array(mddev);
 			mddev_unlock(mddev);
@@ -3807,7 +3816,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 		return -EOVERFLOW;
 
 	if (!mddev->raid_disks) {
-		int err;
 		/* expecting a device which has a superblock */
 		rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
 		if (IS_ERR(rdev)) {
@@ -3830,10 +3838,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 				return -EINVAL;
 			}
 		}
-		err = bind_rdev_to_array(rdev, mddev);
-		if (err)
-			export_rdev(rdev);
-		return err;
+		return bind_rdev_to_array(rdev, mddev);
 	}
 
 	/*
@@ -3887,10 +3892,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 				validate_super(mddev, rdev);
 			err = mddev->pers->hot_add_disk(mddev, rdev);
 			if (err)
-				unbind_rdev_from_array(rdev);
+				kick_rdev_from_array(rdev);
 		}
-		if (err)
-			export_rdev(rdev);
 
 		md_update_sb(mddev, 1);
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -3908,7 +3911,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 	}
 
 	if (!(info->state & (1<<MD_DISK_FAULTY))) {
-		int err;
 		rdev = md_import_device (dev, -1, 0);
 		if (IS_ERR(rdev)) {
 			printk(KERN_WARNING 
@@ -3938,11 +3940,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 			rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
 		rdev->size = calc_dev_size(rdev, mddev->chunk_size);
 
-		err = bind_rdev_to_array(rdev, mddev);
-		if (err) {
-			export_rdev(rdev);
-			return err;
-		}
+		return bind_rdev_to_array(rdev, mddev);
 	}
 
 	return 0;
@@ -4018,15 +4016,15 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
 		printk(KERN_WARNING 
 			"md: can not hot-add faulty %s disk to %s!\n",
 			bdevname(rdev->bdev,b), mdname(mddev));
-		err = -EINVAL;
-		goto abort_export;
+		export_rdev(rdev);
+		return -EINVAL;
 	}
 	clear_bit(In_sync, &rdev->flags);
 	rdev->desc_nr = -1;
 	rdev->saved_raid_disk = -1;
 	err = bind_rdev_to_array(rdev, mddev);
 	if (err)
-		goto abort_export;
+		return err;
 
 	/*
 	 * The rest should better be atomic, we can have disk failures
@@ -4036,8 +4034,8 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
 	if (rdev->desc_nr == mddev->max_disks) {
 		printk(KERN_WARNING "%s: can not hot-add to full array!\n",
 			mdname(mddev));
-		err = -EBUSY;
-		goto abort_unbind_export;
+		kick_rdev_from_array(rdev);
+		return -EBUSY;
 	}
 
 	rdev->raid_disk = -1;
@@ -4052,13 +4050,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
 	md_wakeup_thread(mddev->thread);
 	md_new_event(mddev);
 	return 0;
-
-abort_unbind_export:
-	unbind_rdev_from_array(rdev);

  reply	other threads:[~2008-01-10  5:53 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-01-05 21:06 Top 10 kernel oopses for the week ending January 5th, 2008 Arjan van de Ven
2008-01-05 21:26 ` Al Viro
2008-01-05 21:39 ` Al Viro
2008-01-07 17:44   ` J. Bruce Fields
2008-01-08  1:19     ` Kevin Winchester
2008-01-08  3:26       ` Linus Torvalds
2008-01-08  5:59         ` Al Viro
2008-01-08  7:33           ` Jarek Poplawski
2008-01-10  4:13           ` Neil Brown
2008-01-10  5:53             ` Al Viro [this message]
2008-01-14  1:36               ` Neil Brown
2008-01-08 16:14         ` Randy Dunlap
2008-01-08 17:42           ` Arjan van de Ven
2008-01-08 18:08             ` Linus Torvalds
2008-01-08 18:16               ` Arjan van de Ven
2008-01-08 18:27                 ` Linus Torvalds
2008-01-08 19:05                   ` Arjan van de Ven
2008-01-08 19:31                     ` Linus Torvalds
2008-01-08 22:56                       ` Rafael J. Wysocki
2008-01-08 17:08         ` Andi Kleen
2008-01-06  3:30 ` Andi Kleen
2008-01-06  3:31   ` Arjan van de Ven
2008-01-06  3:50     ` Andi Kleen
2008-01-09 14:12 ` Johannes Berg
2008-01-09 15:28   ` Arjan van de Ven

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080110055359.GA27894@ZenIV.linux.org.uk \
    --to=viro@zeniv.linux.org.uk \
    --cc=akpm@linux-foundation.org \
    --cc=arjan@linux.intel.com \
    --cc=bfields@fieldses.org \
    --cc=gregkh@suse.de \
    --cc=kjwinchester@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=netdev@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).