linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: "Jun'ichi Nomura" <j-nomura@ce.jp.nec.com>
Cc: Jens Axboe <axboe@kernel.dk>,
	Valdis.Kletnieks@vt.edu, linux-kernel@vger.kernel.org,
	linux-raid@vger.kernel.org,
	device-mapper development <dm-devel@redhat.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	linux-fsdevel@vger.kernel.org, Alasdair G Kergon <agk@redhat.com>,
	Milan Broz <mbroz@redhat.com>
Subject: [PATCH UPDATED] block: restore multiple bd_link_disk_holder() support
Date: Fri, 14 Jan 2011 17:10:43 +0100	[thread overview]
Message-ID: <20110114161043.GB978@htj.dyndns.org> (raw)
In-Reply-To: <4D2FFBED.6010406@ce.jp.nec.com>

Commit e09b457b (block: simplify holder symlink handling) incorrectly
assumed that there is only one link at maximum.  dm may use multiple
links and expects block layer to track reference count for each link,
which is different from and unrelated to the exclusive device holder
identified by @holder when the device is opened.

Remove the single holder assumption and automatic removal of the link
and revive the per-link reference count tracking.  The code
essentially behaves the same as before commit e09b457b sans the
unnecessary kobject reference count dancing.

While at it, note that this facility should not be used by anyone else
than the current ones.  Sysfs symlinks shouldn't be abused like this
and the whole thing doesn't belong in the block layer at all.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Milan Broz <mbroz@redhat.com>
Cc: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Neil Brown <neilb@suse.de>
Cc: linux-raid@vger.kernel.org
Cc: Kay Sievers <kay.sievers@vrfy.org>
---
Thanks for the test commands.  They were very helpful.  Can you please
test this one?

Thanks.

 drivers/md/dm-table.c |    1 
 drivers/md/md.c       |    1 
 fs/block_dev.c        |   93 ++++++++++++++++++++++++++++++++++++++++----------
 include/linux/fs.h    |    8 +++-
 4 files changed, 84 insertions(+), 19 deletions(-)

Index: work/include/linux/fs.h
===================================================================
--- work.orig/include/linux/fs.h
+++ work/include/linux/fs.h
@@ -664,7 +664,7 @@ struct block_device {
 	int			bd_holders;
 	bool			bd_write_holder;
 #ifdef CONFIG_SYSFS
-	struct gendisk *	bd_holder_disk;	/* for sysfs slave linkng */
+	struct list_head	bd_holder_disks;
 #endif
 	struct block_device *	bd_contains;
 	unsigned		bd_block_size;
@@ -2045,12 +2045,18 @@ extern struct block_device *blkdev_get_b
 extern int blkdev_put(struct block_device *bdev, fmode_t mode);
 #ifdef CONFIG_SYSFS
 extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
+extern void bd_unlink_disk_holder(struct block_device *bdev,
+				  struct gendisk *disk);
 #else
 static inline int bd_link_disk_holder(struct block_device *bdev,
 				      struct gendisk *disk)
 {
 	return 0;
 }
+static inline void bd_unlink_disk_holder(struct block_device *bdev,
+					 struct gendisk *disk)
+{
+}
 #endif
 #endif
 
Index: work/drivers/md/dm-table.c
===================================================================
--- work.orig/drivers/md/dm-table.c
+++ work/drivers/md/dm-table.c
@@ -347,6 +347,7 @@ static void close_dev(struct dm_dev_inte
 	if (!d->dm_dev.bdev)
 		return;
 
+	bd_unlink_disk_holder(d->dm_dev.bdev, dm_disk(md));
 	blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL);
 	d->dm_dev.bdev = NULL;
 }
Index: work/drivers/md/md.c
===================================================================
--- work.orig/drivers/md/md.c
+++ work/drivers/md/md.c
@@ -1907,6 +1907,7 @@ static void unbind_rdev_from_array(mdk_r
 		MD_BUG();
 		return;
 	}
+	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
 	list_del_rcu(&rdev->same_set);
 	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
 	rdev->mddev = NULL;
Index: work/fs/block_dev.c
===================================================================
--- work.orig/fs/block_dev.c
+++ work/fs/block_dev.c
@@ -426,6 +426,9 @@ static void init_once(void *foo)
 	mutex_init(&bdev->bd_mutex);
 	INIT_LIST_HEAD(&bdev->bd_inodes);
 	INIT_LIST_HEAD(&bdev->bd_list);
+#ifdef CONFIG_SYSFS
+	INIT_LIST_HEAD(&bdev->bd_holder_disks);
+#endif
 	inode_init_once(&ei->vfs_inode);
 	/* Initialize mutex for freeze. */
 	mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -773,6 +776,23 @@ static struct block_device *bd_start_cla
 }
 
 #ifdef CONFIG_SYSFS
+struct bd_holder_disk {
+	struct list_head	list;
+	struct gendisk		*disk;
+	int			refcnt;
+};
+
+static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
+						  struct gendisk *disk)
+{
+	struct bd_holder_disk *holder;
+
+	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
+		if (holder->disk == disk)
+			return holder;
+	return NULL;
+}
+
 static int add_symlink(struct kobject *from, struct kobject *to)
 {
 	return sysfs_create_link(from, to, kobject_name(to));
@@ -788,6 +808,8 @@ static void del_symlink(struct kobject *
  * @bdev: the claimed slave bdev
  * @disk: the holding disk
  *
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
  * This functions creates the following sysfs symlinks.
  *
  * - from "slaves" directory of the holder @disk to the claimed @bdev
@@ -811,47 +833,83 @@ static void del_symlink(struct kobject *
  */
 int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 {
+	struct bd_holder_disk *holder;
 	int ret = 0;
 
 	mutex_lock(&bdev->bd_mutex);
 
-	WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk);
+	WARN_ON_ONCE(!bdev->bd_holder);
 
 	/* FIXME: remove the following once add_disk() handles errors */
 	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
 		goto out_unlock;
 
-	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
-	if (ret)
+	holder = bd_find_holder_disk(bdev, disk);
+	if (holder) {
+		holder->refcnt++;
 		goto out_unlock;
+	}
 
-	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
-	if (ret) {
-		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
+	if (!holder) {
+		ret = -ENOMEM;
 		goto out_unlock;
 	}
 
-	bdev->bd_holder_disk = disk;
+	INIT_LIST_HEAD(&holder->list);
+	holder->disk = disk;
+	holder->refcnt = 1;
+
+	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+	if (ret)
+		goto out_free;
+
+	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+	if (ret)
+		goto out_del;
+
+	list_add(&holder->list, &bdev->bd_holder_disks);
+	goto out_unlock;
+
+out_del:
+	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+out_free:
+	kfree(holder);
 out_unlock:
 	mutex_unlock(&bdev->bd_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(bd_link_disk_holder);
 
-static void bd_unlink_disk_holder(struct block_device *bdev)
+/**
+ * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
+ * @bdev: the calimed slave bdev
+ * @disk: the holding disk
+ *
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 {
-	struct gendisk *disk = bdev->bd_holder_disk;
+	struct bd_holder_disk *holder;
 
-	bdev->bd_holder_disk = NULL;
-	if (!disk)
-		return;
+	mutex_lock(&bdev->bd_mutex);
 
-	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
-	del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+	holder = bd_find_holder_disk(bdev, disk);
+
+	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
+		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+		del_symlink(bdev->bd_part->holder_dir,
+			    &disk_to_dev(disk)->kobj);
+		list_del_init(&holder->list);
+		kfree(holder);
+	}
+
+	mutex_unlock(&bdev->bd_mutex);
 }
-#else
-static inline void bd_unlink_disk_holder(struct block_device *bdev)
-{ }
+EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 #endif
 
 /**
@@ -1374,7 +1432,6 @@ int blkdev_put(struct block_device *bdev
 		 * unblock evpoll if it was a write holder.
 		 */
 		if (bdev_free) {
-			bd_unlink_disk_holder(bdev);
 			if (bdev->bd_write_holder) {
 				disk_unblock_events(bdev->bd_disk);
 				bdev->bd_write_holder = false;

  reply	other threads:[~2011-01-14 16:10 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-01-12 17:34 linux-next - WARNING: at fs/block_dev.c:824 bd_link_disk_holder+0x92/0x1ac() Valdis.Kletnieks
2011-01-13  0:23 ` Milan Broz
2011-01-13  2:19   ` Jun'ichi Nomura
2011-01-13 11:06     ` Tejun Heo
2011-01-13 11:26       ` [dm-devel] " Milan Broz
2011-01-13 12:27         ` Karel Zak
2011-01-13 13:12           ` Tejun Heo
2011-01-13 13:26             ` Karel Zak
2011-01-13 13:37               ` Tejun Heo
2011-01-13 13:52                 ` Tejun Heo
2011-01-13 13:58                 ` Milan Broz
2011-01-13 14:11                   ` Tejun Heo
2011-01-13 14:25                     ` Milan Broz
2011-01-13 14:30                       ` Tejun Heo
2011-01-13 14:43                         ` Kay Sievers
2011-01-13 15:03                           ` Milan Broz
2011-01-14  7:38                             ` Jun'ichi Nomura
2011-01-13 15:59                           ` Karel Zak
2011-01-13 16:10                             ` [dm-devel] " Kay Sievers
2011-01-14 15:07                               ` Karel Zak
2011-01-14 15:23                                 ` Kay Sievers
2011-01-13 14:45                         ` Theodore Tso
2011-01-13 20:18                           ` NeilBrown
2011-01-13 20:41                             ` Ted Ts'o
2011-01-14 16:20                               ` Tejun Heo
     [not found]                               ` <20110114162022.GC978@htj.dyndns.org>
2011-01-14 17:59                                 ` Ted Ts'o
2011-01-14 18:23                                   ` Tejun Heo
2011-01-13 14:49                         ` Milan Broz
2011-01-14 16:35                           ` Tejun Heo
2011-01-13 17:21     ` [PATCH] block: restore multiple bd_link_disk_holder() support Tejun Heo
2011-01-13 18:42       ` Milan Broz
2011-01-14  7:31         ` Jun'ichi Nomura
2011-01-14 16:10           ` Tejun Heo [this message]
2011-01-14 21:09             ` [PATCH UPDATED] " Milan Broz
2011-01-17  0:18               ` Jun'ichi Nomura

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110114161043.GB978@htj.dyndns.org \
    --to=tj@kernel.org \
    --cc=Valdis.Kletnieks@vt.edu \
    --cc=agk@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=dm-devel@redhat.com \
    --cc=j-nomura@ce.jp.nec.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=mbroz@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).