xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] linux/blkfront: fixes for 'xm block-detach ... --force'
@ 2010-01-18 10:19 Jan Beulich
  2010-02-26 21:50 ` Daniel Stodden
  0 siblings, 1 reply; 3+ messages in thread
From: Jan Beulich @ 2010-01-18 10:19 UTC (permalink / raw)
  To: xen-devel; +Cc: Jeremy Fitzhardinge

[-- Attachment #1: Type: text/plain, Size: 8064 bytes --]

Prevent prematurely freeing 'struct blkfront_info' instances (when the
xenbus data structures are gone, but the Linux ones are still needed).

Prevent adding a disk with the same (major, minor) [and hence the same
name and sysfs entries, which leads to oopses] when the previous
instance wasn't fully de-allocated yet.

This still doesn't address all issues resulting from forced detach:
I/O submitted after the detach still blocks forever, likely preventing
subsequent un-mounting from completing. It's not clear to me (not
knowing much about the block layer) how this can be avoided.

This also doesn't address issues with duplicate device creation caused
by re-using the hdXX and sdXX name spaces - this would require
synchronization with the respective native code.

As usual, written against 2.6.32.3 and made apply to the 2.6.18
tree without further testing.

As I believe that pv-ops Xen has the same issue, I'm also attaching
a respective (compile tested only) patch against 2.6.33-rc4.

Signed-off-by: Jan Beulich <jbeulich@novell.com>

--- sle11-2010-01-12.orig/drivers/xen/blkfront/blkfront.c	2009-06-04 10:47:04.000000000 +0200
+++ sle11-2010-01-12/drivers/xen/blkfront/blkfront.c	2010-01-15 16:47:42.000000000 +0100
@@ -426,7 +426,10 @@ static int blkfront_remove(struct xenbus
 
 	blkif_free(info, 0);
 
-	kfree(info);
+	if(info->users == 0)
+		kfree(info);
+	else
+		info->is_ready = -1;
 
 	return 0;
 }
@@ -488,6 +491,9 @@ static void blkif_restart_queue_callback
 int blkif_open(struct inode *inode, struct file *filep)
 {
 	struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+
+	if(info->is_ready < 0)
+		return -ENODEV;
 	info->users++;
 	return 0;
 }
@@ -504,7 +510,10 @@ int blkif_release(struct inode *inode, s
 		struct xenbus_device * dev = info->xbdev;
 		enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
 
-		if (state == XenbusStateClosing && info->is_ready)
+		if(info->is_ready < 0) {
+			blkfront_closing(dev);
+			kfree(info);
+		} else if (state == XenbusStateClosing && info->is_ready)
 			blkfront_closing(dev);
 	}
 	return 0;
@@ -894,7 +903,7 @@ int blkfront_is_ready(struct xenbus_devi
 {
 	struct blkfront_info *info = dev->dev.driver_data;
 
-	return info->is_ready;
+	return info->is_ready > 0;
 }
 
 
--- sle11-2010-01-12.orig/drivers/xen/blkfront/block.h	2009-06-04 10:47:04.000000000 +0200
+++ sle11-2010-01-12/drivers/xen/blkfront/block.h	2010-01-18 08:56:41.000000000 +0100
@@ -78,6 +78,7 @@ struct xlbd_major_info
 	int index;
 	int usage;
 	struct xlbd_type_info *type;
+	struct xlbd_minor_state *minors;
 };
 
 struct blk_shadow {
--- sle11-2010-01-12.orig/drivers/xen/blkfront/vbd.c	2009-06-04 10:47:04.000000000 +0200
+++ sle11-2010-01-12/drivers/xen/blkfront/vbd.c	2010-01-18 08:56:32.000000000 +0100
@@ -48,6 +48,12 @@
 #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
 #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
 
+struct xlbd_minor_state {
+	unsigned int nr;
+	unsigned long *bitmap;
+	spinlock_t lock;
+};
+
 /*
  * For convenience we distinguish between ide, scsi and 'other' (i.e.,
  * potentially combinations of the two) in the naming scheme and in a few other
@@ -97,6 +103,8 @@ static struct xlbd_major_info *major_inf
 #define XLBD_MAJOR_SCSI_RANGE	XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
 #define XLBD_MAJOR_VBD_RANGE	XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
 
+#define XLBD_MAJOR_VBD_ALT(idx) ((idx) ^ XLBD_MAJOR_VBD_START ^ (XLBD_MAJOR_VBD_START + 1))
+
 static struct block_device_operations xlvbd_block_fops =
 {
 	.owner = THIS_MODULE,
@@ -114,6 +122,7 @@ static struct xlbd_major_info *
 xlbd_alloc_major_info(int major, int minor, int index)
 {
 	struct xlbd_major_info *ptr;
+	struct xlbd_minor_state *minors;
 	int do_register;
 
 	ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
@@ -121,6 +130,22 @@ xlbd_alloc_major_info(int major, int min
 		return NULL;
 
 	ptr->major = major;
+	minors = kmalloc(sizeof(*minors), GFP_KERNEL);
+	if (minors == NULL) {
+		kfree(ptr);
+		return NULL;
+	}
+
+	minors->bitmap = kzalloc(BITS_TO_LONGS(256) * sizeof(*minors->bitmap),
+				 GFP_KERNEL);
+	if (minors->bitmap == NULL) {
+		kfree(minors);
+		kfree(ptr);
+		return NULL;
+	}
+
+	spin_lock_init(&minors->lock);
+	minors->nr = 256;
 	do_register = 1;
 
 	switch (index) {
@@ -143,13 +168,19 @@ xlbd_alloc_major_info(int major, int min
 		 * if someone already registered block major 202,
 		 * don't try to register it again
 		 */
-		if (major_info[XLBD_MAJOR_VBD_START] != NULL)
+		if (major_info[XLBD_MAJOR_VBD_ALT(index)] != NULL) {
+			kfree(minors->bitmap);
+			kfree(minors);
+			minors = major_info[XLBD_MAJOR_VBD_ALT(index)]->minors;
 			do_register = 0;
+		}
 		break;
 	}
 
 	if (do_register) {
 		if (register_blkdev(ptr->major, ptr->type->devname)) {
+			kfree(minors->bitmap);
+			kfree(minors);
 			kfree(ptr);
 			return NULL;
 		}
@@ -157,6 +188,7 @@ xlbd_alloc_major_info(int major, int min
 		printk("xen-vbd: registered block device major %i\n", ptr->major);
 	}
 
+	ptr->minors = minors;
 	major_info[index] = ptr;
 	return ptr;
 }
@@ -209,6 +241,61 @@ xlbd_put_major_info(struct xlbd_major_in
 }
 
 static int
+xlbd_reserve_minors(struct xlbd_major_info *mi, unsigned int minor,
+		    unsigned int nr_minors)
+{
+	struct xlbd_minor_state *ms = mi->minors;
+	unsigned int end = minor + nr_minors;
+	int rc;
+
+	if (end > ms->nr) {
+		unsigned long *bitmap, *old;
+
+		bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+				 GFP_KERNEL);
+		if (bitmap == NULL)
+			return -ENOMEM;
+
+		spin_lock(&ms->lock);
+		if (end > ms->nr) {
+			old = ms->bitmap;
+			memcpy(bitmap, ms->bitmap,
+			       BITS_TO_LONGS(ms->nr) * sizeof(*bitmap));
+			ms->bitmap = bitmap;
+			ms->nr = BITS_TO_LONGS(end) * BITS_PER_LONG;
+		} else
+			old = bitmap;
+		spin_unlock(&ms->lock);
+		kfree(old);
+	}
+
+	spin_lock(&ms->lock);
+	if (find_next_bit(ms->bitmap, end, minor) >= end) {
+		for (; minor < end; ++minor)
+			__set_bit(minor, ms->bitmap);
+		rc = 0;
+	} else
+		rc = -EBUSY;
+	spin_unlock(&ms->lock);
+
+	return rc;
+}
+
+static void
+xlbd_release_minors(struct xlbd_major_info *mi, unsigned int minor,
+		    unsigned int nr_minors)
+{
+	struct xlbd_minor_state *ms = mi->minors;
+	unsigned int end = minor + nr_minors;
+
+	BUG_ON(end > ms->nr);
+	spin_lock(&ms->lock);
+	for (; minor < end; ++minor)
+		__clear_bit(minor, ms->bitmap);
+	spin_unlock(&ms->lock);
+}
+
+static int
 xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 {
 	request_queue_t *rq;
@@ -285,9 +372,14 @@ xlvbd_add(blkif_sector_t capacity, int v
 	if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
 		nr_minors = 1 << mi->type->partn_shift;
 
+	err = xlbd_reserve_minors(mi, minor, nr_minors);
+	if (err)
+		goto out;
+	err = -ENODEV;
+
 	gd = alloc_disk(nr_minors);
 	if (gd == NULL)
-		goto out;
+		goto release;
 
 	offset =  mi->index * mi->type->disks_per_major +
 			(minor >> mi->type->partn_shift);
@@ -326,7 +418,7 @@ xlvbd_add(blkif_sector_t capacity, int v
 
 	if (xlvbd_init_blk_queue(gd, sector_size)) {
 		del_gendisk(gd);
-		goto out;
+		goto release;
 	}
 
 	info->rq = gd->queue;
@@ -346,6 +438,8 @@ xlvbd_add(blkif_sector_t capacity, int v
 
 	return 0;
 
+ release:
+	xlbd_release_minors(mi, minor, nr_minors);
  out:
 	if (mi)
 		xlbd_put_major_info(mi);
@@ -356,14 +450,19 @@ xlvbd_add(blkif_sector_t capacity, int v
 void
 xlvbd_del(struct blkfront_info *info)
 {
+	unsigned int minor, nr_minors;
+
 	if (info->mi == NULL)
 		return;
 
 	BUG_ON(info->gd == NULL);
+	minor = info->gd->first_minor;
+	nr_minors = info->gd->minors;
 	del_gendisk(info->gd);
 	put_disk(info->gd);
 	info->gd = NULL;
 
+	xlbd_release_minors(info->mi, minor, nr_minors);
 	xlbd_put_major_info(info->mi);
 	info->mi = NULL;
 



[-- Attachment #2: xenlinux-blkfront-forced-detach.patch --]
[-- Type: text/plain, Size: 7989 bytes --]

Subject: blkfront: fixes for 'xm block-detach ... --force'

Prevent prematurely freeing 'struct blkfront_info' instances (when the
xenbus data structures are gone, but the Linux ones are still needed).

Prevent adding a disk with the same (major, minor) [and hence the same
name and sysfs entries, which leads to oopses] when the previous
instance wasn't fully de-allocated yet.

This still doesn't address all issues resulting from forced detach:
I/O submitted after the detach still blocks forever, likely preventing
subsequent un-mounting from completing. It's not clear to me (not
knowing much about the block layer) how this can be avoided.

This also doesn't address issues with duplicate device creation caused
by re-using the hdXX and sdXX name spaces - this would require
synchronization with the respective native code.

As usual, written against 2.6.32.3 and made apply to the 2.6.18
tree without further testing.

Signed-off-by: Jan Beulich <jbeulich@novell.com>

--- sle11-2010-01-12.orig/drivers/xen/blkfront/blkfront.c	2009-06-04 10:47:04.000000000 +0200
+++ sle11-2010-01-12/drivers/xen/blkfront/blkfront.c	2010-01-15 16:47:42.000000000 +0100
@@ -426,7 +426,10 @@ static int blkfront_remove(struct xenbus
 
 	blkif_free(info, 0);
 
-	kfree(info);
+	if(info->users == 0)
+		kfree(info);
+	else
+		info->is_ready = -1;
 
 	return 0;
 }
@@ -488,6 +491,9 @@ static void blkif_restart_queue_callback
 int blkif_open(struct inode *inode, struct file *filep)
 {
 	struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+
+	if(info->is_ready < 0)
+		return -ENODEV;
 	info->users++;
 	return 0;
 }
@@ -504,7 +510,10 @@ int blkif_release(struct inode *inode, s
 		struct xenbus_device * dev = info->xbdev;
 		enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
 
-		if (state == XenbusStateClosing && info->is_ready)
+		if(info->is_ready < 0) {
+			blkfront_closing(dev);
+			kfree(info);
+		} else if (state == XenbusStateClosing && info->is_ready)
 			blkfront_closing(dev);
 	}
 	return 0;
@@ -894,7 +903,7 @@ int blkfront_is_ready(struct xenbus_devi
 {
 	struct blkfront_info *info = dev->dev.driver_data;
 
-	return info->is_ready;
+	return info->is_ready > 0;
 }
 
 
--- sle11-2010-01-12.orig/drivers/xen/blkfront/block.h	2009-06-04 10:47:04.000000000 +0200
+++ sle11-2010-01-12/drivers/xen/blkfront/block.h	2010-01-18 08:56:41.000000000 +0100
@@ -78,6 +78,7 @@ struct xlbd_major_info
 	int index;
 	int usage;
 	struct xlbd_type_info *type;
+	struct xlbd_minor_state *minors;
 };
 
 struct blk_shadow {
--- sle11-2010-01-12.orig/drivers/xen/blkfront/vbd.c	2009-06-04 10:47:04.000000000 +0200
+++ sle11-2010-01-12/drivers/xen/blkfront/vbd.c	2010-01-18 08:56:32.000000000 +0100
@@ -48,6 +48,12 @@
 #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
 #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
 
+struct xlbd_minor_state {
+	unsigned int nr;
+	unsigned long *bitmap;
+	spinlock_t lock;
+};
+
 /*
  * For convenience we distinguish between ide, scsi and 'other' (i.e.,
  * potentially combinations of the two) in the naming scheme and in a few other
@@ -97,6 +103,8 @@ static struct xlbd_major_info *major_inf
 #define XLBD_MAJOR_SCSI_RANGE	XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
 #define XLBD_MAJOR_VBD_RANGE	XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
 
+#define XLBD_MAJOR_VBD_ALT(idx) ((idx) ^ XLBD_MAJOR_VBD_START ^ (XLBD_MAJOR_VBD_START + 1))
+
 static struct block_device_operations xlvbd_block_fops =
 {
 	.owner = THIS_MODULE,
@@ -114,6 +122,7 @@ static struct xlbd_major_info *
 xlbd_alloc_major_info(int major, int minor, int index)
 {
 	struct xlbd_major_info *ptr;
+	struct xlbd_minor_state *minors;
 	int do_register;
 
 	ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
@@ -121,6 +130,22 @@ xlbd_alloc_major_info(int major, int min
 		return NULL;
 
 	ptr->major = major;
+	minors = kmalloc(sizeof(*minors), GFP_KERNEL);
+	if (minors == NULL) {
+		kfree(ptr);
+		return NULL;
+	}
+
+	minors->bitmap = kzalloc(BITS_TO_LONGS(256) * sizeof(*minors->bitmap),
+				 GFP_KERNEL);
+	if (minors->bitmap == NULL) {
+		kfree(minors);
+		kfree(ptr);
+		return NULL;
+	}
+
+	spin_lock_init(&minors->lock);
+	minors->nr = 256;
 	do_register = 1;
 
 	switch (index) {
@@ -143,13 +168,19 @@ xlbd_alloc_major_info(int major, int min
 		 * if someone already registered block major 202,
 		 * don't try to register it again
 		 */
-		if (major_info[XLBD_MAJOR_VBD_START] != NULL)
+		if (major_info[XLBD_MAJOR_VBD_ALT(index)] != NULL) {
+			kfree(minors->bitmap);
+			kfree(minors);
+			minors = major_info[XLBD_MAJOR_VBD_ALT(index)]->minors;
 			do_register = 0;
+		}
 		break;
 	}
 
 	if (do_register) {
 		if (register_blkdev(ptr->major, ptr->type->devname)) {
+			kfree(minors->bitmap);
+			kfree(minors);
 			kfree(ptr);
 			return NULL;
 		}
@@ -157,6 +188,7 @@ xlbd_alloc_major_info(int major, int min
 		printk("xen-vbd: registered block device major %i\n", ptr->major);
 	}
 
+	ptr->minors = minors;
 	major_info[index] = ptr;
 	return ptr;
 }
@@ -209,6 +241,61 @@ xlbd_put_major_info(struct xlbd_major_in
 }
 
 static int
+xlbd_reserve_minors(struct xlbd_major_info *mi, unsigned int minor,
+		    unsigned int nr_minors)
+{
+	struct xlbd_minor_state *ms = mi->minors;
+	unsigned int end = minor + nr_minors;
+	int rc;
+
+	if (end > ms->nr) {
+		unsigned long *bitmap, *old;
+
+		bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+				 GFP_KERNEL);
+		if (bitmap == NULL)
+			return -ENOMEM;
+
+		spin_lock(&ms->lock);
+		if (end > ms->nr) {
+			old = ms->bitmap;
+			memcpy(bitmap, ms->bitmap,
+			       BITS_TO_LONGS(ms->nr) * sizeof(*bitmap));
+			ms->bitmap = bitmap;
+			ms->nr = BITS_TO_LONGS(end) * BITS_PER_LONG;
+		} else
+			old = bitmap;
+		spin_unlock(&ms->lock);
+		kfree(old);
+	}
+
+	spin_lock(&ms->lock);
+	if (find_next_bit(ms->bitmap, end, minor) >= end) {
+		for (; minor < end; ++minor)
+			__set_bit(minor, ms->bitmap);
+		rc = 0;
+	} else
+		rc = -EBUSY;
+	spin_unlock(&ms->lock);
+
+	return rc;
+}
+
+static void
+xlbd_release_minors(struct xlbd_major_info *mi, unsigned int minor,
+		    unsigned int nr_minors)
+{
+	struct xlbd_minor_state *ms = mi->minors;
+	unsigned int end = minor + nr_minors;
+
+	BUG_ON(end > ms->nr);
+	spin_lock(&ms->lock);
+	for (; minor < end; ++minor)
+		__clear_bit(minor, ms->bitmap);
+	spin_unlock(&ms->lock);
+}
+
+static int
 xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 {
 	request_queue_t *rq;
@@ -285,9 +372,14 @@ xlvbd_add(blkif_sector_t capacity, int v
 	if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
 		nr_minors = 1 << mi->type->partn_shift;
 
+	err = xlbd_reserve_minors(mi, minor, nr_minors);
+	if (err)
+		goto out;
+	err = -ENODEV;
+
 	gd = alloc_disk(nr_minors);
 	if (gd == NULL)
-		goto out;
+		goto release;
 
 	offset =  mi->index * mi->type->disks_per_major +
 			(minor >> mi->type->partn_shift);
@@ -326,7 +418,7 @@ xlvbd_add(blkif_sector_t capacity, int v
 
 	if (xlvbd_init_blk_queue(gd, sector_size)) {
 		del_gendisk(gd);
-		goto out;
+		goto release;
 	}
 
 	info->rq = gd->queue;
@@ -346,6 +438,8 @@ xlvbd_add(blkif_sector_t capacity, int v
 
 	return 0;
 
+ release:
+	xlbd_release_minors(mi, minor, nr_minors);
  out:
 	if (mi)
 		xlbd_put_major_info(mi);
@@ -356,14 +450,19 @@ xlvbd_add(blkif_sector_t capacity, int v
 void
 xlvbd_del(struct blkfront_info *info)
 {
+	unsigned int minor, nr_minors;
+
 	if (info->mi == NULL)
 		return;
 
 	BUG_ON(info->gd == NULL);
+	minor = info->gd->first_minor;
+	nr_minors = info->gd->minors;
 	del_gendisk(info->gd);
 	put_disk(info->gd);
 	info->gd = NULL;
 
+	xlbd_release_minors(info->mi, minor, nr_minors);
 	xlbd_put_major_info(info->mi);
 	info->mi = NULL;
 

[-- Attachment #3: linux-2.6.33-rc4-xen-blkfront-forced-detach.patch --]
[-- Type: text/plain, Size: 5064 bytes --]

Subject: blkfront: fixes for 'xm block-detach ... --force'

Prevent prematurely freeing 'struct blkfront_info' instances (when the
xenbus data structures are gone, but the Linux ones are still needed).

Prevent adding a disk with the same (major, minor) [and hence the same
name and sysfs entries, which leads to oopses] when the previous
instance wasn't fully de-allocated yet.

This still doesn't address all issues resulting from forced detach:
I/O submitted after the detach still blocks forever, likely preventing
subsequent un-mounting from completing. It's not clear to me (not
knowing much about the block layer) how this can be avoided.

Signed-off-by: Jan Beulich <jbeulich@novell.com>

---
 drivers/block/xen-blkfront.c |   83 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 78 insertions(+), 5 deletions(-)

--- linux-2.6.33-rc4/drivers/block/xen-blkfront.c	2010-01-18 10:43:21.000000000 +0100
+++ 2.6.33-rc4-xen-blkfront-forced-detach/drivers/block/xen-blkfront.c	2010-01-18 11:14:20.000000000 +0100
@@ -103,6 +103,10 @@ struct blkfront_info
 
 static DEFINE_SPINLOCK(blkif_io_lock);
 
+static unsigned int nr_minors;
+static unsigned long *minors;
+static DEFINE_SPINLOCK(minor_lock);
+
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
 	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
 #define GRANT_INVALID_REF	0
@@ -137,6 +141,55 @@ static void add_id_to_freelist(struct bl
 	info->shadow_free = id;
 }
 
+static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
+{
+	unsigned int end = minor + nr;
+	int rc;
+
+	if (end > nr_minors) {
+		unsigned long *bitmap, *old;
+
+		bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+				 GFP_KERNEL);
+		if (bitmap == NULL)
+			return -ENOMEM;
+
+		spin_lock(&minor_lock);
+		if (end > nr_minors) {
+			old = minors;
+			memcpy(bitmap, minors,
+			       BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
+			minors = bitmap;
+			nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
+		} else
+			old = bitmap;
+		spin_unlock(&minor_lock);
+		kfree(old);
+	}
+
+	spin_lock(&minor_lock);
+	if (find_next_bit(minors, end, minor) >= end) {
+		for (; minor < end; ++minor)
+			__set_bit(minor, minors);
+		rc = 0;
+	} else
+		rc = -EBUSY;
+	spin_unlock(&minor_lock);
+
+	return rc;
+}
+
+static void xlbd_release_minors(unsigned int minor, unsigned int nr)
+{
+	unsigned int end = minor + nr;
+
+	BUG_ON(end > nr_minors);
+	spin_lock(&minor_lock);
+	for (; minor < end; ++minor)
+		__clear_bit(minor, minors);
+	spin_unlock(&minor_lock);
+}
+
 static void blkif_restart_queue_callback(void *arg)
 {
 	struct blkfront_info *info = (struct blkfront_info *)arg;
@@ -417,9 +470,14 @@ static int xlvbd_alloc_gendisk(blkif_sec
 	if ((minor % nr_parts) == 0)
 		nr_minors = nr_parts;
 
+	err = xlbd_reserve_minors(minor, nr_minors);
+	if (err)
+		goto out;
+	err = -ENODEV;
+
 	gd = alloc_disk(nr_minors);
 	if (gd == NULL)
-		goto out;
+		goto release;
 
 	offset = minor / nr_parts;
 
@@ -450,7 +508,7 @@ static int xlvbd_alloc_gendisk(blkif_sec
 
 	if (xlvbd_init_blk_queue(gd, sector_size)) {
 		del_gendisk(gd);
-		goto out;
+		goto release;
 	}
 
 	info->rq = gd->queue;
@@ -470,6 +528,8 @@ static int xlvbd_alloc_gendisk(blkif_sec
 
 	return 0;
 
+ release:
+	xlbd_release_minors(minor, nr_minors);
  out:
 	return err;
 }
@@ -924,6 +984,7 @@ static void blkfront_connect(struct blkf
 static void blkfront_closing(struct xenbus_device *dev)
 {
 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+	unsigned int minor, nr_minors;
 	unsigned long flags;
 
 	dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
@@ -946,7 +1007,10 @@ static void blkfront_closing(struct xenb
 	blk_cleanup_queue(info->rq);
 	info->rq = NULL;
 
+	minor = info->gd->first_minor;
+	nr_minors = info->gd->minors;
 	del_gendisk(info->gd);
+	xlbd_release_minors(minor, nr_minors);
 
  out:
 	xenbus_frontend_closed(dev);
@@ -1004,7 +1068,10 @@ static int blkfront_remove(struct xenbus
 
 	blkif_free(info, 0);
 
-	kfree(info);
+	if(info->users == 0)
+		kfree(info);
+	else
+		info->is_ready = -1;
 
 	return 0;
 }
@@ -1013,12 +1080,15 @@ static int blkfront_is_ready(struct xenb
 {
 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 
-	return info->is_ready;
+	return info->is_ready > 0;
 }
 
 static int blkif_open(struct block_device *bdev, fmode_t mode)
 {
 	struct blkfront_info *info = bdev->bd_disk->private_data;
+
+	if(info->is_ready < 0)
+		return -ENODEV;
 	info->users++;
 	return 0;
 }
@@ -1034,7 +1104,10 @@ static int blkif_release(struct gendisk 
 		struct xenbus_device *dev = info->xbdev;
 		enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
 
-		if (state == XenbusStateClosing && info->is_ready)
+		if(info->is_ready < 0) {
+			blkfront_closing(dev);
+			kfree(info);
+		} else if (state == XenbusStateClosing && info->is_ready)
 			blkfront_closing(dev);
 	}
 	return 0;

[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] linux/blkfront: fixes for 'xm block-detach ... --force'
  2010-01-18 10:19 [PATCH] linux/blkfront: fixes for 'xm block-detach ... --force' Jan Beulich
@ 2010-02-26 21:50 ` Daniel Stodden
  2010-03-01 10:07   ` Jan Beulich
  0 siblings, 1 reply; 3+ messages in thread
From: Daniel Stodden @ 2010-02-26 21:50 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Fitzhardinge, xen-devel@lists.xensource.com, Jeremy


Hi Jan.

I think this one resulted in blkfront_remove (from xenbus) racing
against blkif_release (run from close(bdev)). 

Both running for their respective kfree(info) vs. subsequent
dereferences. Or just to leak info entirely.

We only had blkfront_closing() on either thread before, which serializes
around the bd_mutex.

I'm currently thinking that info now rather wants a refcount of its own.
Ideally replacing is_ready entirely, maybe.

Any thoughts?

Daniel

On Mon, 2010-01-18 at 05:19 -0500, Jan Beulich wrote:
> Prevent prematurely freeing 'struct blkfront_info' instances (when the
> xenbus data structures are gone, but the Linux ones are still needed).
> 
> Prevent adding a disk with the same (major, minor) [and hence the same
> name and sysfs entries, which leads to oopses] when the previous
> instance wasn't fully de-allocated yet.
> 
> This still doesn't address all issues resulting from forced detach:
> I/O submitted after the detach still blocks forever, likely preventing
> subsequent un-mounting from completing. It's not clear to me (not
> knowing much about the block layer) how this can be avoided.
> 
> This also doesn't address issues with duplicate device creation caused
> by re-using the hdXX and sdXX name spaces - this would require
> synchronization with the respective native code.
> 
> As usual, written against 2.6.32.3 and made apply to the 2.6.18
> tree without further testing.
> 
> As I believe that pv-ops Xen has the same issue, I'm also attaching
> a respective (compile tested only) patch against 2.6.33-rc4.
> 
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
> 
> --- sle11-2010-01-12.orig/drivers/xen/blkfront/blkfront.c	2009-06-04 10:47:04.000000000 +0200
> +++ sle11-2010-01-12/drivers/xen/blkfront/blkfront.c	2010-01-15 16:47:42.000000000 +0100
> @@ -426,7 +426,10 @@ static int blkfront_remove(struct xenbus
>  
>  	blkif_free(info, 0);
>  
> -	kfree(info);
> +	if(info->users == 0)
> +		kfree(info);
> +	else
> +		info->is_ready = -1;
>  
>  	return 0;
>  }
> @@ -488,6 +491,9 @@ static void blkif_restart_queue_callback
>  int blkif_open(struct inode *inode, struct file *filep)
>  {
>  	struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
> +
> +	if(info->is_ready < 0)
> +		return -ENODEV;
>  	info->users++;
>  	return 0;
>  }
> @@ -504,7 +510,10 @@ int blkif_release(struct inode *inode, s
>  		struct xenbus_device * dev = info->xbdev;
>  		enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
>  
> -		if (state == XenbusStateClosing && info->is_ready)
> +		if(info->is_ready < 0) {
> +			blkfront_closing(dev);
> +			kfree(info);
> +		} else if (state == XenbusStateClosing && info->is_ready)
>  			blkfront_closing(dev);
>  	}
>  	return 0;
> @@ -894,7 +903,7 @@ int blkfront_is_ready(struct xenbus_devi
>  {
>  	struct blkfront_info *info = dev->dev.driver_data;
>  
> -	return info->is_ready;
> +	return info->is_ready > 0;
>  }
>  
> 
> --- sle11-2010-01-12.orig/drivers/xen/blkfront/block.h	2009-06-04 10:47:04.000000000 +0200
> +++ sle11-2010-01-12/drivers/xen/blkfront/block.h	2010-01-18 08:56:41.000000000 +0100
> @@ -78,6 +78,7 @@ struct xlbd_major_info
>  	int index;
>  	int usage;
>  	struct xlbd_type_info *type;
> +	struct xlbd_minor_state *minors;
>  };
>  
>  struct blk_shadow {
> --- sle11-2010-01-12.orig/drivers/xen/blkfront/vbd.c	2009-06-04 10:47:04.000000000 +0200
> +++ sle11-2010-01-12/drivers/xen/blkfront/vbd.c	2010-01-18 08:56:32.000000000 +0100
> @@ -48,6 +48,12 @@
>  #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
>  #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
>  
> +struct xlbd_minor_state {
> +	unsigned int nr;
> +	unsigned long *bitmap;
> +	spinlock_t lock;
> +};
> +
>  /*
>   * For convenience we distinguish between ide, scsi and 'other' (i.e.,
>   * potentially combinations of the two) in the naming scheme and in a few other
> @@ -97,6 +103,8 @@ static struct xlbd_major_info *major_inf
>  #define XLBD_MAJOR_SCSI_RANGE	XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
>  #define XLBD_MAJOR_VBD_RANGE	XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
>  
> +#define XLBD_MAJOR_VBD_ALT(idx) ((idx) ^ XLBD_MAJOR_VBD_START ^ (XLBD_MAJOR_VBD_START + 1))
> +
>  static struct block_device_operations xlvbd_block_fops =
>  {
>  	.owner = THIS_MODULE,
> @@ -114,6 +122,7 @@ static struct xlbd_major_info *
>  xlbd_alloc_major_info(int major, int minor, int index)
>  {
>  	struct xlbd_major_info *ptr;
> +	struct xlbd_minor_state *minors;
>  	int do_register;
>  
>  	ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
> @@ -121,6 +130,22 @@ xlbd_alloc_major_info(int major, int min
>  		return NULL;
>  
>  	ptr->major = major;
> +	minors = kmalloc(sizeof(*minors), GFP_KERNEL);
> +	if (minors == NULL) {
> +		kfree(ptr);
> +		return NULL;
> +	}
> +
> +	minors->bitmap = kzalloc(BITS_TO_LONGS(256) * sizeof(*minors->bitmap),
> +				 GFP_KERNEL);
> +	if (minors->bitmap == NULL) {
> +		kfree(minors);
> +		kfree(ptr);
> +		return NULL;
> +	}
> +
> +	spin_lock_init(&minors->lock);
> +	minors->nr = 256;
>  	do_register = 1;
>  
>  	switch (index) {
> @@ -143,13 +168,19 @@ xlbd_alloc_major_info(int major, int min
>  		 * if someone already registered block major 202,
>  		 * don't try to register it again
>  		 */
> -		if (major_info[XLBD_MAJOR_VBD_START] != NULL)
> +		if (major_info[XLBD_MAJOR_VBD_ALT(index)] != NULL) {
> +			kfree(minors->bitmap);
> +			kfree(minors);
> +			minors = major_info[XLBD_MAJOR_VBD_ALT(index)]->minors;
>  			do_register = 0;
> +		}
>  		break;
>  	}
>  
>  	if (do_register) {
>  		if (register_blkdev(ptr->major, ptr->type->devname)) {
> +			kfree(minors->bitmap);
> +			kfree(minors);
>  			kfree(ptr);
>  			return NULL;
>  		}
> @@ -157,6 +188,7 @@ xlbd_alloc_major_info(int major, int min
>  		printk("xen-vbd: registered block device major %i\n", ptr->major);
>  	}
>  
> +	ptr->minors = minors;
>  	major_info[index] = ptr;
>  	return ptr;
>  }
> @@ -209,6 +241,61 @@ xlbd_put_major_info(struct xlbd_major_in
>  }
>  
>  static int
> +xlbd_reserve_minors(struct xlbd_major_info *mi, unsigned int minor,
> +		    unsigned int nr_minors)
> +{
> +	struct xlbd_minor_state *ms = mi->minors;
> +	unsigned int end = minor + nr_minors;
> +	int rc;
> +
> +	if (end > ms->nr) {
> +		unsigned long *bitmap, *old;
> +
> +		bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
> +				 GFP_KERNEL);
> +		if (bitmap == NULL)
> +			return -ENOMEM;
> +
> +		spin_lock(&ms->lock);
> +		if (end > ms->nr) {
> +			old = ms->bitmap;
> +			memcpy(bitmap, ms->bitmap,
> +			       BITS_TO_LONGS(ms->nr) * sizeof(*bitmap));
> +			ms->bitmap = bitmap;
> +			ms->nr = BITS_TO_LONGS(end) * BITS_PER_LONG;
> +		} else
> +			old = bitmap;
> +		spin_unlock(&ms->lock);
> +		kfree(old);
> +	}
> +
> +	spin_lock(&ms->lock);
> +	if (find_next_bit(ms->bitmap, end, minor) >= end) {
> +		for (; minor < end; ++minor)
> +			__set_bit(minor, ms->bitmap);
> +		rc = 0;
> +	} else
> +		rc = -EBUSY;
> +	spin_unlock(&ms->lock);
> +
> +	return rc;
> +}
> +
> +static void
> +xlbd_release_minors(struct xlbd_major_info *mi, unsigned int minor,
> +		    unsigned int nr_minors)
> +{
> +	struct xlbd_minor_state *ms = mi->minors;
> +	unsigned int end = minor + nr_minors;
> +
> +	BUG_ON(end > ms->nr);
> +	spin_lock(&ms->lock);
> +	for (; minor < end; ++minor)
> +		__clear_bit(minor, ms->bitmap);
> +	spin_unlock(&ms->lock);
> +}
> +
> +static int
>  xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
>  {
>  	request_queue_t *rq;
> @@ -285,9 +372,14 @@ xlvbd_add(blkif_sector_t capacity, int v
>  	if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
>  		nr_minors = 1 << mi->type->partn_shift;
>  
> +	err = xlbd_reserve_minors(mi, minor, nr_minors);
> +	if (err)
> +		goto out;
> +	err = -ENODEV;
> +
>  	gd = alloc_disk(nr_minors);
>  	if (gd == NULL)
> -		goto out;
> +		goto release;
>  
>  	offset =  mi->index * mi->type->disks_per_major +
>  			(minor >> mi->type->partn_shift);
> @@ -326,7 +418,7 @@ xlvbd_add(blkif_sector_t capacity, int v
>  
>  	if (xlvbd_init_blk_queue(gd, sector_size)) {
>  		del_gendisk(gd);
> -		goto out;
> +		goto release;
>  	}
>  
>  	info->rq = gd->queue;
> @@ -346,6 +438,8 @@ xlvbd_add(blkif_sector_t capacity, int v
>  
>  	return 0;
>  
> + release:
> +	xlbd_release_minors(mi, minor, nr_minors);
>   out:
>  	if (mi)
>  		xlbd_put_major_info(mi);
> @@ -356,14 +450,19 @@ xlvbd_add(blkif_sector_t capacity, int v
>  void
>  xlvbd_del(struct blkfront_info *info)
>  {
> +	unsigned int minor, nr_minors;
> +
>  	if (info->mi == NULL)
>  		return;
>  
>  	BUG_ON(info->gd == NULL);
> +	minor = info->gd->first_minor;
> +	nr_minors = info->gd->minors;
>  	del_gendisk(info->gd);
>  	put_disk(info->gd);
>  	info->gd = NULL;
>  
> +	xlbd_release_minors(info->mi, minor, nr_minors);
>  	xlbd_put_major_info(info->mi);
>  	info->mi = NULL;
>  
> 
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] linux/blkfront: fixes for 'xm block-detach ... --force'
  2010-02-26 21:50 ` Daniel Stodden
@ 2010-03-01 10:07   ` Jan Beulich
  0 siblings, 0 replies; 3+ messages in thread
From: Jan Beulich @ 2010-03-01 10:07 UTC (permalink / raw)
  To: Daniel Stodden; +Cc: JeremyFitzhardinge, xen-devel@lists.xensource.com

>>> Daniel Stodden <daniel.stodden@citrix.com> 26.02.10 22:50 >>>
>I think this one resulted in blkfront_remove (from xenbus) racing
>against blkif_release (run from close(bdev)). 
>
>Both running for their respective kfree(info) vs. subsequent
>dereferences. Or just to leak info entirely.
>
>We only had blkfront_closing() on either thread before, which serializes
>around the bd_mutex.
>
>I'm currently thinking that info now rather wants a refcount of its own.
>Ideally replacing is_ready entirely, maybe.
>
>Any thoughts?

Could you check the fixup patch I sent just a couple of minutes
ago (paralleling c/s 993 on the 2.6.18 tree)?

Thanks, Jan

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-03-01 10:07 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-01-18 10:19 [PATCH] linux/blkfront: fixes for 'xm block-detach ... --force' Jan Beulich
2010-02-26 21:50 ` Daniel Stodden
2010-03-01 10:07   ` Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).