All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
To: Alasdair Kergon <agk@redhat.com>
Cc: Christof Schmitt <christof.schmitt@de.ibm.com>,
	device-mapper development <dm-devel@redhat.com>
Subject: [PATCH 2/8] dm core: enable request-based dm
Date: Wed, 18 Mar 2009 17:52:10 +0900	[thread overview]
Message-ID: <49C0B63A.7090504@ct.jp.nec.com> (raw)
In-Reply-To: <49C0B474.4000204@ct.jp.nec.com>

This patch enables request-based dm.

o Request-based dm and bio-based dm coexist, since there are
  some target drivers which are more fitting to bio-based dm.
  Also, there are other bio-based devices in the kernel
  (e.g. md, loop).
  Since bio-based device can't receive struct request,
  there are some limitations on device stacking between
  bio-based and request-based.

                     type of underlying device
                   bio-based      requeset-based
   ----------------------------------------------
    bio-based         OK                OK
    request-based     NG                OK

  The device type is recognized by the queue flag in the kernel,
  so dm follows that.

o The type of a dm device is decided at the first table binding time.
  Once the type of a dm device is decided, the type can't be changed.

o Mempool allocations are deferred to at the table loading time, since
  mempools for request-based dm are different from those for bio-based
  dm and needed mempool type is fixed by the type of table.

o Currently, request-based dm supports only tables that have a single
  target.  To support multiple targets, we need to support request
  splitting or prevent bio/request from spanning multiple targets.
  The former needs lots of changes in the block layer, and the latter
  needs that all target drivers support merge() function.
  Both will take a time.


Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c         |   13 +++
 drivers/md/dm-table.c         |  108 ++++++++++++++++++++++++++++
 drivers/md/dm.c               |  158 +++++++++++++++++++++++++++++++++++-------
 drivers/md/dm.h               |   20 +++++
 include/linux/device-mapper.h |    1 
 5 files changed, 277 insertions(+), 23 deletions(-)

Index: 2.6.29-rc8/drivers/md/dm-table.c
===================================================================
--- 2.6.29-rc8.orig/drivers/md/dm-table.c
+++ 2.6.29-rc8/drivers/md/dm-table.c
@@ -73,6 +73,8 @@ struct dm_table {
 	/* events get handed up using this callback */
 	void (*event_fn)(void *);
 	void *event_context;
+
+	struct dm_md_mempools *mempools;
 };
 
 /*
@@ -124,6 +126,8 @@ static void combine_restrictions_low(str
 	lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
 
 	lhs->no_cluster |= rhs->no_cluster;
+
+	lhs->no_request_stacking |= rhs->no_request_stacking;
 }
 
 /*
@@ -305,6 +309,8 @@ void dm_table_destroy(struct dm_table *t
 		free_devices(&t->devices);
 	}
 
+	dm_free_md_mempools(t->mempools);
+
 	kfree(t);
 }
 
@@ -545,6 +551,8 @@ void dm_set_device_limits(struct dm_targ
 	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
 
 	rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+
+	rs->no_request_stacking |= !blk_queue_stackable(q);
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
@@ -761,6 +769,93 @@ int dm_table_add_target(struct dm_table 
 	return r;
 }
 
+int dm_table_set_type(struct dm_table *t)
+{
+	int i;
+	int bio_based = 0, request_based = 0;
+	struct dm_target *tgt;
+
+	for (i = 0; i < t->num_targets; i++) {
+		tgt = t->targets + i;
+		if (tgt->type->map_rq)
+			request_based = 1;
+		else
+			bio_based = 1;
+
+		if (bio_based && request_based) {
+			DMWARN("Inconsistent table: different target types"
+			       " can't be mixed up");
+			return -EINVAL;
+		}
+	}
+
+	if (bio_based) {
+		/* We must use this table as bio-based */
+		t->limits.no_request_stacking = 1;
+		return 0;
+	}
+
+	BUG_ON(!request_based); /* No targets in this table */
+
+	/* Non-request-stackable devices can't be used for request-based dm */
+	if (t->limits.no_request_stacking) {
+		DMWARN("table load rejected: including non-request-stackable"
+		       " devices");
+		return -EINVAL;
+	}
+
+	/*
+	 * Request-based dm supports only tables that have a single target now.
+	 * To support multiple targets, request splitting support is needed,
+	 * and that needs lots of changes in the block-layer.
+	 * (e.g. request completion process for partial completion.)
+	 */
+	if (t->num_targets > 1) {
+		DMWARN("Request-based dm doesn't support multiple targets yet");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int dm_table_get_type(struct dm_table *t)
+{
+	return t->limits.no_request_stacking ?
+		DM_TYPE_BIO_BASED : DM_TYPE_REQUEST_BASED;
+}
+
+int dm_table_request_based(struct dm_table *t)
+{
+	return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
+}
+
+int dm_table_alloc_md_mempools(struct dm_table *t)
+{
+	int type = dm_table_get_type(t);
+
+	if (unlikely(type == DM_TYPE_NONE)) {
+		DMWARN("no table type is set, can't allocate mempools");
+		return -EINVAL;
+	}
+
+	t->mempools = dm_alloc_md_mempools(type);
+	if (!t->mempools)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void dm_table_free_md_mempools(struct dm_table *t)
+{
+	dm_free_md_mempools(t->mempools);
+	t->mempools = NULL;
+}
+
+struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
+{
+	return t->mempools;
+}
+
 static int setup_indexes(struct dm_table *t)
 {
 	int i;
@@ -937,6 +1032,19 @@ void dm_table_set_restrictions(struct dm
 		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 
 	dm_table_set_integrity(t);
+
+	/*
+	 * QUEUE_FLAG_STACKABLE must be set after all queue settings are
+	 * visible to other CPUs because, once the flag is set, incoming bios
+	 * are processed by request-based dm, which refers to the queue
+	 * settings.
+	 * Until the flag set, bios are passed to bio-based dm and queued to
+	 * md->deferred where queue settings are not needed yet.
+	 * Those bios are passed to request-based dm at the resume time.
+	 */
+	smp_mb();
+	if (!t->limits.no_request_stacking)
+		queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
Index: 2.6.29-rc8/drivers/md/dm.c
===================================================================
--- 2.6.29-rc8.orig/drivers/md/dm.c
+++ 2.6.29-rc8/drivers/md/dm.c
@@ -186,6 +186,15 @@ struct mapped_device {
 	struct kobject kobj;
 };
 
+/*
+ * For mempools pre-allocation at the table loading time.
+ */
+struct dm_md_mempools {
+	mempool_t *io_pool;
+	mempool_t *tio_pool;
+	struct bio_set *bs;
+};
+
 #define MIN_IOS 256
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_tio_cache;
@@ -1740,10 +1749,22 @@ static struct mapped_device *alloc_dev(i
 	INIT_LIST_HEAD(&md->uevent_list);
 	spin_lock_init(&md->uevent_lock);
 
-	md->queue = blk_alloc_queue(GFP_KERNEL);
+	md->queue = blk_init_queue(dm_request_fn, NULL);
 	if (!md->queue)
 		goto bad_queue;
 
+	/*
+	 * Request-based dm devices cannot be stacked on top of bio-based dm
+	 * devices.  The type of this dm device has not been decided yet,
+	 * although we initialized the queue using blk_init_queue().
+	 * The type is decided at the first table loading time.
+	 * To prevent problematic device stacking, clear the queue flag
+	 * for request stacking support until then.
+	 *
+	 * This queue is new, so no concurrency on the queue_flags.
+	 */
+	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+	md->saved_make_request_fn = md->queue->make_request_fn;
 	md->queue->queuedata = md;
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
 	md->queue->backing_dev_info.congested_data = md;
@@ -1751,18 +1772,9 @@ static struct mapped_device *alloc_dev(i
 	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 	md->queue->unplug_fn = dm_unplug_all;
 	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
-
-	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
-	if (!md->io_pool)
-		goto bad_io_pool;
-
-	md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
-	if (!md->tio_pool)
-		goto bad_tio_pool;
-
-	md->bs = bioset_create(16, 0);
-	if (!md->bs)
-		goto bad_no_bioset;
+	blk_queue_softirq_done(md->queue, dm_softirq_done);
+	blk_queue_prep_rq(md->queue, dm_prep_fn);
+	blk_queue_lld_busy(md->queue, dm_lld_busy);
 
 	md->disk = alloc_disk(1);
 	if (!md->disk)
@@ -1797,12 +1809,6 @@ static struct mapped_device *alloc_dev(i
 bad_thread:
 	put_disk(md->disk);
 bad_disk:
-	bioset_free(md->bs);
-bad_no_bioset:
-	mempool_destroy(md->tio_pool);
-bad_tio_pool:
-	mempool_destroy(md->io_pool);
-bad_io_pool:
 	blk_cleanup_queue(md->queue);
 bad_queue:
 	free_minor(minor);
@@ -1824,9 +1830,12 @@ static void free_dev(struct mapped_devic
 		bdput(md->suspended_bdev);
 	}
 	destroy_workqueue(md->wq);
-	mempool_destroy(md->tio_pool);
-	mempool_destroy(md->io_pool);
-	bioset_free(md->bs);
+	if (md->tio_pool)
+		mempool_destroy(md->tio_pool);
+	if (md->io_pool)
+		mempool_destroy(md->io_pool);
+	if (md->bs)
+		bioset_free(md->bs);
 	blk_integrity_unregister(md->disk);
 	del_gendisk(md->disk);
 	free_minor(minor);
@@ -1841,6 +1850,29 @@ static void free_dev(struct mapped_devic
 	kfree(md);
 }
 
+static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
+{
+	struct dm_md_mempools *p;
+
+	if (md->io_pool && md->tio_pool && md->bs)
+		/* the md already has necessary mempools */
+		goto out;
+
+	p = dm_table_get_md_mempools(t);
+	BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
+
+	md->io_pool = p->io_pool;
+	p->io_pool = NULL;
+	md->tio_pool = p->tio_pool;
+	p->tio_pool = NULL;
+	md->bs = p->bs;
+	p->bs = NULL;
+
+out:
+	/* mempool bind completed, now no need any mempools in the table */
+	dm_table_free_md_mempools(t);
+}
+
 /*
  * Bind a table to the device.
  */
@@ -1892,6 +1924,18 @@ static int __bind(struct mapped_device *
 
 	dm_table_event_callback(t, event_callback, md);
 
+	/*
+	 * The queue hasn't been stopped yet, if the old table type wasn't
+	 * for request-based during suspension.  So stop it to prevent
+	 * I/O mapping before resume.
+	 * This must be done before setting the queue restrictions,
+	 * because request-based dm may be run just after the setting.
+	 */
+	if (dm_table_request_based(t) && !blk_queue_stopped(q))
+		stop_queue(q);
+
+	__bind_mempools(md, t);
+
 	write_lock(&md->map_lock);
 	md->map = t;
 	dm_table_set_restrictions(t, q);
@@ -2050,7 +2094,13 @@ static void __flush_deferred_io(struct m
 	struct bio *c;
 
 	while ((c = bio_list_pop(&md->deferred))) {
-		if (__split_bio(md, c))
+		/*
+		 * Some bios might have been queued here during suspension
+		 * before setting of request-based dm in resume
+		 */
+		if (dm_request_based(md))
+			generic_make_request(c);
+		else if (__split_bio(md, c))
 			bio_io_error(c);
 	}
 
@@ -2121,6 +2171,13 @@ int dm_swap_table(struct mapped_device *
 		if (get_capacity(md->disk) != dm_table_get_size(table))
 			goto out;
 
+	/* cannot change the device type, once a table is bound */
+	if (md->map &&
+	    (dm_table_get_type(md->map) != dm_table_get_type(table))) {
+		DMWARN("can't change the device type after a table is bound");
+		goto out;
+	}
+
 	__unbind(md);
 	r = __bind(md, table);
 
@@ -2520,6 +2577,61 @@ int dm_noflush_suspending(struct dm_targ
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
+struct dm_md_mempools *dm_alloc_md_mempools(int type)
+{
+	struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
+
+	if (!pools)
+		return NULL;
+
+	pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
+			 mempool_create_slab_pool(MIN_IOS, _io_cache) :
+			 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
+	if (!pools->io_pool)
+		goto free_pools_and_out;
+
+	pools->tio_pool = (type == DM_TYPE_BIO_BASED) ?
+			  mempool_create_slab_pool(MIN_IOS, _tio_cache) :
+			  mempool_create_slab_pool(MIN_IOS, _rq_tio_cache);
+	if (!pools->tio_pool)
+		goto free_io_pool_and_out;
+
+	pools->bs = (type == DM_TYPE_BIO_BASED) ?
+		    bioset_create(16, 0) : bioset_create(MIN_IOS, 0);
+	if (!pools->bs)
+		goto free_tio_pool_and_out;
+
+	return pools;
+
+free_tio_pool_and_out:
+	mempool_destroy(pools->tio_pool);
+
+free_io_pool_and_out:
+	mempool_destroy(pools->io_pool);
+
+free_pools_and_out:
+	kfree(pools);
+
+	return NULL;
+}
+
+void dm_free_md_mempools(struct dm_md_mempools *pools)
+{
+	if (!pools)
+		return;
+
+	if (pools->io_pool)
+		mempool_destroy(pools->io_pool);
+
+	if (pools->tio_pool)
+		mempool_destroy(pools->tio_pool);
+
+	if (pools->bs)
+		bioset_free(pools->bs);
+
+	kfree(pools);
+}
+
 static struct block_device_operations dm_blk_dops = {
 	.open = dm_blk_open,
 	.release = dm_blk_close,
Index: 2.6.29-rc8/drivers/md/dm.h
===================================================================
--- 2.6.29-rc8.orig/drivers/md/dm.h
+++ 2.6.29-rc8/drivers/md/dm.h
@@ -23,6 +23,13 @@
 #define DM_SUSPEND_NOFLUSH_FLAG		(1 << 1)
 
 /*
+ * Type of table and mapped_device's mempool
+ */
+#define DM_TYPE_NONE		0
+#define DM_TYPE_BIO_BASED	1
+#define DM_TYPE_REQUEST_BASED	2
+
+/*
  * List of devices that a metadevice uses and should open/close.
  */
 struct dm_dev_internal {
@@ -32,6 +39,7 @@ struct dm_dev_internal {
 };
 
 struct dm_table;
+struct dm_md_mempools;
 
 /*-----------------------------------------------------------------
  * Internal table functions.
@@ -48,6 +56,12 @@ void dm_table_postsuspend_targets(struct
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 int dm_table_any_busy_target(struct dm_table *t);
+int dm_table_set_type(struct dm_table *t);
+int dm_table_get_type(struct dm_table *t);
+int dm_table_request_based(struct dm_table *t);
+int dm_table_alloc_md_mempools(struct dm_table *t);
+void dm_table_free_md_mempools(struct dm_table *t);
+struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
 /*
  * To check the return value from dm_table_find_target().
@@ -99,4 +113,10 @@ void dm_kobject_uevent(struct mapped_dev
 int dm_kcopyd_init(void);
 void dm_kcopyd_exit(void);
 
+/*
+ * Mempool operations
+ */
+struct dm_md_mempools *dm_alloc_md_mempools(int type);
+void dm_free_md_mempools(struct dm_md_mempools *pools);
+
 #endif
Index: 2.6.29-rc8/drivers/md/dm-ioctl.c
===================================================================
--- 2.6.29-rc8.orig/drivers/md/dm-ioctl.c
+++ 2.6.29-rc8/drivers/md/dm-ioctl.c
@@ -1044,6 +1044,12 @@ static int populate_table(struct dm_tabl
 		next = spec->next;
 	}
 
+	r = dm_table_set_type(table);
+	if (r) {
+		DMWARN("unable to set table type");
+		return r;
+	}
+
 	return dm_table_complete(table);
 }
 
@@ -1089,6 +1095,13 @@ static int table_load(struct dm_ioctl *p
 		goto out;
 	}
 
+	r = dm_table_alloc_md_mempools(t);
+	if (r) {
+		DMWARN("unable to allocate mempools for this table");
+		dm_table_destroy(t);
+		goto out;
+	}
+
 	down_write(&_hash_lock);
 	hc = dm_get_mdptr(md);
 	if (!hc || hc->md != md) {
Index: 2.6.29-rc8/include/linux/device-mapper.h
===================================================================
--- 2.6.29-rc8.orig/include/linux/device-mapper.h
+++ 2.6.29-rc8/include/linux/device-mapper.h
@@ -154,6 +154,7 @@ struct io_restrictions {
 	unsigned short max_hw_segments;
 	unsigned short max_phys_segments;
 	unsigned char no_cluster; /* inverted so that 0 is default */
+	unsigned char no_request_stacking;
 };
 
 struct dm_target {

  parent reply	other threads:[~2009-03-18  8:52 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-18  8:44 [PATCH 0/8] request-based dm-multipath (v1) Kiyoshi Ueda
2009-03-18  8:50 ` [PATCH 1/8] dm core: add core functions for request-based dm Kiyoshi Ueda
2009-03-18  8:52 ` Kiyoshi Ueda [this message]
2009-03-18  8:53 ` [PATCH 3/8] dm core: reject I/O violating new queue limits Kiyoshi Ueda
2009-03-18  8:54 ` [PATCH 4/8] dm core: disable interrupt when taking map_lock Kiyoshi Ueda
2009-03-18  8:55 ` [PATCH 5/8] dm-mpath: convert to request-based Kiyoshi Ueda
2009-03-18  8:56 ` [PATCH 6/8] block: add gfp_mask parameter to bio_integrity_clone() Kiyoshi Ueda
2009-03-18  8:57 ` [PATCH 7/8] dm core: pass gfp_mask " Kiyoshi Ueda
2009-03-18  8:58 ` [PATCH 8/8] dm core: add integrity feature to request-based dm Kiyoshi Ueda

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49C0B63A.7090504@ct.jp.nec.com \
    --to=k-ueda@ct.jp.nec.com \
    --cc=agk@redhat.com \
    --cc=christof.schmitt@de.ibm.com \
    --cc=dm-devel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.