All of lore.kernel.org
 help / color / mirror / Atom feed
From: Joe Thornber <ejt@redhat.com>
To: dm-devel@redhat.com
Cc: Joe Thornber <ejt@redhat.com>, Mike Snitzer <snitzer@redhat.com>
Subject: [PATCH 10/14] dm_thin: add support for REQ_DISCARD
Date: Fri, 16 Mar 2012 15:22:33 +0000	[thread overview]
Message-ID: <1331911358-8848-10-git-send-email-ejt@redhat.com> (raw)
In-Reply-To: <1331911358-8848-1-git-send-email-ejt@redhat.com>

Enhancement.

On discard, the corresponding mapping(s) are removed from the thin
device.  If the associated block(s) are no longer shared, the discard
is passed to the underlying device.

All bios other than discards now have an associated deferred_entry
that is saved in the 'all_io_entry' field of endio_hook.  When
non-discard IO completes and the associated mappings are quiesced, any
discards that were deferred via ds_add_work() in process_discard() are
queued for processing by the worker thread.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-thin.c |  173 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 159 insertions(+), 14 deletions(-)

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 3d60d8c..1691be9 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -528,10 +528,12 @@ struct pool {
 	struct bio_list deferred_bios;
 	struct bio_list deferred_flush_bios;
 	struct list_head prepared_mappings;
+	struct list_head prepared_discards;
 
 	struct bio_list retry_on_resume_list;
 
 	struct deferred_set shared_read_ds;
+	struct deferred_set all_io_ds;
 
 	struct new_mapping *next_mapping;
 	mempool_t *mapping_pool;
@@ -629,6 +631,7 @@ static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev
 struct endio_hook {
 	struct thin_c *tc;
 	struct deferred_entry *shared_read_entry;
+	struct deferred_entry *all_io_entry;
 	struct new_mapping *overwrite_mapping;
 };
 
@@ -736,11 +739,12 @@ struct new_mapping {
 
 	unsigned quiesced:1;
 	unsigned prepared:1;
+	unsigned pass_discard:1;
 
 	struct thin_c *tc;
 	dm_block_t virt_block;
 	dm_block_t data_block;
-	struct cell *cell;
+	struct cell *cell, *cell2;
 	int err;
 
 	/*
@@ -880,7 +884,30 @@ static void process_prepared_mapping(struct new_mapping *m)
 	mempool_free(m, tc->pool->mapping_pool);
 }
 
-static void process_prepared_mappings(struct pool *pool)
+static void process_prepared_discard(struct new_mapping *m)
+{
+	int r;
+	struct thin_c *tc = m->tc;
+
+	r = dm_thin_remove_block(tc->td, m->virt_block);
+	if (r)
+		DMERR("dm_thin_remove_block() failed");
+
+	/*
+	 * Pass the discard down to the underlying device?
+	 */
+	if (m->pass_discard)
+		remap_and_issue(tc, m->bio, m->data_block);
+	else
+		bio_endio(m->bio, 0);
+
+	cell_defer_except(tc, m->cell);
+	cell_defer_except(tc, m->cell2);
+	mempool_free(m, tc->pool->mapping_pool);
+}
+
+static void process_prepared(struct pool *pool, struct list_head *head,
+			     void (*fn)(struct new_mapping *))
 {
 	unsigned long flags;
 	struct list_head maps;
@@ -888,21 +915,27 @@ static void process_prepared_mappings(struct pool *pool)
 
 	INIT_LIST_HEAD(&maps);
 	spin_lock_irqsave(&pool->lock, flags);
-	list_splice_init(&pool->prepared_mappings, &maps);
+	list_splice_init(head, &maps);
 	spin_unlock_irqrestore(&pool->lock, flags);
 
 	list_for_each_entry_safe(m, tmp, &maps, list)
-		process_prepared_mapping(m);
+		fn(m);
 }
 
 /*
  * Deferred bio jobs.
  */
-static int io_overwrites_block(struct pool *pool, struct bio *bio)
+static int io_overlaps_block(struct pool *pool, struct bio *bio)
 {
-	return ((bio_data_dir(bio) == WRITE) &&
-		!(bio->bi_sector & pool->offset_mask)) &&
+	return !(bio->bi_sector & pool->offset_mask) &&
 		(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
+
+}
+
+static int io_overwrites_block(struct pool *pool, struct bio *bio)
+{
+	return (bio_data_dir(bio) == WRITE) &&
+		io_overlaps_block(pool, bio);
 }
 
 static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
@@ -1140,6 +1173,86 @@ static void no_space(struct cell *cell)
 		retry_on_resume(bio);
 }
 
+static void process_discard(struct thin_c *tc, struct bio *bio)
+{
+	int r;
+	struct pool *pool = tc->pool;
+	struct cell *cell, *cell2;
+	struct cell_key key, key2;
+	dm_block_t block = get_bio_block(tc, bio);
+	struct dm_thin_lookup_result lookup_result;
+	struct new_mapping *m;
+
+	build_virtual_key(tc->td, block, &key);
+	if (bio_detain(tc->pool->prison, &key, bio, &cell))
+		return;
+
+	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
+	switch (r) {
+	case 0:
+		/*
+		 * Check nobody is fiddling with this pool block.  This can
+		 * happen if someone's in the process of breaking sharing
+		 * on this block.
+		 */
+		build_data_key(tc->td, lookup_result.block, &key2);
+		if (bio_detain(tc->pool->prison, &key2, bio, &cell2)) {
+			cell_release_singleton(cell, bio);
+			break;
+		}
+
+		if (io_overlaps_block(pool, bio)) {
+			/*
+			 * IO may still be going to the destination block.  We must
+			 * quiesce before we can do the removal.
+			 */
+			m = get_next_mapping(pool);
+			m->tc = tc;
+			m->pass_discard = !lookup_result.shared;
+			m->virt_block = block;
+			m->data_block = lookup_result.block;
+			m->cell = cell;
+			m->cell2 = cell2;
+			m->err = 0;
+			m->bio = bio;
+
+			if (!ds_add_work(&pool->all_io_ds, &m->list)) {
+				list_add(&m->list, &pool->prepared_discards);
+				wake_worker(pool);
+			}
+		} else {
+			/*
+			 * This path is hit if people are ignoring
+			 * limits->discard_granularity.  It ignores any
+			 * part of the discard that is in a subsequent
+			 * block.
+			 */
+			sector_t offset = bio->bi_sector - (block << pool->block_shift);
+			unsigned remaining = (pool->sectors_per_block - offset) << 9;
+			bio->bi_size = min(bio->bi_size, remaining);
+
+			cell_release_singleton(cell, bio);
+			cell_release_singleton(cell2, bio);
+			remap_and_issue(tc, bio, lookup_result.block);
+		}
+		break;
+
+	case -ENODATA:
+		/*
+		 * It isn't provisioned, just forget it.
+		 */
+		cell_release_singleton(cell, bio);
+		bio_endio(bio, 0);
+		break;
+
+	default:
+		DMERR("discard: find block unexpectedly returned %d\n", r);
+		cell_release_singleton(cell, bio);
+		bio_io_error(bio);
+		break;
+	}
+}
+
 static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
 			  struct cell_key *key,
 			  struct dm_thin_lookup_result *lookup_result,
@@ -1285,6 +1398,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
 
 	default:
 		DMERR("dm_thin_find_block() failed, error = %d", r);
+		cell_release_singleton(cell, bio);
 		bio_io_error(bio);
 		break;
 	}
@@ -1320,7 +1434,11 @@ static void process_deferred_bios(struct pool *pool)
 
 			break;
 		}
-		process_bio(tc, bio);
+
+		if (bio->bi_rw & REQ_DISCARD)
+			process_discard(tc, bio);
+		else
+			process_bio(tc, bio);
 	}
 
 	/*
@@ -1353,7 +1471,8 @@ static void do_worker(struct work_struct *ws)
 {
 	struct pool *pool = container_of(ws, struct pool, worker);
 
-	process_prepared_mappings(pool);
+	process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping);
+	process_prepared(pool, &pool->prepared_discards, process_prepared_discard);
 	process_deferred_bios(pool);
 }
 
@@ -1385,6 +1504,7 @@ static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio)
 
 	h->tc = tc;
 	h->shared_read_entry = NULL;
+	h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : ds_inc(&pool->all_io_ds);
 	h->overwrite_mapping = NULL;
 
 	return h;
@@ -1403,7 +1523,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
 	struct dm_thin_lookup_result result;
 
 	map_context->ptr = thin_hook_bio(tc, bio);
-	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+	if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
 		thin_defer_bio(tc, bio);
 		return DM_MAPIO_SUBMITTED;
 	}
@@ -1578,10 +1698,12 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 	bio_list_init(&pool->deferred_bios);
 	bio_list_init(&pool->deferred_flush_bios);
 	INIT_LIST_HEAD(&pool->prepared_mappings);
+	INIT_LIST_HEAD(&pool->prepared_discards);
 	pool->low_water_triggered = 0;
 	pool->no_free_space = 0;
 	bio_list_init(&pool->retry_on_resume_list);
 	ds_init(&pool->shared_read_ds);
+	ds_init(&pool->all_io_ds);
 
 	pool->next_mapping = NULL;
 	pool->mapping_pool =
@@ -1821,7 +1943,8 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	pt->low_water_blocks = low_water_blocks;
 	pt->zero_new_blocks = pf.zero_new_blocks;
 	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 0;
+	ti->num_discard_requests = 1;
+	ti->discards_supported = 1;
 	ti->private = pt;
 
 	pt->callbacks.congested_fn = pool_is_congested;
@@ -2213,6 +2336,17 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
+static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
+{
+	limits->max_discard_sectors = pool->sectors_per_block;
+
+	/*
+	 * This is just a hint, and not enforced.  We have to cope with
+	 * bios that overlap 2 blocks.
+	 */
+	limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
+}
+
 static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct pool_c *pt = ti->private;
@@ -2220,6 +2354,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 	blk_limits_io_min(limits, 0);
 	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+	set_discard_limits(pool, limits);
 }
 
 static struct target_type pool_target = {
@@ -2336,8 +2471,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
 	ti->split_io = tc->pool->sectors_per_block;
 	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 0;
-	ti->discards_supported = 0;
+	ti->num_discard_requests = 1;
+	ti->discards_supported = 1;
 
 	dm_put(pool_md);
 
@@ -2393,6 +2528,14 @@ static int thin_endio(struct dm_target *ti,
 		spin_unlock_irqrestore(&pool->lock, flags);
 	}
 
+	if (h->all_io_entry) {
+		INIT_LIST_HEAD(&work);
+		ds_dec(h->all_io_entry, &work);
+		list_for_each_entry_safe(m, tmp, &work, list)
+			list_add(&m->list, &pool->prepared_discards);
+	}
+
+	mempool_free(h, pool->endio_hook_pool);
 	return 0;
 }
 
@@ -2472,9 +2615,11 @@ static int thin_iterate_devices(struct dm_target *ti,
 static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct thin_c *tc = ti->private;
+	struct pool *pool = tc->pool;
 
 	blk_limits_io_min(limits, 0);
-	blk_limits_io_opt(limits, tc->pool->sectors_per_block << SECTOR_SHIFT);
+	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+	set_discard_limits(pool, limits);
 }
 
 static struct target_type thin_target = {
-- 
1.7.9.1

  parent reply	other threads:[~2012-03-16 15:22 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-16 15:21 dm-thin patches for 3.4 Joe Thornber
2012-03-16 15:22 ` [PATCH 01/14] dm-thin: don't use the bi_next field for the holder of a cell Joe Thornber
2012-03-16 15:22   ` [PATCH 02/14] dm-thin: remove some documentation for the unimplemented 'trim' target message Joe Thornber
2012-03-16 15:22   ` [PATCH 03/14] dm_thin: dm_sm_root_size() was being called for the wrong space-map Joe Thornber
2012-03-16 15:22   ` [PATCH 04/14] dm_thin: tweak a comment Joe Thornber
2012-03-16 15:22   ` [PATCH 05/14] dm_btree: remove redundant arg from value_ptr() Joe Thornber
2012-03-16 15:22   ` [PATCH 06/14] dm_btree: fix rebalancing of 3 nodes after remove Joe Thornber
2012-03-16 15:22   ` [PATCH 07/14] dm_space_map: remove entries from the ref_count tree if they're no longer needed Joe Thornber
2012-03-16 15:22   ` [PATCH 08/14] dm_thin: add support for read-only external snapshot origins Joe Thornber
2012-03-16 15:22   ` [PATCH 09/14] dm_thin: foundation for discard support Joe Thornber
2012-03-16 15:22   ` Joe Thornber [this message]
2012-03-23 12:45     ` [PATCH 10/14] dm_thin: add support for REQ_DISCARD Alasdair G Kergon
2012-03-16 15:22   ` [PATCH 11/14] dm_thin: add pool target flags to control discard Joe Thornber
2012-03-23 12:37     ` Alasdair G Kergon
2012-03-23 21:55       ` [PATCH] dm thin: fix pool target flags that " Mike Snitzer
2012-03-26 14:15         ` Joe Thornber
2012-03-26 15:33           ` Mike Snitzer
2012-03-26 19:56             ` Mike Snitzer
2012-03-26 15:34           ` [PATCH] " Joe Thornber
2012-03-26 15:46           ` Joe Thornber
2012-03-16 15:22   ` [PATCH 12/14] dm_thin: commit metadata every second Joe Thornber
2012-03-16 15:22   ` [PATCH 13/14] dm_thin: commit just before processing a pool target info request Joe Thornber
2012-03-19 14:00     ` Alasdair G Kergon
2012-03-20 10:12       ` Joe Thornber
2012-03-16 15:22   ` [PATCH 14/14] dm_thin: bump the target versions Joe Thornber
2012-03-20 18:24   ` [PATCH 01/14] dm-thin: don't use the bi_next field for the holder of a cell Alasdair G Kergon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1331911358-8848-10-git-send-email-ejt@redhat.com \
    --to=ejt@redhat.com \
    --cc=dm-devel@redhat.com \
    --cc=snitzer@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.