From: Joe Thornber <ejt@redhat.com>
To: dm-devel@redhat.com
Cc: Joe Thornber <ejt@redhat.com>
Subject: [PATCH 10/11] [dm-thin] Add support for REQ_DISCARD
Date: Thu, 2 Feb 2012 16:39:13 +0000
Message-ID: <1328200754-13642-11-git-send-email-ejt@redhat.com>
In-Reply-To: <1328200754-13642-1-git-send-email-ejt@redhat.com>
---
drivers/md/dm-thin.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 159 insertions(+), 14 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c5e3102..304a934 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -508,10 +508,12 @@ struct pool {
struct bio_list deferred_bios;
struct bio_list deferred_flush_bios;
struct list_head prepared_mappings;
+ struct list_head prepared_discards;
struct bio_list retry_on_resume_list;
struct deferred_set shared_read_ds;
+ struct deferred_set all_io_ds;
struct new_mapping *next_mapping;
mempool_t *mapping_pool;
@@ -609,6 +611,7 @@ static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev
struct endio_hook {
struct thin_c *tc;
struct deferred_entry *shared_read_entry;
+ struct deferred_entry *all_io_entry;
struct new_mapping *overwrite_mapping;
};
@@ -718,11 +721,12 @@ struct new_mapping {
unsigned quiesced:1;
unsigned prepared:1;
+ unsigned pass_discard:1;
struct thin_c *tc;
dm_block_t virt_block;
dm_block_t data_block;
- struct cell *cell;
+ struct cell *cell, *cell2;
int err;
/*
@@ -867,7 +871,30 @@ static void process_prepared_mapping(struct new_mapping *m)
mempool_free(m, tc->pool->mapping_pool);
}
-static void process_prepared_mappings(struct pool *pool)
+static void process_prepared_discard(struct new_mapping *m)
+{
+ int r;
+ struct thin_c *tc = m->tc;
+
+ r = dm_thin_remove_block(tc->td, m->virt_block);
+ if (r)
+ DMERR("dm_thin_metadata_remove() failed");
+
+ /*
+ * Pass the discard down to the underlying device?
+ */
+ if (m->pass_discard)
+ remap_and_issue(tc, m->bio, m->data_block);
+ else
+ bio_endio(m->bio, 0);
+
+ cell_defer_except(tc, m->cell, m->bio);
+ cell_defer_except(tc, m->cell2, m->bio);
+ mempool_free(m, tc->pool->mapping_pool);
+}
+
+static void process_prepared(struct pool *pool, struct list_head *head,
+ void (*fn)(struct new_mapping *))
{
unsigned long flags;
struct list_head maps;
@@ -875,21 +902,27 @@ static void process_prepared_mappings(struct pool *pool)
INIT_LIST_HEAD(&maps);
spin_lock_irqsave(&pool->lock, flags);
- list_splice_init(&pool->prepared_mappings, &maps);
+ list_splice_init(head, &maps);
spin_unlock_irqrestore(&pool->lock, flags);
list_for_each_entry_safe(m, tmp, &maps, list)
- process_prepared_mapping(m);
+ fn(m);
}
/*
* Deferred bio jobs.
*/
-static int io_overwrites_block(struct pool *pool, struct bio *bio)
+static int io_overlaps_block(struct pool *pool, struct bio *bio)
{
- return ((bio_data_dir(bio) == WRITE) &&
- !(bio->bi_sector & pool->offset_mask)) &&
+ return !(bio->bi_sector & pool->offset_mask) &&
(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
+
+}
+
+static int io_overwrites_block(struct pool *pool, struct bio *bio)
+{
+ return (bio_data_dir(bio) == WRITE) &&
+ io_overlaps_block(pool, bio);
}
static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
@@ -1127,6 +1160,86 @@ static void no_space(struct cell *cell)
retry_on_resume(bio);
}
+static void process_discard(struct thin_c *tc, struct bio *bio)
+{
+ int r;
+ struct pool *pool = tc->pool;
+ struct cell *cell, *cell2;
+ struct cell_key key, key2;
+ dm_block_t block = get_bio_block(tc, bio);
+ struct dm_thin_lookup_result lookup_result;
+ struct new_mapping *m;
+
+ build_virtual_key(tc->td, block, &key);
+ if (bio_detain(tc->pool->prison, &key, bio, &cell))
+ return;
+
+ r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
+ switch (r) {
+ case 0:
+ /*
+ * Check nobody is fiddling with this pool block. This can
+ * happen if someone's in the process of breaking sharing
+ * on this block.
+ */
+ build_data_key(tc->td, lookup_result.block, &key2);
+ if (bio_detain(tc->pool->prison, &key2, bio, &cell2)) {
+ cell_release_singleton(cell, bio);
+ break;
+ }
+
+ if (io_overlaps_block(pool, bio)) {
+ /*
+ * IO may still be going to the destination block. We must
+ * quiesce before we can do the removal.
+ */
+ m = get_next_mapping(pool);
+ m->tc = tc;
+ m->pass_discard = !lookup_result.shared;
+ m->virt_block = block;
+ m->data_block = lookup_result.block;
+ m->cell = cell;
+ m->cell2 = cell2;
+ m->err = 0;
+ m->bio = bio;
+
+ if (!ds_add_work(&pool->all_io_ds, &m->list)) {
+ list_add(&m->list, &pool->prepared_discards);
+ wake_worker(pool);
+ }
+ } else {
+ /*
+ * This path is hit if callers ignore
+ * limits->discard_granularity. The discard is
+ * truncated at the end of this block; any part that
+ * falls in a subsequent block is ignored.
+ */
+ sector_t offset = bio->bi_sector - (block << pool->block_shift);
+ unsigned remaining = (pool->sectors_per_block - offset) << 9;
+ bio->bi_size = min(bio->bi_size, remaining);
+
+ cell_release_singleton(cell, bio);
+ cell_release_singleton(cell2, bio);
+ remap_and_issue(tc, bio, lookup_result.block);
+ }
+ break;
+
+ case -ENODATA:
+ /*
+ * It isn't provisioned, just forget it.
+ */
+ cell_release_singleton(cell, bio);
+ bio_endio(bio, 0);
+ break;
+
+ default:
+ DMERR("discard: find block unexpectedly returned %d\n", r);
+ cell_release_singleton(cell, bio);
+ bio_io_error(bio);
+ break;
+ }
+}
+
static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
struct cell_key *key,
struct dm_thin_lookup_result *lookup_result,
@@ -1272,6 +1385,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
default:
DMERR("dm_thin_find_block() failed, error = %d", r);
+ cell_release_singleton(cell, bio);
bio_io_error(bio);
break;
}
@@ -1313,7 +1427,11 @@ static void process_deferred_bios(struct pool *pool)
break;
}
- process_bio(tc, bio);
+
+ if (bio->bi_rw & REQ_DISCARD)
+ process_discard(tc, bio);
+ else
+ process_bio(tc, bio);
}
/*
@@ -1349,7 +1467,8 @@ static void do_worker(struct work_struct *ws)
{
struct pool *pool = container_of(ws, struct pool, worker);
- process_prepared_mappings(pool);
+ process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping);
+ process_prepared(pool, &pool->prepared_discards, process_prepared_discard);
process_deferred_bios(pool);
}
@@ -1392,6 +1511,7 @@ static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio)
h->tc = tc;
h->shared_read_entry = NULL;
+ h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : ds_inc(&pool->all_io_ds);
h->overwrite_mapping = NULL;
return h;
@@ -1410,7 +1530,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
struct dm_thin_lookup_result result;
map_context->ptr = thin_hook_bio(tc, bio);
- if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+ if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
thin_defer_bio(tc, bio);
return DM_MAPIO_SUBMITTED;
}
@@ -1586,10 +1706,12 @@ static struct pool *pool_create(struct mapped_device *pool_md,
bio_list_init(&pool->deferred_bios);
bio_list_init(&pool->deferred_flush_bios);
INIT_LIST_HEAD(&pool->prepared_mappings);
+ INIT_LIST_HEAD(&pool->prepared_discards);
pool->low_water_triggered = 0;
pool->no_free_space = 0;
bio_list_init(&pool->retry_on_resume_list);
ds_init(&pool->shared_read_ds);
+ ds_init(&pool->all_io_ds);
pool->next_mapping = NULL;
pool->mapping_pool =
@@ -1830,7 +1952,8 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
pt->low_water_blocks = low_water_blocks;
pt->zero_new_blocks = pf.zero_new_blocks;
ti->num_flush_requests = 1;
- ti->num_discard_requests = 0;
+ ti->num_discard_requests = 1;
+ ti->discards_supported = 1;
ti->private = pt;
pt->callbacks.congested_fn = pool_is_congested;
@@ -2223,6 +2346,17 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
+static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
+{
+ limits->max_discard_sectors = pool->sectors_per_block;
+
+ /*
+ * This is just a hint, and not enforced. We have to cope with
+ * bios that overlap 2 blocks.
+ */
+ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
+}
+
static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct pool_c *pt = ti->private;
@@ -2230,6 +2364,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, 0);
blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+ set_discard_limits(pool, limits);
}
static struct target_type pool_target = {
@@ -2346,8 +2481,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->split_io = tc->pool->sectors_per_block;
ti->num_flush_requests = 1;
- ti->num_discard_requests = 0;
- ti->discards_supported = 0;
+ ti->num_discard_requests = 1;
+ ti->discards_supported = 1;
dm_put(pool_md);
@@ -2403,6 +2538,14 @@ static int thin_endio(struct dm_target *ti,
spin_unlock_irqrestore(&pool->lock, flags);
}
+ if (h->all_io_entry) {
+ INIT_LIST_HEAD(&work);
+ ds_dec(h->all_io_entry, &work);
+ list_for_each_entry_safe(m, tmp, &work, list)
+ list_add(&m->list, &pool->prepared_discards);
+ }
+
+ mempool_free(h, pool->endio_hook_pool);
return 0;
}
@@ -2479,9 +2622,11 @@ static int thin_iterate_devices(struct dm_target *ti,
static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct thin_c *tc = ti->private;
+ struct pool *pool = tc->pool;
blk_limits_io_min(limits, 0);
- blk_limits_io_opt(limits, tc->pool->sectors_per_block << SECTOR_SHIFT);
+ blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+ set_discard_limits(pool, limits);
}
static struct target_type thin_target = {
--
1.7.5.4
next prev parent reply other threads:[~2012-02-02 16:39 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-02 16:39 [PATCH 00/11] Latest dm-thin patches Joe Thornber
2012-02-02 16:39 ` [PATCH 01/11] Unlock the superblock on an error path for new metadata dev creation Joe Thornber
2012-02-02 16:39 ` [PATCH 02/11] Remove redundant arg from value_ptr() Joe Thornber
2012-02-02 16:39 ` [PATCH 03/11] [PATCH 18/19] [dm-thin] [bio prison] Don't use the bi_next field for the holder of a cell Joe Thornber
2012-02-07 23:18 ` Mike Snitzer
2012-02-10 14:55 ` Joe Thornber
2012-02-10 18:03 ` [PATCH 04/12 v2] dm thin: don't " Mike Snitzer
2012-02-02 16:39 ` [PATCH 04/11] [PATCH 15/19] [dm-thin] dm_thin_remove_block() wasn't decrementing the mapped_blocks counter Joe Thornber
2012-02-02 16:39 ` [PATCH 05/11] [dm-thin] btree-remove - fix rebalancing of 3 nodes Joe Thornber
2012-02-02 16:39 ` [PATCH 06/11] Remove entries from the ref_count tree if they're no longer needed Joe Thornber
2012-02-02 16:39 ` [PATCH 07/11] [dm-thin] Commit every second to prevent too much of a position building up Joe Thornber
2012-02-07 16:53 ` Mike Snitzer
2012-02-07 23:00 ` Mike Snitzer
2012-02-10 14:48 ` Joe Thornber
2012-02-10 14:55 ` Mike Snitzer
2012-02-02 16:39 ` [PATCH 08/11] [dm-thin] Add support for external origins Joe Thornber
2012-02-02 16:39 ` [PATCH 09/11] [dm-thin] Discard support part 1 Joe Thornber
2012-02-02 16:39 ` Joe Thornber [this message]
2012-02-10 18:08 ` [PATCH 12/12 v2] dm thin: add discard support Mike Snitzer
2012-02-02 16:39 ` [PATCH 11/11] [dm-thin] some tidy ups of the __open_device() error path (Mike Snitzer) Joe Thornber
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1328200754-13642-11-git-send-email-ejt@redhat.com \
--to=ejt@redhat.com \
--cc=dm-devel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.