From: Mike Snitzer <snitzer@kernel.org>
To: dm-devel@redhat.com
Cc: axboe@kernel.dk, hch@lst.de, ming.lei@redhat.com
Subject: [dm-devel] [dm-5.19 PATCH 16/21] dm: improve bio splitting and associated IO accounting
Date: Sun, 17 Apr 2022 22:27:28 -0400 [thread overview]
Message-ID: <20220418022733.56168-17-snitzer@kernel.org> (raw)
In-Reply-To: <20220418022733.56168-1-snitzer@kernel.org>
[-- Attachment #1: Type: application/octet-stream, Size: 7086 bytes --]
From: Ming Lei <ming.lei@redhat.com>
The current DM code (ab)uses late assignment of dm_io->orig_bio (after
__map_bio() returns and any bio splitting is complete) to indicate the
FS bio has been processed and can be accounted. This results in
awkward waiting until ->orig_bio is set in dm_submit_bio_remap().
Also the bio splitting was implemented using bio_split()+bio_chain()
-- a well-worn pattern but it requires bio cloning purely for the
benefit of more natural IO accounting. The bio_split() result was
stored in ->orig_bio to represent the mapped part of the original FS
bio.
DM has switched to the bdev based IO accounting interface. DM's IO
accounting can be implemented in terms of the original FS bio (now
stored early in ->orig_bio) via access to its sectors/bio_op. And
if/when splitting is needed, set a new DM_IO_WAS_SPLIT flag and use
new dm_io fields of .sector_offset & .sectors to allow IO accounting
for split bios _without_ needing to clone a new bio to store in
->orig_bio.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Co-developed-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
drivers/md/dm-core.h | 8 +++++-
drivers/md/dm.c | 75 ++++++++++++++++++++++++++++++++++++----------------
2 files changed, 59 insertions(+), 24 deletions(-)
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 8ba99eaa0872..37ddedf61249 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -267,7 +267,12 @@ struct dm_io {
blk_status_t status;
atomic_t io_count;
struct mapped_device *md;
+
+ /* These three fields represent the mapped part of the original bio */
struct bio *orig_bio;
+ unsigned int sector_offset; /* offset from end of orig_bio */
+ unsigned int sectors;
+
/* last member of dm_target_io is 'struct bio' */
struct dm_target_io tio;
};
@@ -277,7 +282,8 @@ struct dm_io {
*/
enum {
DM_IO_START_ACCT,
- DM_IO_ACCOUNTED
+ DM_IO_ACCOUNTED,
+ DM_IO_WAS_SPLIT
};
static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index cb41384cd814..860d2aaffd2a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -516,8 +516,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
*/
if (bio_is_flush_with_data(bio))
sectors = 0;
- else
+ else if (likely(!(dm_io_flagged(io, DM_IO_WAS_SPLIT))))
sectors = bio_sectors(bio);
+ else
+ sectors = io->sectors;
if (!end)
bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
@@ -526,10 +528,18 @@ static void dm_io_acct(struct dm_io *io, bool end)
bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
if (static_branch_unlikely(&stats_enabled) &&
- unlikely(dm_stats_used(&md->stats)))
+ unlikely(dm_stats_used(&md->stats))) {
+ sector_t sector;
+
+ if (likely(!dm_io_flagged(io, DM_IO_WAS_SPLIT)))
+ sector = bio->bi_iter.bi_sector;
+ else
+ sector = bio_end_sector(bio) - io->sector_offset;
+
dm_stats_account_io(&md->stats, bio_data_dir(bio),
- bio->bi_iter.bi_sector, sectors,
+ sector, sectors,
end, start_time, stats_aux);
+ }
}
static void __dm_start_io_acct(struct dm_io *io)
@@ -582,7 +592,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io->status = BLK_STS_OK;
atomic_set(&io->io_count, 1);
this_cpu_inc(*md->pending_io);
- io->orig_bio = NULL;
+ io->orig_bio = bio;
io->md = md;
io->map_task = current;
spin_lock_init(&io->lock);
@@ -1220,6 +1230,13 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
*tio->len_ptr -= bio_sectors - n_sectors;
bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
+
+ /*
+ * __split_and_process_bio() may have already saved mapped part
+ * for accounting but it is being reduced so update accordingly.
+ */
+ dm_io_set_flag(tio->io, DM_IO_WAS_SPLIT);
+ tio->io->sectors = n_sectors;
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
@@ -1258,13 +1275,6 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone)
/* Still in target's map function */
dm_io_set_flag(io, DM_IO_START_ACCT);
} else {
- /*
- * Called by another thread, managed by DM target,
- * wait for dm_split_and_process_bio() to store
- * io->orig_bio
- */
- while (unlikely(!smp_load_acquire(&io->orig_bio)))
- msleep(1);
dm_start_io_acct(io, clone);
}
@@ -1358,6 +1368,31 @@ static void __map_bio(struct bio *clone)
}
}
+static void setup_split_accounting(struct clone_info *ci, unsigned len)
+{
+ struct dm_io *io = ci->io;
+
+ if (ci->sector_count > len) {
+ /*
+ * Split needed, save the mapped part for accounting.
+ * NOTE: dm_accept_partial_bio() will update accordingly.
+ */
+ dm_io_set_flag(io, DM_IO_WAS_SPLIT);
+ io->sectors = len;
+ }
+
+ if (static_branch_unlikely(&stats_enabled) &&
+ unlikely(dm_stats_used(&io->md->stats))) {
+ /*
+ * Save bi_sector in terms of its offset from end of
+ * original bio, only needed for DM-stats' benefit.
+ * - saved regardless of whether split needed so that
+ * dm_accept_partial_bio() doesn't need to.
+ */
+ io->sector_offset = bio_end_sector(ci->bio) - ci->sector;
+ }
+}
+
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
struct dm_target *ti, unsigned num_bios)
{
@@ -1397,6 +1432,8 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
case 0:
break;
case 1:
+ if (len)
+ setup_split_accounting(ci, *len);
clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
__map_bio(clone);
break;
@@ -1560,6 +1597,7 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
ci->submit_as_polled = ci->bio->bi_opf & REQ_POLLED;
len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
+ setup_split_accounting(ci, len);
clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
__map_bio(clone);
@@ -1593,7 +1631,6 @@ static void dm_split_and_process_bio(struct mapped_device *md,
{
struct clone_info ci;
struct dm_io *io;
- struct bio *orig_bio = NULL;
blk_status_t error = BLK_STS_OK;
init_clone_info(&ci, md, map, bio);
@@ -1609,23 +1646,15 @@ static void dm_split_and_process_bio(struct mapped_device *md,
io->map_task = NULL;
if (error || !ci.sector_count)
goto out;
-
/*
* Remainder must be passed to submit_bio_noacct() so it gets handled
* *after* bios already submitted have been completely processed.
- * We take a clone of the original to store in io->orig_bio to be
- * used by dm_end_io_acct() and for dm_io_complete() to use for
- * completion handling.
*/
- orig_bio = bio_split(bio, bio_sectors(bio) - ci.sector_count,
- GFP_NOIO, &md->queue->bio_split);
- bio_chain(orig_bio, bio);
- trace_block_split(orig_bio, bio->bi_iter.bi_sector);
+ bio_trim(bio, io->sectors, ci.sector_count);
+ trace_block_split(bio, bio->bi_iter.bi_sector);
+ bio_inc_remaining(bio);
submit_bio_noacct(bio);
out:
- if (!orig_bio)
- orig_bio = bio;
- smp_store_release(&io->orig_bio, orig_bio);
if (dm_io_flagged(io, DM_IO_START_ACCT))
dm_start_io_acct(io, NULL);
--
2.15.0
[-- Attachment #2: Type: text/plain, Size: 98 bytes --]
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
next prev parent reply other threads:[~2022-04-18 2:28 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-18 2:27 [dm-devel] [dm-5.19 PATCH 00/21] dm: changes staged for 5.19 Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 01/21] block: change exported IO accounting interface from gendisk to bdev Mike Snitzer
2022-04-18 2:27 ` Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 02/21] dm: conditionally enable BIOSET_PERCPU_CACHE for dm_io bioset Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 03/21] dm: factor out dm_io_set_error and __dm_io_dec_pending Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 04/21] dm: simplify dm_io access in dm_split_and_process_bio Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 05/21] dm: simplify dm_start_io_acct Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 06/21] dm: mark various branches unlikely Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 07/21] dm: add local variables to clone_endio and __map_bio Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 08/21] dm: move hot dm_io members to same cacheline as dm_target_io Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 09/21] dm: introduce dm_{get, put}_live_table_bio called from dm_submit_bio Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 10/21] dm: conditionally enable branching for less used features Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 11/21] dm: simplify basic targets Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 12/21] dm: use bio_sectors in dm_accept_partial_bio Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 13/21] dm: don't pass bio to __dm_start_io_acct and dm_end_io_acct Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 14/21] dm: pass dm_io instance to dm_io_acct directly Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 15/21] dm: switch to bdev based IO accounting interfaces Mike Snitzer
2022-04-18 2:27 ` Mike Snitzer [this message]
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 17/21] dm: don't grab target io reference in dm_zone_map_bio Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 18/21] dm: improve dm_io reference counting Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 19/21] dm: put all polled dm_io instances into a single list Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 20/21] dm: simplify bio-based IO accounting further Mike Snitzer
2022-04-18 2:27 ` [dm-devel] [dm-5.19 PATCH 21/21] dm: improve abnormal bio processing Mike Snitzer
2022-04-21 4:06 ` Shinichiro Kawasaki
2022-04-22 13:13 ` Mike Snitzer
2022-04-22 13:26 ` Mike Snitzer
2022-04-25 23:57 ` Shinichiro Kawasaki
2022-04-18 3:00 ` [dm-devel] [dm-5.19 PATCH 00/21] dm: changes staged for 5.19 Damien Le Moal
2022-04-18 3:16 ` Mike Snitzer
2022-04-18 12:49 ` Jens Axboe
2022-04-18 12:51 ` [dm-devel] (subset) " Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220418022733.56168-17-snitzer@kernel.org \
--to=snitzer@kernel.org \
--cc=axboe@kernel.dk \
--cc=dm-devel@redhat.com \
--cc=hch@lst.de \
--cc=ming.lei@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.