public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Logan Gunthorpe <logang@deltatee.com>
To: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org,
	Song Liu <song@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>,
	Guoqing Jiang <guoqing.jiang@linux.dev>,
	Stephen Bates <sbates@raithlin.com>,
	Martin Oliveira <Martin.Oliveira@eideticom.com>,
	David Sloan <David.Sloan@eideticom.com>,
	Logan Gunthorpe <logang@deltatee.com>,
	Christoph Hellwig <hch@lst.de>
Subject: [PATCH v3 10/15] md/raid5: Keep a reference to last stripe_head for batch
Date: Thu, 16 Jun 2022 13:19:40 -0600	[thread overview]
Message-ID: <20220616191945.23935-11-logang@deltatee.com> (raw)
In-Reply-To: <20220616191945.23935-1-logang@deltatee.com>

When batching, every stripe head has to find the previous stripe head to
add to the batch list. This involves taking the hash lock, which is
highly contended during IO.

Instead of finding the previous stripe_head each time, store a
reference to the previous stripe_head in a pointer so that it doesn't
require taking the contended lock another time.

The reference to the previous stripe must be released before scheduling
and waiting for work to get done. Otherwise, it can hold up
raid5_activate_delayed() and cause a deadlock.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Guoqing Jiang <guoqing.jiang@linux.dev>
---
 drivers/md/raid5.c | 52 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 12 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 17ddaa41147c..34f8d6c18bd3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -843,7 +843,8 @@ static bool stripe_can_batch(struct stripe_head *sh)
 }
 
 /* we only do back search */
-static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
+static void stripe_add_to_batch_list(struct r5conf *conf,
+		struct stripe_head *sh, struct stripe_head *last_sh)
 {
 	struct stripe_head *head;
 	sector_t head_sector, tmp_sec;
@@ -856,15 +857,20 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
 		return;
 	head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);
 
-	hash = stripe_hash_locks_hash(conf, head_sector);
-	spin_lock_irq(conf->hash_locks + hash);
-	head = find_get_stripe(conf, head_sector, conf->generation, hash);
-	spin_unlock_irq(conf->hash_locks + hash);
-
-	if (!head)
-		return;
-	if (!stripe_can_batch(head))
-		goto out;
+	if (last_sh && head_sector == last_sh->sector) {
+		head = last_sh;
+		atomic_inc(&head->count);
+	} else {
+		hash = stripe_hash_locks_hash(conf, head_sector);
+		spin_lock_irq(conf->hash_locks + hash);
+		head = find_get_stripe(conf, head_sector, conf->generation,
+				       hash);
+		spin_unlock_irq(conf->hash_locks + hash);
+		if (!head)
+			return;
+		if (!stripe_can_batch(head))
+			goto out;
+	}
 
 	lock_two_stripes(head, sh);
 	/* clear_batch_ready clear the flag */
@@ -5794,6 +5800,8 @@ enum stripe_result {
 };
 
 struct stripe_request_ctx {
+	/* a reference to the last stripe_head for batching */
+	struct stripe_head *batch_last;
 	/* the request had REQ_PREFLUSH, cleared after the first stripe_head */
 	bool do_flush;
 };
@@ -5888,8 +5896,13 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
 		goto out_release;
 	}
 
-	if (stripe_can_batch(sh))
-		stripe_add_to_batch_list(conf, sh);
+	if (stripe_can_batch(sh)) {
+		stripe_add_to_batch_list(conf, sh, ctx->batch_last);
+		if (ctx->batch_last)
+			raid5_release_stripe(ctx->batch_last);
+		atomic_inc(&sh->count);
+		ctx->batch_last = sh;
+	}
 
 	if (ctx->do_flush) {
 		set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
@@ -5984,6 +5997,18 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 			continue;
 
 		if (res == STRIPE_SCHEDULE_AND_RETRY) {
+			/*
+			 * Must release the reference to batch_last before
+			 * scheduling and waiting for work to be done,
+			 * otherwise the batch_last stripe head could prevent
+			 * raid5_activate_delayed() from making progress,
+			 * resulting in a deadlock.
+			 */
+			if (ctx.batch_last) {
+				raid5_release_stripe(ctx.batch_last);
+				ctx.batch_last = NULL;
+			}
+
 			schedule();
 			prepare_to_wait(&conf->wait_for_overlap, &w,
 					TASK_UNINTERRUPTIBLE);
@@ -5995,6 +6020,9 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 
 	finish_wait(&conf->wait_for_overlap, &w);
 
+	if (ctx.batch_last)
+		raid5_release_stripe(ctx.batch_last);
+
 	if (rw == WRITE)
 		md_write_end(mddev);
 	bio_endio(bi);
-- 
2.30.2


  parent reply	other threads:[~2022-06-16 19:20 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-16 19:19 [PATCH v3 00/15] Improve Raid5 Lock Contention Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 01/15] md/raid5: Make logic blocking check consistent with logic that blocks Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 02/15] md/raid5: Factor out ahead_of_reshape() function Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 03/15] md/raid5: Refactor raid5_make_request loop Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 04/15] md/raid5: Move stripe_add_to_batch_list() call out of add_stripe_bio() Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 05/15] md/raid5: Move common stripe get code into new find_get_stripe() helper Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 06/15] md/raid5: Factor out helper from raid5_make_request() loop Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 07/15] md/raid5: Drop the do_prepare flag in raid5_make_request() Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 08/15] md/raid5: Move read_seqcount_begin() into make_stripe_request() Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 09/15] md/raid5: Refactor for loop in raid5_make_request() into while loop Logan Gunthorpe
2022-06-16 19:19 ` Logan Gunthorpe [this message]
2022-06-16 19:19 ` [PATCH v3 11/15] md/raid5: Refactor add_stripe_bio() Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 12/15] md/raid5: Check all disks in a stripe_head for reshape progress Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 13/15] md/raid5: Pivot raid5_make_request() Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 14/15] md/raid5: Improve debug prints Logan Gunthorpe
2022-06-16 19:19 ` [PATCH v3 15/15] md/raid5: Increase restriction on max segments per request Logan Gunthorpe
2022-06-16 21:32   ` [Non-DoD Source] " Finlayson, James M CIV (USA)
2022-06-16 21:35     ` Logan Gunthorpe
2022-06-23  4:48 ` [PATCH v3 00/15] Improve Raid5 Lock Contention Song Liu
2022-07-03  7:51   ` Christoph Hellwig
2022-07-03 14:54     ` Song Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220616191945.23935-11-logang@deltatee.com \
    --to=logang@deltatee.com \
    --cc=David.Sloan@eideticom.com \
    --cc=Martin.Oliveira@eideticom.com \
    --cc=guoqing.jiang@linux.dev \
    --cc=hch@infradead.org \
    --cc=hch@lst.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=sbates@raithlin.com \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox