public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
From: Mike Snitzer <snitzer@redhat.com>
To: hch@lst.de
Cc: dm-devel@redhat.com, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org
Subject: [for-4.14 RFC PATCH 1/2] dm rq: avoid deadlock if dm-mq is stacked on old .request_fn device(s)
Date: Thu, 13 Jul 2017 17:12:16 -0400	[thread overview]
Message-ID: <20170713211217.52361-2-snitzer@redhat.com> (raw)
In-Reply-To: <20170713211217.52361-1-snitzer@redhat.com>

Conditionally use __blk_put_request() or blk_put_request() instead of
just blk_put_request() in multipath_release_clone().

Otherwise a deadlock will occur because scsi_end_request() will take the
clone request's queue_lock, around its call to blk_finish_request(), and
then the later blk_put_request() also tries to take it:

[12749.916332]  queued_spin_lock_slowpath+0xb/0xf
[12749.916335]  _raw_spin_lock_irqsave+0x37/0x40
[12749.916339]  blk_put_request+0x39/0x60
[12749.916342]  multipath_release_clone+0xe/0x10 [dm_multipath]
[12749.916350]  dm_softirq_done+0x156/0x240 [dm_mod]
[12749.916353]  __blk_mq_complete_request+0x90/0x140
[12749.916355]  blk_mq_complete_request+0x16/0x20
[12749.916360]  dm_complete_request+0x23/0x40 [dm_mod]
[12749.916365]  end_clone_request+0x1d/0x20 [dm_mod]
[12749.916367]  blk_finish_request+0x83/0x120
[12749.916370]  scsi_end_request+0x12d/0x1d0
[12749.916371]  scsi_io_completion+0x13c/0x630
[12749.916374]  ? set_next_entity+0x7c/0x780
[12749.916376]  scsi_finish_command+0xd9/0x120
[12749.916378]  scsi_softirq_done+0x12a/0x150
[12749.916380]  blk_done_softirq+0x9e/0xd0
[12749.916382]  __do_softirq+0xc9/0x269
[12749.916384]  run_ksoftirqd+0x29/0x50
[12749.916385]  smpboot_thread_fn+0x110/0x160
[12749.916387]  kthread+0x109/0x140
[12749.916389]  ? sort_range+0x30/0x30
[12749.916390]  ? kthread_park+0x60/0x60
[12749.916391]  ret_from_fork+0x25/0x30

This "fix" is gross in that the long-term fitness of stacking blk-mq DM
multipath (dm-mq) ontop of old .request_fn devices is questionable.  The
above stack trace shows just how ugly it is to have old .request_fn SCSI
code cascade into blk-mq code during DM multipath request completion.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-mpath.c         | 16 ++++++++++++++--
 drivers/md/dm-rq.c            |  8 +++++---
 drivers/md/dm-target.c        |  4 ++--
 include/linux/device-mapper.h |  3 ++-
 4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 0e8ab5b..34cf7b6 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -520,9 +520,21 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 	return DM_MAPIO_REMAPPED;
 }
 
-static void multipath_release_clone(struct request *clone)
+static void multipath_release_clone(struct dm_target *ti, struct request *clone)
 {
-	blk_put_request(clone);
+	struct multipath *m = ti->private;
+	struct request_queue *q = clone->q;
+
+	if (!q->mq_ops && m->queue_mode == DM_TYPE_MQ_REQUEST_BASED) {
+		/*
+		 * dm-mq on .request_fn already holds clone->q->queue_lock
+		 * via blk_finish_request()...
+		 * - true for .request_fn SCSI, but is it _always_ true?
+		 */
+		lockdep_assert_held(q->queue_lock);
+		__blk_put_request(q, clone);
+	} else
+		blk_put_request(clone);
 }
 
 /*
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index c6ebc5b..95bb44c 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -220,11 +220,12 @@ static void dm_end_request(struct request *clone, blk_status_t error)
 {
 	int rw = rq_data_dir(clone);
 	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct dm_target *ti = tio->ti;
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 
 	blk_rq_unprep_clone(clone);
-	tio->ti->type->release_clone_rq(clone);
+	ti->type->release_clone_rq(ti, clone);
 
 	rq_end_stats(md, rq);
 	if (!rq->q->mq_ops)
@@ -267,6 +268,7 @@ static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
 
 static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
 {
+	struct dm_target *ti = tio->ti;
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 	int rw = rq_data_dir(rq);
@@ -274,7 +276,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
 	rq_end_stats(md, rq);
 	if (tio->clone) {
 		blk_rq_unprep_clone(tio->clone);
-		tio->ti->type->release_clone_rq(tio->clone);
+		ti->type->release_clone_rq(ti, tio->clone);
 	}
 
 	if (!rq->q->mq_ops)
@@ -488,7 +490,7 @@ static int map_request(struct dm_rq_target_io *tio)
 	case DM_MAPIO_REMAPPED:
 		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
 			/* -ENOMEM */
-			ti->type->release_clone_rq(clone);
+			ti->type->release_clone_rq(ti, clone);
 			return DM_MAPIO_REQUEUE;
 		}
 
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index c0d7e60..adbd17b 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -138,12 +138,12 @@ static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
 	return DM_MAPIO_KILL;
 }
 
-static void io_err_release_clone_rq(struct request *clone)
+static void io_err_release_clone_rq(struct dm_target *ti, struct request *clone)
 {
 }
 
 static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn)
+				     long nr_pages, void **kaddr, pfn_t *pfn)
 {
 	return -EIO;
 }
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 0c1b50ad..f2ca0ab 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -61,7 +61,8 @@ typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti,
 					    struct request *rq,
 					    union map_info *map_context,
 					    struct request **clone);
-typedef void (*dm_release_clone_request_fn) (struct request *clone);
+typedef void (*dm_release_clone_request_fn) (struct dm_target *ti,
+					     struct request *clone);
 
 /*
  * Returns:
-- 
2.10.1

  reply	other threads:[~2017-07-13 21:12 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-13 21:12 [for-4.14 RFC PATCH 0/2] dm rq: eliminate historic blk-mq and .request_fn queue stacking restrictions Mike Snitzer
2017-07-13 21:12 ` Mike Snitzer [this message]
2017-07-14  7:22   ` [for-4.14 RFC PATCH 1/2] dm rq: avoid deadlock if dm-mq is stacked on old .request_fn device(s) Christoph Hellwig
2017-07-14 14:19     ` Mike Snitzer
2017-07-14 17:17       ` Ewan D. Milne
2017-07-14 21:15         ` Mike Snitzer
2017-07-13 21:12 ` [for-4.14 RFC PATCH 2/2] dm rq: eliminate historic blk-mq and .request_fn queue stacking restrictions Mike Snitzer
2017-07-14  7:12 ` [for-4.14 RFC PATCH 0/2] " Christoph Hellwig
2017-07-14 14:02   ` Mike Snitzer
2017-07-15  8:44     ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170713211217.52361-2-snitzer@redhat.com \
    --to=snitzer@redhat.com \
    --cc=dm-devel@redhat.com \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox