From: Mike Snitzer <snitzer@redhat.com>
To: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Laurence Oberman <loberman@redhat.com>,
dm-devel@redhat.com, linux-scsi@vger.kernel.org
Subject: Re: dm-mq and end_clone_request()
Date: Mon, 1 Aug 2016 13:59:48 -0400 [thread overview]
Message-ID: <20160801175948.GA6685@redhat.com> (raw)
In-Reply-To: <17da3ab0-233a-2cec-f921-bfd42c953ccc@sandisk.com>

With this debug patch on top of v4.7:

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 52baf8a..22baf29 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -433,10 +433,22 @@ failed:
  */
 static int must_push_back(struct multipath *m)
 {
+        bool queue_if_no_path = test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
+        bool suspend_active = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
+                               test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
+        bool suspending = (suspend_active && dm_noflush_suspending(m->ti));
+
+#if 0
         return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
                 ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
                   test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
                  dm_noflush_suspending(m->ti)));
+#else
+        if (!queue_if_no_path || !suspending)
+                DMERR_LIMIT("%s: queue_if_no_path=%d suspend_active=%d suspending=%d",
+                            __func__, queue_if_no_path, suspend_active, suspending);
+        return (queue_if_no_path || suspending);
+#endif
 }
 
 /*
@@ -459,7 +471,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 
         pgpath = choose_pgpath(m, nr_bytes);
         if (!pgpath) {
-                if (!must_push_back(m))
+                if (WARN_ON_ONCE(!must_push_back(m)))
                         r = -EIO;       /* Failed */
                 return r;
         } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
@@ -1347,7 +1359,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
 
         if (!atomic_read(&m->nr_valid_paths)) {
                 if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-                        if (!must_push_back(m))
+                        if (WARN_ON_ONCE(!must_push_back(m)))
                                 r = -EIO;
                 } else {
                         if (error == -EBADE)

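Aside, in case the wrapping looks odd: WARN_ON_ONCE() evaluates to its
condition, so the two hunks above leave the -EIO paths exactly as they
were and just dump a one-time stack trace the first time they are taken.
Roughly this shape, as a throwaway userspace model (WARN_ON_ONCE_MODEL()
and must_push_back_stub() are made-up names, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/*
 * Userspace stand-in for the kernel's WARN_ON_ONCE(): it evaluates to
 * its condition (so control flow is unchanged) and prints a one-time
 * warning the first time the condition is true.  The real macro dumps
 * a stack trace; this sketch only prints a line.
 */
#define WARN_ON_ONCE_MODEL(cond) ({ \
        static bool __warned; \
        bool __c = (cond); \
        if (__c && !__warned) { \
                __warned = true; \
                printf("WARNING: %s was true\n", #cond); \
        } \
        __c; \
})

static bool must_push_back_stub(void) { return false; }

int main(void)
{
        int r = 0;

        /* Same shape as the hunk in __multipath_map() above. */
        if (WARN_ON_ONCE_MODEL(!must_push_back_stub()))
                r = -5;         /* stands in for -EIO */

        printf("r=%d: warning printed once, return value unchanged\n", r);
        return 0;
}
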
I got:
Aug 1 13:41:48 client kernel: blk_update_request: I/O error, dev sdbf, sector 34376
Aug 1 13:41:48 client kernel: blk_update_request: I/O error, dev sdbf, sector 183608
Aug 1 13:41:48 client kernel: device-mapper: multipath: Failing path 67:144.
Aug 1 13:41:48 client kernel: blk_update_request: I/O error, dev sdbf, sector 237352
Aug 1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=1
Aug 1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=1
Aug 1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=1
Aug 1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=1
Aug 1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=0
Aug 1 13:41:48 client kernel: ------------[ cut here ]------------
Aug 1 13:41:48 client kernel: WARNING: CPU: 10 PID: 6445 at drivers/md/dm-mpath.c:474 __multipath_map.isra.11+0xb7/0x240 [dm_multipath]
Aug 1 13:41:48 client kernel: Modules linked in: dm_round_robin(O) xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter rpcrdma ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_srp scsi_transport_srp ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel glue_helper
Aug 1 13:41:48 client kernel: lrw gf128mul ablk_helper cryptd ipmi_ssif iTCO_wdt iTCO_vendor_support sg ipmi_si hpilo pcspkr ipmi_msghandler acpi_power_meter hpwdt lpc_ich shpchp i7core_edac mfd_core edac_core pcc_cpufreq acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc dm_multipath(O) ip_tables xfs libcrc32c mlx5_ib ib_core sd_mod radeon i2c_algo_bit drm_kms_helper syscopyarea sysfillrect mlx5_core sysimgblt fb_sys_fops ttm drm crc32c_intel serio_raw bnx2 hpsa i2c_core ptp pps_core scsi_transport_sas dm_mod(O) [last unloaded: dm_log]
Aug 1 13:41:48 client kernel: CPU: 10 PID: 6445 Comm: kworker/10:1H Tainted: G IO 4.7.0.snitm #1
Aug 1 13:41:48 client kernel: Hardware name: HP ProLiant DL380 G7, BIOS P67 08/16/2015
Aug 1 13:41:48 client kernel: Workqueue: kblockd blk_mq_requeue_work
Aug 1 13:41:48 client kernel: 0000000000000286 000000008737cd27 ffff880607ecfbb8 ffffffff8134259f
Aug 1 13:41:48 client kernel: 0000000000000000 0000000000000000 ffff880607ecfbf8 ffffffff81088fb1
Aug 1 13:41:48 client kernel: 000001da00000000 ffff880bd0393100 0000000000000000 0000000000000000
Aug 1 13:41:48 client kernel: Call Trace:
Aug 1 13:41:48 client kernel: [<ffffffff8134259f>] dump_stack+0x63/0x84
Aug 1 13:41:48 client kernel: [<ffffffff81088fb1>] __warn+0xd1/0xf0
Aug 1 13:41:48 client kernel: [<ffffffff810890ed>] warn_slowpath_null+0x1d/0x20
Aug 1 13:41:48 client kernel: [<ffffffffa0104e27>] __multipath_map.isra.11+0xb7/0x240 [dm_multipath]
Aug 1 13:41:48 client kernel: [<ffffffffa0104fca>] multipath_clone_and_map+0x1a/0x20 [dm_multipath]
Aug 1 13:41:48 client kernel: [<ffffffffa0002242>] map_request+0xd2/0x240 [dm_mod]
Aug 1 13:41:48 client kernel: [<ffffffffa000407e>] dm_mq_queue_rq+0x7e/0x110 [dm_mod]
Aug 1 13:41:48 client kernel: [<ffffffff81320d32>] __blk_mq_run_hw_queue+0x1f2/0x370
Aug 1 13:41:48 client kernel: [<ffffffff81320b25>] blk_mq_run_hw_queue+0x95/0xb0
Aug 1 13:41:48 client kernel: [<ffffffff81322438>] ? blk_mq_insert_request+0x88/0xc0
Aug 1 13:41:48 client kernel: [<ffffffff81321ba5>] blk_mq_start_hw_queue+0x15/0x20
Aug 1 13:41:48 client kernel: [<ffffffff81321be2>] blk_mq_start_hw_queues+0x32/0x50
Aug 1 13:41:48 client kernel: [<ffffffff813229e5>] blk_mq_requeue_work+0x115/0x140
Aug 1 13:41:48 client kernel: [<ffffffff810a1e72>] process_one_work+0x152/0x400
Aug 1 13:41:48 client kernel: [<ffffffff810a2765>] worker_thread+0x125/0x4b0
Aug 1 13:41:48 client kernel: [<ffffffff810a2640>] ? rescuer_thread+0x380/0x380
Aug 1 13:41:48 client kernel: [<ffffffff810a8298>] kthread+0xd8/0xf0
Aug 1 13:41:48 client kernel: [<ffffffff816ba17f>] ret_from_fork+0x1f/0x40
Aug 1 13:41:48 client kernel: [<ffffffff810a81c0>] ? kthread_park+0x60/0x60
Aug 1 13:41:48 client kernel: ---[ end trace 1be159facc3adabe ]---
Aug 1 13:41:48 client kernel: blk_update_request: I/O error, dev dm-28, sector 267144
Aug 1 13:41:48 client multipathd: 360001ff0b035d0000000001a8d8a001b: load table [0 62277025792 multipath 1 queue_if_no_path 0 0 0]
Aug 1 13:41:48 client multipathd: 360001ff0b035d0000000001a8d8a001b: Entering recovery mode: max_retries=12

This says to me that must_push_back() is returning false because
dm_noflush_suspending() is false.  When that happens, -EIO will escape
up the IO stack.
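
To spell that out, here is a throwaway userspace model of the
must_push_back() logic from the debug patch, fed the values from the
last DMERR line before the WARN (queue_if_no_path=0 suspend_active=1
suspending=0).  Plain C, not kernel code; model_must_push_back() is just
a stand-in name:

#include <stdbool.h>
#include <stdio.h>

/*
 * Model of must_push_back() as instrumented above; test_bit() and
 * dm_noflush_suspending() are replaced by the booleans the DMERR line
 * prints.
 */
static bool model_must_push_back(bool queue_if_no_path, bool suspend_active,
                                 bool noflush_suspending)
{
        bool suspending = suspend_active && noflush_suspending;

        return queue_if_no_path || suspending;
}

int main(void)
{
        /* Last DMERR before the WARN: queue_if_no_path=0 suspend_active=1 suspending=0 */
        bool pb = model_must_push_back(false, true, false);

        printf("must_push_back=%d -> __multipath_map() %s\n",
               pb, pb ? "pushes back (requeue)" : "returns -EIO");
        return 0;
}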

And this confirms that must_push_back() calling dm_noflush_suspending()
is quite suspect given queue_if_no_path was configured: we should
_always_ push back if no paths are available.
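
For illustration only (this is a sketch of the window I think we are
hitting, not a fix), the same kind of model walked through the flag
states the log implies, assuming the usual presuspend behaviour of
saving and then clearing MPATHF_QUEUE_IF_NO_PATH:

#include <stdbool.h>
#include <stdio.h>

/* Same decision as must_push_back(), expressed over the two flags. */
static bool push_back(bool queue, bool saved, bool noflush_suspending)
{
        bool suspend_active = (queue != saved);

        return queue || (suspend_active && noflush_suspending);
}

int main(void)
{
        /* 1: MPATHF_QUEUE_IF_NO_PATH still set (SAVED does not matter here) */
        printf("configured:          push_back=%d\n", push_back(true, false, false));

        /* 2: presuspend saved and cleared QUEUE_IF_NO_PATH, noflush suspend active */
        printf("noflush suspending:  push_back=%d\n", push_back(false, true, true));

        /* 3: dm_noflush_suspending() already 0, flags not yet restored: -EIO */
        printf("suspend flag gone:   push_back=%d\n", push_back(false, true, false));
        return 0;
}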

I'll dig deeper to really understand _why_ must_push_back() is coded
the way it is.  There is a deep historic reason, but hell if I can
recall what it is...