stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Liu Bo <bo.liu@linux.alibaba.com>, Jens Axboe <axboe@kernel.dk>,
	Sasha Levin <sashal@kernel.org>,
	linux-block@vger.kernel.org
Subject: [PATCH AUTOSEL 4.20 81/81] blk-iolatency: fix IO hang due to negative inflight counter
Date: Thu, 28 Feb 2019 10:08:13 -0500	[thread overview]
Message-ID: <20190228150813.10256-81-sashal@kernel.org> (raw)
In-Reply-To: <20190228150813.10256-1-sashal@kernel.org>

From: Liu Bo <bo.liu@linux.alibaba.com>

[ Upstream commit 8c772a9bfc7c07c76f4a58b58910452fbb20843b ]

Our test reported the following stack, and the vmcore showed that the
->inflight counter was -1.

[ffffc9003fcc38d0] __schedule at ffffffff8173d95d
[ffffc9003fcc3958] schedule at ffffffff8173de26
[ffffc9003fcc3970] io_schedule at ffffffff810bb6b6
[ffffc9003fcc3988] blkcg_iolatency_throttle at ffffffff813911cb
[ffffc9003fcc3a20] rq_qos_throttle at ffffffff813847f3
[ffffc9003fcc3a48] blk_mq_make_request at ffffffff8137468a
[ffffc9003fcc3b08] generic_make_request at ffffffff81368b49
[ffffc9003fcc3b68] submit_bio at ffffffff81368d7d
[ffffc9003fcc3bb8] ext4_io_submit at ffffffffa031be00 [ext4]
[ffffc9003fcc3c00] ext4_writepages at ffffffffa03163de [ext4]
[ffffc9003fcc3d68] do_writepages at ffffffff811c49ae
[ffffc9003fcc3d78] __filemap_fdatawrite_range at ffffffff811b6188
[ffffc9003fcc3e30] filemap_write_and_wait_range at ffffffff811b6301
[ffffc9003fcc3e60] ext4_sync_file at ffffffffa030cee8 [ext4]
[ffffc9003fcc3ea8] vfs_fsync_range at ffffffff8128594b
[ffffc9003fcc3ee8] do_fsync at ffffffff81285abd
[ffffc9003fcc3f18] sys_fsync at ffffffff81285d50
[ffffc9003fcc3f28] do_syscall_64 at ffffffff81003c04
[ffffc9003fcc3f50] entry_SYSCALL_64_after_swapgs at ffffffff81742b8e

The ->inflight counter may be negative (-1) if

1) blk-iolatency was disabled when the IO was issued,

2) blk-iolatency was enabled before this IO reached its endio,

3) the ->inflight counter is decreased from 0 to -1 in endio()

In fact, the hang can be easily reproduced with the script below:

H=/sys/fs/cgroup/unified/
P=/sys/fs/cgroup/unified/test

echo "+io" > $H/cgroup.subtree_control
mkdir -p $P

echo $$ > $P/cgroup.procs

xfs_io -f -d -c "pwrite 0 4k" /dev/sdg

echo "`cat /sys/block/sdg/dev` target=1000000" > $P/io.latency

xfs_io -f -d -c "pwrite 0 4k" /dev/sdg

This fixes the problem by freezing the queue so that while
enabling/disabling iolatency, there is no inflight rq running.

Note that quiesce_queue is not needed, as this only updates the iolatency
configuration, which the dispatching request_queue doesn't care about.

Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 block/blk-iolatency.c | 52 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 38c35c32aff2d..c1c72b42dda0a 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -72,6 +72,7 @@
 #include <linux/sched/loadavg.h>
 #include <linux/sched/signal.h>
 #include <trace/events/block.h>
+#include <linux/blk-mq.h>
 #include "blk-rq-qos.h"
 #include "blk-stat.h"
 
@@ -648,6 +649,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
 		return;
 
 	enabled = blk_iolatency_enabled(iolat->blkiolat);
+	if (!enabled)
+		return;
+
 	while (blkg && blkg->parent) {
 		iolat = blkg_to_lat(blkg);
 		if (!iolat) {
@@ -657,7 +661,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
 		rqw = &iolat->rq_wait;
 
 		atomic_dec(&rqw->inflight);
-		if (!enabled || iolat->min_lat_nsec == 0)
+		if (iolat->min_lat_nsec == 0)
 			goto next;
 		iolatency_record_time(iolat, &bio->bi_issue, now,
 				      issue_as_root);
@@ -801,10 +805,13 @@ int blk_iolatency_init(struct request_queue *q)
 	return 0;
 }
 
-static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+/*
+ * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
+ * return 0.
+ */
+static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
 {
 	struct iolatency_grp *iolat = blkg_to_lat(blkg);
-	struct blk_iolatency *blkiolat = iolat->blkiolat;
 	u64 oldval = iolat->min_lat_nsec;
 
 	iolat->min_lat_nsec = val;
@@ -813,9 +820,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
 				    BLKIOLATENCY_MAX_WIN_SIZE);
 
 	if (!oldval && val)
-		atomic_inc(&blkiolat->enabled);
+		return 1;
 	if (oldval && !val)
-		atomic_dec(&blkiolat->enabled);
+		return -1;
+	return 0;
 }
 
 static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -847,6 +855,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
 	u64 lat_val = 0;
 	u64 oldval;
 	int ret;
+	int enable = 0;
 
 	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
 	if (ret)
@@ -881,7 +890,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
 	blkg = ctx.blkg;
 	oldval = iolat->min_lat_nsec;
 
-	iolatency_set_min_lat_nsec(blkg, lat_val);
+	enable = iolatency_set_min_lat_nsec(blkg, lat_val);
+	if (enable) {
+		WARN_ON_ONCE(!blk_get_queue(blkg->q));
+		blkg_get(blkg);
+	}
+
 	if (oldval != iolat->min_lat_nsec) {
 		iolatency_clear_scaling(blkg);
 	}
@@ -889,6 +903,24 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
 	ret = 0;
 out:
 	blkg_conf_finish(&ctx);
+	if (ret == 0 && enable) {
+		struct iolatency_grp *tmp = blkg_to_lat(blkg);
+		struct blk_iolatency *blkiolat = tmp->blkiolat;
+
+		blk_mq_freeze_queue(blkg->q);
+
+		if (enable == 1)
+			atomic_inc(&blkiolat->enabled);
+		else if (enable == -1)
+			atomic_dec(&blkiolat->enabled);
+		else
+			WARN_ON_ONCE(1);
+
+		blk_mq_unfreeze_queue(blkg->q);
+
+		blkg_put(blkg);
+		blk_put_queue(blkg->q);
+	}
 	return ret ?: nbytes;
 }
 
@@ -1024,8 +1056,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
 {
 	struct iolatency_grp *iolat = pd_to_lat(pd);
 	struct blkcg_gq *blkg = lat_to_blkg(iolat);
+	struct blk_iolatency *blkiolat = iolat->blkiolat;
+	int ret;
 
-	iolatency_set_min_lat_nsec(blkg, 0);
+	ret = iolatency_set_min_lat_nsec(blkg, 0);
+	if (ret == 1)
+		atomic_inc(&blkiolat->enabled);
+	if (ret == -1)
+		atomic_dec(&blkiolat->enabled);
 	iolatency_clear_scaling(blkg);
 }
 
-- 
2.19.1


      parent reply	other threads:[~2019-02-28 15:28 UTC|newest]

Thread overview: 89+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-28 15:06 [PATCH AUTOSEL 4.20 01/81] ARM: OMAP: dts: N950/N9: fix onenand timings Sasha Levin
2019-02-28 15:06 ` [PATCH AUTOSEL 4.20 02/81] ARM: dts: omap4-droid4: Fix typo in cpcap IRQ flags Sasha Levin
2019-02-28 15:06 ` [PATCH AUTOSEL 4.20 03/81] ARM: dts: sun8i: h3: Add ethernet0 alias to Beelink X2 Sasha Levin
2019-02-28 15:06 ` [PATCH AUTOSEL 4.20 04/81] arm: dts: meson: Fix IRQ trigger type for macirq Sasha Levin
2019-02-28 15:06 ` [PATCH AUTOSEL 4.20 05/81] ARM: dts: meson8b: odroidc1: mark the SD card detection GPIO active-low Sasha Levin
2019-02-28 15:06 ` [PATCH AUTOSEL 4.20 06/81] ARM: dts: meson8b: ec100: " Sasha Levin
2019-02-28 15:06 ` [PATCH AUTOSEL 4.20 07/81] ARM: dts: meson8m2: mxiii-plus: " Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 08/81] signal: Make siginmask safe when passed a signal of 0 Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 09/81] ARM: dts: imx6sx: correct backward compatible of gpt Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 10/81] arm64: dts: renesas: r8a7796: Enable DMA for SCIF2 Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 11/81] arm64: dts: renesas: r8a77965: " Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 12/81] soc: fsl: qbman: avoid race in clearing QMan interrupt Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 13/81] pinctrl: mcp23s08: spi: Fix regmap allocation for mcp23s18 Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 14/81] wlcore: sdio: Fixup power on/off sequence Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 15/81] bpftool: Fix prog dump by tag Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 16/81] bpftool: fix percpu maps updating Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 17/81] bpf: sock recvbuff must be limited by rmem_max in bpf_setsockopt() Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 18/81] ARM: pxa: ssp: unneeded to free devm_ allocated data Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 19/81] ARM: dts: omap3-gta04: Fix graph_port warning Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 20/81] ARM: dts: n900: fix mmc1 card detect gpio polarity Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 21/81] ARM: dts: am335x-shc.dts: fix wrong cd pin level Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 22/81] arm64: dts: add msm8996 compatible to gicv3 Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 23/81] batman-adv: release station info tidstats Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 24/81] MIPS: DTS: jz4740: Correct interrupt number of DMA core Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 25/81] DTS: CI20: Fix bugs in ci20's device tree Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 26/81] usb: phy: fix link errors Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 27/81] usb: dwc3: exynos: Fix error handling of clk_prepare_enable Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 28/81] irqchip/gic-v4: Fix occasional VLPI drop Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 29/81] sk_msg: Always cancel strp work before freeing the psock Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 30/81] irqchip/gic-v3-its: Gracefully fail on LPI exhaustion Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 31/81] irqchip/mmp: Only touch the PJ4 IRQ & FIQ bits on enable/disable Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 32/81] drm/amdgpu: Add missing power attribute to APU check Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 33/81] drm/radeon: check if device is root before getting pci speed caps Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 34/81] debugfs: return error values, not NULL Sasha Levin
2019-02-28 15:25   ` Greg Kroah-Hartman
2019-03-11 17:22     ` Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 35/81] debugfs: debugfs_lookup() should return NULL if not found Sasha Levin
2019-02-28 15:25   ` Greg Kroah-Hartman
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 36/81] binder: fix CONFIG_ANDROID_BINDER_DEVICES Sasha Levin
2019-02-28 15:24   ` Greg Kroah-Hartman
2019-03-11 17:23     ` Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 37/81] drm/amdgpu: Transfer fences to dmabuf importer Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 38/81] net: stmmac: Fallback to Platform Data clock in Watchdog conversion Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 39/81] net: stmmac: Send TSO packets always from Queue 0 Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 40/81] net: stmmac: Disable EEE mode earlier in XMIT callback Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 41/81] irqchip/gic-v3-its: Fix ITT_entry_size accessor Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 42/81] relay: check return of create_buf_file() properly Sasha Levin
2019-02-28 15:26   ` Greg Kroah-Hartman
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 43/81] blk-mq: protect debugfs_create_files() from failures Sasha Levin
2019-02-28 15:26   ` Greg Kroah-Hartman
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 44/81] ath10k: correct bus type for WCN3990 Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 45/81] bpf, selftests: fix handling of sparse CPU allocations Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 46/81] bpf: run bpf programs with preemption disabled Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 47/81] bpf: fix lockdep false positive in percpu_freelist Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 48/81] bpf: fix potential deadlock in bpf_prog_register Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 49/81] bpf: Fix syscall's stackmap lookup potential deadlock Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 50/81] drm/amdgpu: Implement doorbell self-ring for NBIO 7.4 Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 51/81] drm/amdgpu: fix the incorrect external id for raven series Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 52/81] drm/sun4i: tcon: Prepare and enable TCON channel 0 clock at init Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 53/81] dmaengine: at_xdmac: Fix wrongfull report of a channel as in use Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 54/81] vsock/virtio: fix kernel panic after device hot-unplug Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 55/81] vsock/virtio: reset connected sockets on device removal Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 56/81] dmaengine: dmatest: Abort test in case of mapping error Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 57/81] selftests: netfilter: fix config fragment CONFIG_NF_TABLES_INET Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 58/81] selftests: netfilter: add simple masq/redirect test cases Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 59/81] netfilter: nf_nat: skip nat clash resolution for same-origin entries Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 60/81] arm64: ptdump: Don't iterate kernel page tables using PTRS_PER_PXX Sasha Levin
2019-02-28 15:14   ` Will Deacon
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 61/81] s390/qeth: release cmd buffer in error paths Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 62/81] s390/qeth: fix use-after-free in error path Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 63/81] s390/qeth: cancel close_dev work before removing a card Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 64/81] s390/qeth: conclude all event processing before offlining " Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 65/81] perf symbols: Filter out hidden symbols from labels Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 66/81] perf trace: Support multiple "vfs_getname" probes Sasha Levin
2019-02-28 15:07 ` [PATCH AUTOSEL 4.20 67/81] MIPS: Loongson: Introduce and use loongson_llsc_mb() Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 68/81] MIPS: Remove function size check in get_frame_info() Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 69/81] Revert "scsi: libfc: Add WARN_ON() when deleting rports" Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 70/81] i2c: omap: Use noirq system sleep pm ops to idle device for suspend Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 71/81] drm/amdgpu: use spin_lock_irqsave to protect vm_manager.pasid_idr Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 72/81] drm/omap: dsi: Fix crash in DSI debug dumps Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 73/81] drm/omap: dsi: Fix OF platform depopulate Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 74/81] drm/omap: dsi: Hack-fix DSI bus flags Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 75/81] nvme: lock NS list changes while handling command effects Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 76/81] nvme-pci: fix rapid add remove sequence Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 77/81] fs: ratelimit __find_get_block_slow() failure message Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 78/81] qed: Fix EQ full firmware assert Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 79/81] qed: Consider TX tcs while deriving the max num_queues for PF Sasha Levin
2019-02-28 15:08 ` [PATCH AUTOSEL 4.20 80/81] qede: Fix system crash on configuring channels Sasha Levin
2019-02-28 15:08 ` Sasha Levin [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190228150813.10256-81-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=bo.liu@linux.alibaba.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).