public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
From: Uday Shankar <ushankar@purestorage.com>
To: Ming Lei <ming.lei@redhat.com>,
	 Caleb Sander Mateos <csander@purestorage.com>,
	Jens Axboe <axboe@kernel.dk>,  Shuah Khan <shuah@kernel.org>
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	 linux-kselftest@vger.kernel.org,
	Uday Shankar <ushankar@purestorage.com>
Subject: [PATCH 2/2] selftests: ublk: test that teardown after incomplete recovery completes
Date: Fri, 03 Apr 2026 21:23:56 -0600	[thread overview]
Message-ID: <20260403-cancel-v1-2-86e5a6b3d3af@purestorage.com> (raw)
In-Reply-To: <20260403-cancel-v1-0-86e5a6b3d3af@purestorage.com>

Before the fix, teardown of a ublk server that was attempting to recover
a device, but died when it had submitted a nonempty proper subset of the
fetch commands to any queue would loop forever. Add a test to verify
that, after the fix, teardown completes. This is done by:

- Adding a new argument to the fault_inject target that causes it die
  after fetching a nonempty proper subset of the IOs to a queue
- Using that argument in a new test while trying to recover an
  already-created device
- Attempting to delete the ublk device at the end of the test; this
  hangs forever if teardown from the fault-injected ublk server never
  completed.

It was manually verified that the test passes with the fix and hangs
without it.

Signed-off-by: Uday Shankar <ushankar@purestorage.com>
---
 tools/testing/selftests/ublk/Makefile           |  1 +
 tools/testing/selftests/ublk/fault_inject.c     | 51 +++++++++++++++++++++++--
 tools/testing/selftests/ublk/kublk.c            |  4 ++
 tools/testing/selftests/ublk/kublk.h            |  3 ++
 tools/testing/selftests/ublk/test_generic_17.sh | 35 +++++++++++++++++
 5 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index 8ac2d4a682a1768fb1eb9d2dd2a5d01294a67a03..d338668c5a5fbd73f6d70165455a3551ab13e894 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -18,6 +18,7 @@ TEST_PROGS += test_generic_10.sh
 TEST_PROGS += test_generic_12.sh
 TEST_PROGS += test_generic_13.sh
 TEST_PROGS += test_generic_16.sh
+TEST_PROGS += test_generic_17.sh
 
 TEST_PROGS += test_batch_01.sh
 TEST_PROGS += test_batch_02.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
index 3b897f69c014cc73b4b469d816e80284dd21b577..228a9605053409c84baaf255f97c4abc271a8bfd 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -10,11 +10,17 @@
 
 #include "kublk.h"
 
+struct fi_opts {
+	long long delay_ns;
+	bool die_during_fetch;
+};
+
 static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
 				      struct ublk_dev *dev)
 {
 	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
 	unsigned long dev_size = 250UL << 30;
+	struct fi_opts *opts = NULL;
 
 	if (ctx->auto_zc_fallback) {
 		ublk_err("%s: not support auto_zc_fallback\n", __func__);
@@ -35,17 +41,51 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
 	};
 	ublk_set_integrity_params(ctx, &dev->tgt.params);
 
-	dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000);
+	opts = calloc(1, sizeof(*opts));
+	if (!opts) {
+		ublk_err("%s: couldn't allocate memory for opts\n", __func__);
+		return -ENOMEM;
+	}
+
+	opts->delay_ns = ctx->fault_inject.delay_us * 1000;
+	opts->die_during_fetch = ctx->fault_inject.die_during_fetch;
+	dev->private_data = opts;
+
 	return 0;
 }
 
+static void ublk_fault_inject_pre_fetch_io(struct ublk_thread *t,
+					   struct ublk_queue *q, int tag)
+{
+	struct fi_opts *opts = q->dev->private_data;
+
+	if (!opts->die_during_fetch)
+		return;
+
+	/*
+	 * Each queue fetches its IOs in increasing order of tags, so
+	 * dying just before we're about to fetch tag 1 (regardless of
+	 * what queue we're on) guarantees that we've fetched a nonempty
+	 * proper subset of the tags on that queue.
+	 */
+	if (tag == 1) {
+		/*
+		 * Ensure our commands are actually live in the kernel
+		 * before we die.
+		 */
+		io_uring_submit(&t->ring);
+		raise(SIGKILL);
+	}
+}
+
 static int ublk_fault_inject_queue_io(struct ublk_thread *t,
 				      struct ublk_queue *q, int tag)
 {
 	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
 	struct io_uring_sqe *sqe;
+	struct fi_opts *opts = q->dev->private_data;
 	struct __kernel_timespec ts = {
-		.tv_nsec = (long long)q->dev->private_data,
+		.tv_nsec = opts->delay_ns,
 	};
 
 	ublk_io_alloc_sqes(t, &sqe, 1);
@@ -77,29 +117,34 @@ static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv
 {
 	static const struct option longopts[] = {
 		{ "delay_us", 	1,	NULL,  0  },
+		{ "die_during_fetch", 1, NULL, 0  },
 		{ 0, 0, 0, 0 }
 	};
 	int option_idx, opt;
 
 	ctx->fault_inject.delay_us = 0;
+	ctx->fault_inject.die_during_fetch = false;
 	while ((opt = getopt_long(argc, argv, "",
 				  longopts, &option_idx)) != -1) {
 		switch (opt) {
 		case 0:
 			if (!strcmp(longopts[option_idx].name, "delay_us"))
 				ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10);
+			if (!strcmp(longopts[option_idx].name, "die_during_fetch"))
+				ctx->fault_inject.die_during_fetch = strtoll(optarg, NULL, 10);
 		}
 	}
 }
 
 static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops)
 {
-	printf("\tfault_inject: [--delay_us us (default 0)]\n");
+	printf("\tfault_inject: [--delay_us us (default 0)] [--die_during_fetch 1]\n");
 }
 
 const struct ublk_tgt_ops fault_inject_tgt_ops = {
 	.name = "fault_inject",
 	.init_tgt = ublk_fault_inject_tgt_init,
+	.pre_fetch_io = ublk_fault_inject_pre_fetch_io,
 	.queue_io = ublk_fault_inject_queue_io,
 	.tgt_io_done = ublk_fault_inject_tgt_io_done,
 	.parse_cmd_line = ublk_fault_inject_cmd_line,
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index e1c3b3c55e565c8cad6b6fe9b9b764cd244818c0..8260c96a39c05584065f41a52f3d9050614454c6 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -796,6 +796,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t)
 			q = &t->dev->q[q_id];
 			io = &q->ios[tag];
 			io->buf_index = j++;
+			if (q->tgt_ops->pre_fetch_io)
+				q->tgt_ops->pre_fetch_io(t, q, tag);
 			ublk_queue_io_cmd(t, io);
 		}
 	} else {
@@ -807,6 +809,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t)
 		for (i = 0; i < q->q_depth; i++) {
 			io = &q->ios[i];
 			io->buf_index = i;
+			if (q->tgt_ops->pre_fetch_io)
+				q->tgt_ops->pre_fetch_io(t, q, i);
 			ublk_queue_io_cmd(t, io);
 		}
 	}
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 02f0c55d006b4c791fea4456687d0d8757be7be2..f784946144ad3d6e347a867fb1389a8e057766a0 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -60,6 +60,7 @@ struct stripe_ctx {
 struct fault_inject_ctx {
 	/* fault_inject */
 	unsigned long   delay_us;
+	bool die_during_fetch;
 };
 
 struct dev_ctx {
@@ -138,6 +139,8 @@ struct ublk_tgt_ops {
 	int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
 	void (*deinit_tgt)(struct ublk_dev *);
 
+	void (*pre_fetch_io)(struct ublk_thread *t, struct ublk_queue *q,
+			     int tag);
 	int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
 	void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
 			    const struct io_uring_cqe *);
diff --git a/tools/testing/selftests/ublk/test_generic_17.sh b/tools/testing/selftests/ublk/test_generic_17.sh
new file mode 100755
index 0000000000000000000000000000000000000000..2278b5fc9dba523bb1bf8411d22f4f95a28da905
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_17.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+_prep_test "fault_inject" "teardown after incomplete recovery"
+
+# First start and stop a ublk server with device configured for recovery
+dev_id=$(_add_ublk_dev -t fault_inject -r 1)
+_check_add_dev $TID $?
+state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+if [ "$state" != "QUIESCED" ]; then
+        echo "device isn't quiesced($state) after $action"
+        ERR_CODE=255
+fi
+
+# Then recover the device, but use --die_during_fetch to have the ublk
+# server die while a queue has some (but not all) I/Os fetched
+${UBLK_PROG} recover -n "${dev_id}" --foreground -t fault_inject --die_during_fetch 1
+RECOVER_RES=$?
+# 137 is the result when dying of SIGKILL
+if (( RECOVER_RES != 137 )); then
+        echo "recover command exited with unexpected code ${RECOVER_RES}!"
+        ERR_CODE=255
+fi
+
+# Clean up the device. This can only succeed once teardown of the above
+# exited ublk server completes. So if teardown never completes, we will
+# time out here
+_ublk_del_dev "${dev_id}"
+
+_cleanup_test "fault_inject"
+_show_result $TID $ERR_CODE

-- 
2.34.1


  parent reply	other threads:[~2026-04-04  3:23 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-04  3:23 [PATCH 0/2] ublk: fix infinite loop in ublk server teardown Uday Shankar
2026-04-04  3:23 ` [PATCH 1/2] ublk: reset per-IO canceled flag on each fetch Uday Shankar
2026-04-04 13:28   ` Ming Lei
2026-04-05  1:02     ` zhang
2026-04-06  3:16       ` Ming Lei
2026-04-06  3:37         ` zhang
2026-04-04  3:23 ` Uday Shankar [this message]
2026-04-04 13:33   ` [PATCH 2/2] selftests: ublk: test that teardown after incomplete recovery completes Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260403-cancel-v1-2-86e5a6b3d3af@purestorage.com \
    --to=ushankar@purestorage.com \
    --cc=axboe@kernel.dk \
    --cc=csander@purestorage.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    --cc=shuah@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox