From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, hreitz@redhat.com, jsnow@redhat.com,
qemu-devel@nongnu.org, qemu-stable@nongnu.org
Subject: [PATCH 2/7] block: Add blk_co_start/end_request() and BDRV_REQ_NO_QUEUE
Date: Tue, 21 Apr 2026 18:11:27 +0200 [thread overview]
Message-ID: <20260421161132.99878-3-kwolf@redhat.com> (raw)
In-Reply-To: <20260421161132.99878-1-kwolf@redhat.com>
If a device uses blk_inc/dec_in_flight() in order to build macro
operations that involve multiple requests for the block layer and that
need to be completed as a unit before the BlockBackend can be considered
drained, it sets the stage for a deadlock: When a drain is requested,
the inner request at the BlockBackend level will be queued in
blk_wait_while_drained() and wait until the drained section ends, but at
the same time, drain_begin can only return if the whole macro operation
at the device level has completed.
Introduce a new interface to allow implementing the logic correctly:
Instead of queueing individual requests, blk_co_start_request() calls
blk_wait_while_drained() once at the beginning. The individual requests
must then set BDRV_REQ_NO_QUEUE to avoid being queued and running into
the deadlock; being wrapped in blk_co_start/end_request() makes sure
that drain_begin waits for them and they don't sneak in when the
BlockBackend is supposed to already be quiescent.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/block-common.h | 11 ++++++++-
include/system/block-backend-io.h | 2 ++
block/block-backend.c | 38 +++++++++++++++++++++++--------
3 files changed, 41 insertions(+), 10 deletions(-)
diff --git a/include/block/block-common.h b/include/block/block-common.h
index c8c626daeaa..895ea175413 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -215,8 +215,17 @@ typedef enum {
*/
BDRV_REQ_NO_WAIT = 0x400,
+ /*
+ * Used between blk_co_start_request() and blk_end_request() to avoid
+ * that the request waits in a drained BlockBackend until the drained
+ * section ends. Waiting would cause a deadlock because drain waits for
+ * blk_end_request() to be called, but the request never completes
+ * because it waits for the drain to end.
+ */
+ BDRV_REQ_NO_QUEUE = 0x800,
+
/* Mask of valid flags */
- BDRV_REQ_MASK = 0x7ff,
+ BDRV_REQ_MASK = 0xfff,
} BdrvRequestFlags;
#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
diff --git a/include/system/block-backend-io.h b/include/system/block-backend-io.h
index 6d5ac476fc0..0248c1c36e2 100644
--- a/include/system/block-backend-io.h
+++ b/include/system/block-backend-io.h
@@ -71,6 +71,8 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
void blk_inc_in_flight(BlockBackend *blk);
void blk_dec_in_flight(BlockBackend *blk);
+void coroutine_fn blk_co_start_request(BlockBackend *blk);
+void blk_end_request(BlockBackend *blk);
bool coroutine_fn GRAPH_RDLOCK blk_co_is_inserted(BlockBackend *blk);
bool co_wrapper_mixed_bdrv_rdlock blk_is_inserted(BlockBackend *blk);
diff --git a/block/block-backend.c b/block/block-backend.c
index 99446571201..ee00440e28d 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -82,6 +82,7 @@ struct BlockBackend {
QemuMutex queued_requests_lock; /* protects queued_requests */
CoQueue queued_requests;
bool disable_request_queuing; /* atomic */
+ int start_request_count; /* atomic */
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
@@ -1306,10 +1307,16 @@ bool blk_in_drain(BlockBackend *blk)
}
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
-static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
+static void coroutine_fn blk_wait_while_drained(BlockBackend *blk,
+ BdrvRequestFlags flags)
{
assert(blk->in_flight > 0);
+ if (flags & BDRV_REQ_NO_QUEUE) {
+ assert(qatomic_read(&blk->start_request_count));
+ return;
+ }
+
if (qatomic_read(&blk->quiesce_counter) &&
!qatomic_read(&blk->disable_request_queuing)) {
/*
@@ -1335,7 +1342,7 @@ blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
BlockDriverState *bs;
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, flags);
GRAPH_RDLOCK_GUARD();
/* Call blk_bs() only after waiting, the graph may have changed */
@@ -1410,7 +1417,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
BlockDriverState *bs;
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, flags);
GRAPH_RDLOCK_GUARD();
/* Call blk_bs() only after waiting, the graph may have changed */
@@ -1523,6 +1530,19 @@ void blk_dec_in_flight(BlockBackend *blk)
aio_wait_kick();
}
+void coroutine_fn blk_co_start_request(BlockBackend *blk)
+{
+ blk_inc_in_flight(blk);
+ blk_wait_while_drained(blk, 0);
+ qatomic_inc(&blk->start_request_count);
+}
+
+void blk_end_request(BlockBackend *blk)
+{
+ qatomic_dec(&blk->start_request_count);
+ blk_dec_in_flight(blk);
+}
+
static void error_callback_bh(void *opaque)
{
struct BlockBackendAIOCB *acb = opaque;
@@ -1741,7 +1761,7 @@ blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
if (!blk_co_is_available(blk)) {
@@ -1788,7 +1808,7 @@ blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
int ret;
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
ret = blk_check_byte_request(blk, offset, bytes);
@@ -1834,7 +1854,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
static int coroutine_fn blk_co_do_flush(BlockBackend *blk)
{
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
if (!blk_co_is_available(blk)) {
@@ -2009,7 +2029,7 @@ int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
IO_CODE();
blk_inc_in_flight(blk); /* increase before waiting */
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
@@ -2034,7 +2054,7 @@ int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
IO_CODE();
blk_inc_in_flight(blk);
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
ret = blk_check_byte_request(blk, offset, len);
@@ -2058,7 +2078,7 @@ int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
IO_CODE();
blk_inc_in_flight(blk);
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, flags);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
--
2.53.0
next prev parent reply other threads:[~2026-04-21 16:13 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-21 16:11 [PATCH 0/7] ide: Fix deadlock between TRIM and drain Kevin Wolf
2026-04-21 16:11 ` [PATCH 1/7] blkdebug: Add 'delay-ns' option Kevin Wolf
2026-04-21 16:11 ` Kevin Wolf [this message]
2026-04-21 16:11 ` [PATCH 3/7] block: Add flags parameter to blk_*_pdiscard() Kevin Wolf
2026-04-21 16:11 ` [PATCH 4/7] ide: Minimal fix for deadlock between TRIM and drain Kevin Wolf
2026-04-21 16:11 ` [PATCH 5/7] ide: Clean up ide_trim_co_entry() to be idiomatic coroutine code Kevin Wolf
2026-04-21 16:11 ` [PATCH 6/7] ide-test: Factor out wait_dma_completion() Kevin Wolf
2026-04-21 16:11 ` [PATCH 7/7] ide-test: Test reset during TRIM Kevin Wolf
2026-05-12 11:58 ` [PATCH 0/7] ide: Fix deadlock between TRIM and drain Kevin Wolf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260421161132.99878-3-kwolf@redhat.com \
--to=kwolf@redhat.com \
--cc=hreitz@redhat.com \
--cc=jsnow@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=qemu-stable@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.