From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, vsementsov@virtuozzo.com, den@virtuozzo.com,
qemu-devel@nongnu.org, mreitz@redhat.com,
dplotnikov@virtuozzo.com
Subject: [Qemu-devel] [PATCH 4/4] block-backend: Queue requests while drained
Date: Thu, 25 Jul 2019 18:27:04 +0200 [thread overview]
Message-ID: <20190725162704.12622-5-kwolf@redhat.com> (raw)
In-Reply-To: <20190725162704.12622-1-kwolf@redhat.com>
This fixes device like IDE that can still start new requests from I/O
handlers in the CPU thread while the block backend is drained.
The basic assumption is that in a drain section, no new requests should
be allowed through a BlockBackend (blk_drained_begin/end don't exist,
we get drain sections only on the node level). However, there are two
special cases where requests should not be queued:
1. Block jobs: We already make sure that block jobs are paused in a
drain section, so they won't start new requests. However, if the
drain_begin is called on the job's BlockBackend first, it can happen
that we deadlock because the job stays busy until it reaches a pause
point - which it can't if it's requests aren't processed any more.
The proper solution here would be to make all requests through the
job's filter node instead of using a BlockBackend. For now, just
disabling request queuin on the job BlockBackend is simpler.
2. In test cases where making requests through bdrv_* would be
cumbersome because we'd need a BdrvChild. As we already got the
functionality to disable request queuing from 1., use it in tests,
too, for convenience.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/sysemu/block-backend.h | 11 +++---
block/backup.c | 1 +
block/block-backend.c | 69 +++++++++++++++++++++++++++++-----
block/commit.c | 2 +
block/mirror.c | 6 ++-
blockjob.c | 3 ++
tests/test-bdrv-drain.c | 1 +
7 files changed, 76 insertions(+), 17 deletions(-)
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 7320b58467..d453a4e9a1 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -104,6 +104,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
void blk_iostatus_enable(BlockBackend *blk);
bool blk_iostatus_is_enabled(const BlockBackend *blk);
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
@@ -119,10 +120,10 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
+ BdrvRequestFlags flags, bool wait_while_drained);
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags, bool wait_while_drained);
static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
unsigned int bytes, void *buf,
@@ -130,7 +131,7 @@ static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
- return blk_co_preadv(blk, offset, bytes, &qiov, flags);
+ return blk_co_preadv(blk, offset, bytes, &qiov, flags, true);
}
static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
@@ -139,7 +140,7 @@ static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
- return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
+ return blk_co_pwritev(blk, offset, bytes, &qiov, flags, true);
}
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
diff --git a/block/backup.c b/block/backup.c
index 715e1d3be8..f66b2f4ee7 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -635,6 +635,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
if (ret < 0) {
goto error;
}
+ blk_set_disable_request_queuing(job->target, true);
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
diff --git a/block/block-backend.c b/block/block-backend.c
index fdd6b01ecf..603b281743 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -79,6 +79,9 @@ struct BlockBackend {
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
int quiesce_counter;
+ CoQueue queued_requests;
+ bool disable_request_queuing;
+
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
@@ -339,6 +342,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
block_acct_init(&blk->stats);
+ qemu_co_queue_init(&blk->queued_requests);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
QLIST_INIT(&blk->aio_notifiers);
@@ -1096,6 +1100,11 @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
blk->allow_aio_context_change = allow;
}
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
+{
+ blk->disable_request_queuing = disable;
+}
+
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
size_t size)
{
@@ -1127,13 +1136,26 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return 0;
}
+static void blk_wait_while_drained(BlockBackend *blk)
+{
+ if (blk->quiesce_counter && !blk->disable_request_queuing) {
+ qemu_co_queue_wait(&blk->queued_requests, NULL);
+ }
+}
+
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+ BdrvRequestFlags flags, bool wait_while_drained)
{
int ret;
- BlockDriverState *bs = blk_bs(blk);
+ BlockDriverState *bs;
+ if (wait_while_drained) {
+ blk_wait_while_drained(blk);
+ }
+
+ /* Call blk_bs() only after waiting, the graph may have changed */
+ bs = blk_bs(blk);
trace_blk_co_preadv(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
@@ -1156,11 +1178,17 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+ BdrvRequestFlags flags, bool wait_while_drained)
{
int ret;
- BlockDriverState *bs = blk_bs(blk);
+ BlockDriverState *bs;
+
+ if (wait_while_drained) {
+ blk_wait_while_drained(blk);
+ }
+ /* Call blk_bs() only after waiting, the graph may have changed */
+ bs = blk_bs(blk);
trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
@@ -1198,7 +1226,7 @@ static void blk_read_entry(void *opaque)
QEMUIOVector *qiov = rwco->iobuf;
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
- qiov, rwco->flags);
+ qiov, rwco->flags, true);
aio_wait_kick();
}
@@ -1208,7 +1236,7 @@ static void blk_write_entry(void *opaque)
QEMUIOVector *qiov = rwco->iobuf;
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
- qiov, rwco->flags);
+ qiov, rwco->flags, true);
aio_wait_kick();
}
@@ -1349,9 +1377,15 @@ static void blk_aio_read_entry(void *opaque)
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
+ if (rwco->blk->quiesce_counter) {
+ blk_dec_in_flight(rwco->blk);
+ blk_wait_while_drained(rwco->blk);
+ blk_inc_in_flight(rwco->blk);
+ }
+
assert(qiov->size == acb->bytes);
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
- qiov, rwco->flags);
+ qiov, rwco->flags, false);
blk_aio_complete(acb);
}
@@ -1361,9 +1395,15 @@ static void blk_aio_write_entry(void *opaque)
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
+ if (rwco->blk->quiesce_counter) {
+ blk_dec_in_flight(rwco->blk);
+ blk_wait_while_drained(rwco->blk);
+ blk_inc_in_flight(rwco->blk);
+ }
+
assert(!qiov || qiov->size == acb->bytes);
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
- qiov, rwco->flags);
+ qiov, rwco->flags, false);
blk_aio_complete(acb);
}
@@ -1482,6 +1522,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
+ blk_wait_while_drained(blk);
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1522,7 +1564,11 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
- int ret = blk_check_byte_request(blk, offset, bytes);
+ int ret;
+
+ blk_wait_while_drained(blk);
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
@@ -2004,7 +2050,7 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
return blk_co_pwritev(blk, offset, bytes, NULL,
- flags | BDRV_REQ_ZERO_WRITE);
+ flags | BDRV_REQ_ZERO_WRITE, true);
}
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
@@ -2232,6 +2278,9 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
if (blk->dev_ops && blk->dev_ops->drained_end) {
blk->dev_ops->drained_end(blk->dev_opaque);
}
+ while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+ /* Resume all queued requests */
+ }
}
}
diff --git a/block/commit.c b/block/commit.c
index 2c5a6d4ebc..408ae15389 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -350,6 +350,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
if (ret < 0) {
goto fail;
}
+ blk_set_disable_request_queuing(s->base, true);
s->base_bs = base;
/* Required permissions are already taken with block_job_add_bdrv() */
@@ -358,6 +359,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
if (ret < 0) {
goto fail;
}
+ blk_set_disable_request_queuing(s->top, true);
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
diff --git a/block/mirror.c b/block/mirror.c
index 7483051f8d..8d0a3a987d 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -231,7 +231,8 @@ static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret)
return;
}
- ret = blk_co_pwritev(s->target, op->offset, op->qiov.size, &op->qiov, 0);
+ ret = blk_co_pwritev(s->target, op->offset, op->qiov.size, &op->qiov, 0,
+ false);
mirror_write_complete(op, ret);
}
@@ -1237,7 +1238,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
switch (method) {
case MIRROR_METHOD_COPY:
ret = blk_co_pwritev(job->target, dirty_offset, dirty_bytes,
- qiov ? &target_qiov : NULL, flags);
+ qiov ? &target_qiov : NULL, flags, false);
break;
case MIRROR_METHOD_ZERO:
@@ -1624,6 +1625,7 @@ static BlockJob *mirror_start_job(
blk_set_force_allow_inactivate(s->target);
}
blk_set_allow_aio_context_change(s->target, true);
+ blk_set_disable_request_queuing(s->target, true);
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
diff --git a/blockjob.c b/blockjob.c
index 20b7f557da..73d9f1ba2b 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -445,6 +445,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
+ /* Disable request queuing in the BlockBackend to avoid deadlocks on drain:
+ * The job reports that it's busy until it reaches a pause point. */
+ blk_set_disable_request_queuing(blk, true);
blk_set_allow_aio_context_change(blk, true);
/* Only set speed when necessary to avoid NotSupported error */
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index 03fa1142a1..3fcf7c1c95 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -677,6 +677,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
&error_abort);
s = bs->opaque;
blk_insert_bs(blk, bs, &error_abort);
+ blk_set_disable_request_queuing(blk, true);
blk_set_aio_context(blk, ctx_a, &error_abort);
aio_context_acquire(ctx_a);
--
2.20.1
next prev parent reply other threads:[~2019-07-25 16:28 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-25 16:27 [Qemu-devel] [PATCH 0/4] block-backend: Queue requests while drained Kevin Wolf
2019-07-25 16:27 ` [Qemu-devel] [PATCH 1/4] block: Remove blk_pread_unthrottled() Kevin Wolf
2019-07-26 9:18 ` Max Reitz
2019-07-25 16:27 ` [Qemu-devel] [PATCH 2/4] block: Reduce (un)drains when replacing a child Kevin Wolf
2019-07-25 16:27 ` [Qemu-devel] [PATCH 3/4] mirror: Keep target drained until graph changes are done Kevin Wolf
2019-07-25 17:03 ` Eric Blake
2019-07-26 9:52 ` Max Reitz
2019-07-26 11:36 ` Kevin Wolf
2019-07-26 12:30 ` Max Reitz
2019-07-25 16:27 ` Kevin Wolf [this message]
2019-07-25 17:06 ` [Qemu-devel] [PATCH 4/4] block-backend: Queue requests while drained Eric Blake
2019-07-26 10:50 ` Max Reitz
2019-07-26 11:49 ` Kevin Wolf
2019-07-26 12:34 ` Max Reitz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190725162704.12622-5-kwolf@redhat.com \
--to=kwolf@redhat.com \
--cc=den@virtuozzo.com \
--cc=dplotnikov@virtuozzo.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=vsementsov@virtuozzo.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).