qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, mreitz@redhat.com, pbonzini@redhat.com,
	famz@redhat.com, stefanha@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 18/19] block: Allow graph changes in bdrv_drain_all_begin/end sections
Date: Wed, 11 Apr 2018 18:39:39 +0200	[thread overview]
Message-ID: <20180411163940.2523-19-kwolf@redhat.com> (raw)
In-Reply-To: <20180411163940.2523-1-kwolf@redhat.com>

bdrv_drain_all_*() used bdrv_next() to iterate over all root nodes and
did a subtree drain for each of them. This works fine as long as the
graph is static, but sadly, reality looks different.

If the graph changes so that root nodes are added or removed, we would
have to compensate for this. bdrv_next() returns each root node only
once even if it's the root node for multiple BlockBackends or for a
monitor-owned block driver tree, which would only complicate things.

The much easier and more obviously correct way is to fundamentally
change the way the functions work: Iterate over all BlockDriverStates,
no matter who owns them, and drain them individually. Compensation is
only necessary when a new BDS is created inside a drain_all section.
Removal of a BDS doesn't require any action because it's gone afterwards
anyway.

This change means that some nodes get a higher bs->quiesce_count now
because each node propagates its individual drain to all of its parents.
In the old subtree drain, propagation back to the parent that made the
recursive drain request was avoided. While this isn't perfectly
beautiful, accepting the higher counts seems preferable to adding drain
code to multiple other places that modify the graph.

The test case is changed to account for the higher counts where
necessary.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block.h     |  1 +
 include/block/block_int.h |  1 +
 block.c                   | 20 +++++++++++++++-
 block/io.c                | 58 ++++++++++++++++++++++++++++++++++++-----------
 tests/test-bdrv-drain.c   | 37 +++++++++++++++++-------------
 5 files changed, 87 insertions(+), 30 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index de2cba2c74..0b59d519c5 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -412,6 +412,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
                                  Error **errp);
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
 BlockDriverState *bdrv_next_node(BlockDriverState *bs);
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
 
 typedef struct BdrvNextIterator {
     enum {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index dc6985e3ae..2c86a7b53f 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -804,6 +804,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
     BdrvRequestFlags flags);
 
+extern unsigned int bdrv_drain_all_count;
 void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
 void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
 
diff --git a/block.c b/block.c
index 330238de19..c1b339c4a4 100644
--- a/block.c
+++ b/block.c
@@ -332,6 +332,10 @@ BlockDriverState *bdrv_new(void)
 
     qemu_co_queue_init(&bs->flush_queue);
 
+    for (i = 0; i < bdrv_drain_all_count; i++) {
+        bdrv_drained_begin(bs);
+    }
+
     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
 
     return bs;
@@ -1160,7 +1164,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
                             int open_flags, Error **errp)
 {
     Error *local_err = NULL;
-    int ret;
+    int i, ret;
 
     bdrv_assign_node_name(bs, node_name, &local_err);
     if (local_err) {
@@ -1208,6 +1212,12 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
     assert(bdrv_min_mem_align(bs) != 0);
     assert(is_power_of_2(bs->bl.request_alignment));
 
+    for (i = 0; i < bs->quiesce_counter; i++) {
+        if (drv->bdrv_co_drain_begin) {
+            drv->bdrv_co_drain_begin(bs);
+        }
+    }
+
     return 0;
 open_failed:
     bs->drv = NULL;
@@ -4034,6 +4044,14 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
     return QTAILQ_NEXT(bs, node_list);
 }
 
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
+{
+    if (!bs) {
+        return QTAILQ_FIRST(&all_bdrv_states);
+    }
+    return QTAILQ_NEXT(bs, bs_list);
+}
+
 const char *bdrv_get_node_name(const BlockDriverState *bs)
 {
     return bs->node_name;
diff --git a/block/io.c b/block/io.c
index db6810b35f..af65c3ec2f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -38,6 +38,8 @@
 /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
 #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
 
+static AioWait drain_all_aio_wait;
+
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int bytes, BdrvRequestFlags flags);
 
@@ -445,6 +447,29 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs)
     }
 }
 
+unsigned int bdrv_drain_all_count = 0;
+
+static bool bdrv_drain_all_poll(void)
+{
+    BlockDriverState *bs = NULL;
+    bool result = false;
+
+    /* Execute pending BHs first (may modify the graph) and check everything
+     * else only after the BHs have executed. */
+    while (aio_poll(qemu_get_aio_context(), false));
+
+    /* bdrv_drain_poll() with top_level = false can't make changes to the
+     * graph, so iterating bdrv_next_all_states() is safe. */
+    while ((bs = bdrv_next_all_states(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(aio_context);
+        result |= bdrv_drain_poll(bs, false, false);
+        aio_context_release(aio_context);
+    }
+
+    return result;
+}
+
 /*
  * Wait for pending requests to complete across all BlockDriverStates
  *
@@ -459,45 +484,51 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs)
  */
 void bdrv_drain_all_begin(void)
 {
-    BlockDriverState *bs;
-    BdrvNextIterator it;
+    BlockDriverState *bs = NULL;
 
     if (qemu_in_coroutine()) {
         bdrv_co_yield_to_drain(NULL, true, false, NULL, true);
         return;
     }
 
-    /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
-     * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
-     * nodes in several different AioContexts, so make sure we're in the main
-     * context. */
+    /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
+     * loop AioContext, so make sure we're in the main context. */
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+    assert(bdrv_drain_all_count < INT_MAX);
+    bdrv_drain_all_count++;
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    /* Quiesce all nodes, without polling in-flight requests yet. The graph
+     * cannot change during this loop. */
+    while ((bs = bdrv_next_all_states(bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
-        bdrv_do_drained_begin(bs, true, NULL, true);
+        bdrv_do_drained_begin(bs, false, NULL, false);
         aio_context_release(aio_context);
     }
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    /* Now poll the in-flight requests */
+    AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
+
+    while ((bs = bdrv_next_all_states(bs))) {
         bdrv_drain_assert_idle(bs);
     }
 }
 
 void bdrv_drain_all_end(void)
 {
-    BlockDriverState *bs;
-    BdrvNextIterator it;
+    BlockDriverState *bs = NULL;
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    while ((bs = bdrv_next_all_states(bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
-        bdrv_do_drained_end(bs, true, NULL);
+        bdrv_do_drained_end(bs, false, NULL);
         aio_context_release(aio_context);
     }
+
+    assert(bdrv_drain_all_count > 0);
+    bdrv_drain_all_count--;
 }
 
 void bdrv_drain_all(void)
@@ -620,6 +651,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
 void bdrv_wakeup(BlockDriverState *bs)
 {
     aio_wait_kick(bdrv_get_aio_wait(bs));
+    aio_wait_kick(&drain_all_aio_wait);
 }
 
 void bdrv_dec_in_flight(BlockDriverState *bs)
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index 79d845ecbb..97ca0743c6 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -174,7 +174,8 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
     }
 }
 
-static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
+static void test_drv_cb_common(enum drain_type drain_type, int top_drain_count,
+                               int backing_drain_count)
 {
     BlockBackend *blk;
     BlockDriverState *bs, *backing;
@@ -205,8 +206,8 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
 
     do_drain_begin(drain_type, bs);
 
-    g_assert_cmpint(s->drain_count, ==, 1);
-    g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
+    g_assert_cmpint(s->drain_count, ==, top_drain_count);
+    g_assert_cmpint(backing_s->drain_count, ==, backing_drain_count);
 
     do_drain_end(drain_type, bs);
 
@@ -225,8 +226,8 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
     do_drain_begin(drain_type, bs);
 
     g_assert_cmpint(aio_ret, ==, 0);
-    g_assert_cmpint(s->drain_count, ==, 1);
-    g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
+    g_assert_cmpint(s->drain_count, ==, top_drain_count);
+    g_assert_cmpint(backing_s->drain_count, ==, backing_drain_count);
 
     do_drain_end(drain_type, bs);
 
@@ -240,17 +241,17 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
 
 static void test_drv_cb_drain_all(void)
 {
-    test_drv_cb_common(BDRV_DRAIN_ALL, true);
+    test_drv_cb_common(BDRV_DRAIN_ALL, 2, 1);
 }
 
 static void test_drv_cb_drain(void)
 {
-    test_drv_cb_common(BDRV_DRAIN, false);
+    test_drv_cb_common(BDRV_DRAIN, 1, 0);
 }
 
 static void test_drv_cb_drain_subtree(void)
 {
-    test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
+    test_drv_cb_common(BDRV_SUBTREE_DRAIN, 1, 1);
 }
 
 static void test_drv_cb_co_drain_all(void)
@@ -268,7 +269,9 @@ static void test_drv_cb_co_drain_subtree(void)
     call_in_coroutine(test_drv_cb_drain_subtree);
 }
 
-static void test_quiesce_common(enum drain_type drain_type, bool recursive)
+static void test_quiesce_common(enum drain_type drain_type,
+                                int top_quiesce_count,
+                                int backing_quiesce_count)
 {
     BlockBackend *blk;
     BlockDriverState *bs, *backing;
@@ -286,8 +289,8 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
 
     do_drain_begin(drain_type, bs);
 
-    g_assert_cmpint(bs->quiesce_counter, ==, 1);
-    g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
+    g_assert_cmpint(bs->quiesce_counter, ==, top_quiesce_count);
+    g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce_count);
 
     do_drain_end(drain_type, bs);
 
@@ -301,17 +304,17 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
 
 static void test_quiesce_drain_all(void)
 {
-    test_quiesce_common(BDRV_DRAIN_ALL, true);
+    test_quiesce_common(BDRV_DRAIN_ALL, 2, 1);
 }
 
 static void test_quiesce_drain(void)
 {
-    test_quiesce_common(BDRV_DRAIN, false);
+    test_quiesce_common(BDRV_DRAIN, 1, 0);
 }
 
 static void test_quiesce_drain_subtree(void)
 {
-    test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
+    test_quiesce_common(BDRV_SUBTREE_DRAIN, 1, 1);
 }
 
 static void test_quiesce_co_drain_all(void)
@@ -348,6 +351,8 @@ static void test_nested(void)
 
     for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
         for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
+            int top_quiesce = 2 + (outer == BDRV_DRAIN_ALL) +
+                                  (inner == BDRV_DRAIN_ALL);
             int backing_quiesce = (outer != BDRV_DRAIN) +
                                   (inner != BDRV_DRAIN);
 
@@ -359,9 +364,9 @@ static void test_nested(void)
             do_drain_begin(outer, bs);
             do_drain_begin(inner, bs);
 
-            g_assert_cmpint(bs->quiesce_counter, ==, 2);
+            g_assert_cmpint(bs->quiesce_counter, ==, top_quiesce);
             g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
-            g_assert_cmpint(s->drain_count, ==, 2);
+            g_assert_cmpint(s->drain_count, ==, top_quiesce);
             g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
 
             do_drain_end(inner, bs);
-- 
2.13.6

  parent reply	other threads:[~2018-04-11 16:40 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-11 16:39 [Qemu-devel] [PATCH 00/19] Drain fixes and cleanups, part 3 Kevin Wolf
2018-04-11 16:39 ` [Qemu-devel] [PATCH 01/19] test-bdrv-drain: bdrv_drain() works with cross-AioContext events Kevin Wolf
2018-04-11 16:39 ` [Qemu-devel] [PATCH 02/19] block: Use bdrv_do_drain_begin/end in bdrv_drain_all() Kevin Wolf
2018-04-20  7:07   ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-04-11 16:39 ` [Qemu-devel] [PATCH 03/19] block: Remove 'recursive' parameter from bdrv_drain_invoke() Kevin Wolf
2018-04-20  7:09   ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-04-11 16:39 ` [Qemu-devel] [PATCH 04/19] block: Don't manually poll in bdrv_drain_all() Kevin Wolf
2018-04-11 18:32   ` Eric Blake
2018-04-20  7:11   ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-04-11 16:39 ` [Qemu-devel] [PATCH 05/19] tests/test-bdrv-drain: bdrv_drain_all() works in coroutines now Kevin Wolf
2018-04-11 18:33   ` Eric Blake
2018-04-20  7:12   ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-04-11 16:39 ` [Qemu-devel] [PATCH 06/19] block: Avoid unnecessary aio_poll() in AIO_WAIT_WHILE() Kevin Wolf
2018-04-11 17:33   ` Su Hang
2018-04-20  7:17   ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-04-11 16:39 ` [Qemu-devel] [PATCH 07/19] block: Really pause block jobs on drain Kevin Wolf
2018-04-12  8:37   ` Paolo Bonzini
2018-04-12  9:51     ` Kevin Wolf
2018-04-12 10:12       ` Paolo Bonzini
2018-04-12 11:11         ` Kevin Wolf
2018-04-12 11:30           ` Paolo Bonzini
2018-04-12 11:53             ` Kevin Wolf
2018-04-12 12:02               ` Paolo Bonzini
2018-04-12 13:27                 ` Kevin Wolf
2018-04-12 13:42                   ` Paolo Bonzini
2018-04-12 14:25                     ` Kevin Wolf
2018-04-12 20:44                       ` Paolo Bonzini
2018-04-13  8:01                         ` Kevin Wolf
2018-04-13 11:05                           ` [Qemu-devel] [Qemu-block] " Paolo Bonzini
2018-04-13 12:40                             ` Kevin Wolf
2018-04-11 16:39 ` [Qemu-devel] [PATCH 08/19] block: Remove bdrv_drain_recurse() Kevin Wolf
2018-04-12  8:39   ` Paolo Bonzini
2018-04-20  7:20   ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-04-11 16:39 ` [Qemu-devel] [PATCH 09/19] test-bdrv-drain: Add test for node deletion Kevin Wolf
2018-04-20  7:32   ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2018-04-11 16:39 ` [Qemu-devel] [PATCH 10/19] block: Drain recursively with a single BDRV_POLL_WHILE() Kevin Wolf
2018-04-12  8:41   ` Paolo Bonzini
2018-04-11 16:39 ` [Qemu-devel] [PATCH 11/19] test-bdrv-drain: Test node deletion in subtree recursion Kevin Wolf
2018-04-11 16:39 ` [Qemu-devel] [PATCH 12/19] block: Don't poll in parent drain callbacks Kevin Wolf
2018-04-11 16:39 ` [Qemu-devel] [PATCH 13/19] test-bdrv-drain: Graph change through parent callback Kevin Wolf
2018-04-11 16:39 ` [Qemu-devel] [PATCH 14/19] block: Defer .bdrv_drain_begin callback to polling phase Kevin Wolf
2018-06-27 14:30   ` Max Reitz
2018-06-29 15:14     ` Kevin Wolf
2018-04-11 16:39 ` [Qemu-devel] [PATCH 15/19] test-bdrv-drain: Test that bdrv_drain_invoke() doesn't poll Kevin Wolf
2018-04-11 16:39 ` [Qemu-devel] [PATCH 16/19] block: Allow AIO_WAIT_WHILE with NULL ctx Kevin Wolf
2018-04-12  8:43   ` Paolo Bonzini
2018-04-11 16:39 ` [Qemu-devel] [PATCH 17/19] block: Move bdrv_drain_all_begin() out of coroutine context Kevin Wolf
2018-04-11 16:39 ` Kevin Wolf [this message]
2018-04-12  8:47   ` [Qemu-devel] [PATCH 18/19] block: Allow graph changes in bdrv_drain_all_begin/end sections Paolo Bonzini
2018-04-11 16:39 ` [Qemu-devel] [PATCH 19/19] test-bdrv-drain: Test graph changes in drain_all section Kevin Wolf
2018-04-11 17:05 ` [Qemu-devel] [PATCH 00/19] Drain fixes and cleanups, part 3 no-reply
2018-04-20  7:35 ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180411163940.2523-19-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=famz@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).