* [PATCH 1/2] aio-posix: do not nest poll handlers
2023-05-02 18:41 [PATCH 0/2] aio-posix: do not nest poll handlers Stefan Hajnoczi
@ 2023-05-02 18:41 ` Stefan Hajnoczi
2023-05-02 18:41 ` [PATCH 2/2] tested: add test for nested aio_poll() in " Stefan Hajnoczi
2023-05-17 16:07 ` [PATCH 0/2] aio-posix: do not nest " Kevin Wolf
2 siblings, 0 replies; 4+ messages in thread
From: Stefan Hajnoczi @ 2023-05-02 18:41 UTC (permalink / raw)
To: qemu-devel
Cc: Fam Zheng, Paolo Bonzini, Stefan Hajnoczi, qemu-block, Kevin Wolf,
Emanuele Giuseppe Esposito
QEMU's event loop supports nesting, which means that event handler
functions may themselves call aio_poll(). The condition that triggered a
handler must be reset before the nested aio_poll() call, otherwise the
same handler will be called and immediately re-enter aio_poll. This
leads to an infinite loop and stack exhaustion.
Poll handlers are especially prone to this issue, because they typically
reset their condition by finishing the processing of pending work.
Unfortunately it is during the processing of pending work that nested
aio_poll() calls typically occur and the condition has not yet been
reset.
Disable a poll handler during ->io_poll_ready() so that a nested
aio_poll() call cannot invoke ->io_poll_ready() again. As a result, the
disabled poll handler and its associated fd handler do not run during
the nested aio_poll(). Calling aio_set_fd_handler() from inside nested
aio_poll() could cause it to run again. If the fd handler is pending
inside nested aio_poll(), then it will also run again.
In theory fd handlers can be affected by the same issue, but they are
more likely to reset the condition before calling nested aio_poll().
This is a special case and it's somewhat complex, but I don't see a way
around it as long as nested aio_poll() is supported.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2186181
Fixes: c38270692593 ("block: Mark bdrv_co_io_(un)plug() and callers GRAPH_RDLOCK")
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
util/aio-posix.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/util/aio-posix.c b/util/aio-posix.c
index a8be940f76..34bc2a64d8 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -353,8 +353,19 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
poll_ready && revents == 0 &&
aio_node_check(ctx, node->is_external) &&
node->io_poll_ready) {
+ /*
+ * Remove temporarily to avoid infinite loops when ->io_poll_ready()
+ * calls aio_poll() before clearing the condition that made the poll
+ * handler become ready.
+ */
+ QLIST_SAFE_REMOVE(node, node_poll);
+
node->io_poll_ready(node->opaque);
+ if (!QLIST_IS_INSERTED(node, node_poll)) {
+ QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
+ }
+
/*
* Return early since revents was zero. aio_notify() does not count as
* progress.
--
2.40.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] tested: add test for nested aio_poll() in poll handlers
2023-05-02 18:41 [PATCH 0/2] aio-posix: do not nest poll handlers Stefan Hajnoczi
2023-05-02 18:41 ` [PATCH 1/2] " Stefan Hajnoczi
@ 2023-05-02 18:41 ` Stefan Hajnoczi
2023-05-17 16:07 ` [PATCH 0/2] aio-posix: do not nest " Kevin Wolf
2 siblings, 0 replies; 4+ messages in thread
From: Stefan Hajnoczi @ 2023-05-02 18:41 UTC (permalink / raw)
To: qemu-devel; +Cc: Fam Zheng, Paolo Bonzini, Stefan Hajnoczi, qemu-block
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
tests/unit/test-nested-aio-poll.c | 130 ++++++++++++++++++++++++++++++
tests/unit/meson.build | 1 +
2 files changed, 131 insertions(+)
create mode 100644 tests/unit/test-nested-aio-poll.c
diff --git a/tests/unit/test-nested-aio-poll.c b/tests/unit/test-nested-aio-poll.c
new file mode 100644
index 0000000000..9bbe18b839
--- /dev/null
+++ b/tests/unit/test-nested-aio-poll.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Test that poll handlers are not re-entrant in nested aio_poll()
+ *
+ * Copyright Red Hat
+ *
+ * Poll handlers are usually level-triggered. That means they continue firing
+ * until the condition is reset (e.g. a virtqueue becomes empty). If a poll
+ * handler calls nested aio_poll() before the condition is reset, then infinite
+ * recursion occurs.
+ *
+ * aio_poll() is supposed to prevent this by disabling poll handlers in nested
+ * aio_poll() calls. This test case checks that this is indeed what happens.
+ */
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "qapi/error.h"
+
+typedef struct {
+ AioContext *ctx;
+
+ /* This is the EventNotifier that drives the test */
+ EventNotifier poll_notifier;
+
+ /* This EventNotifier is only used to wake aio_poll() */
+ EventNotifier dummy_notifier;
+
+ bool nested;
+} TestData;
+
+static void io_read(EventNotifier *notifier)
+{
+ fprintf(stderr, "%s %p\n", __func__, notifier);
+ event_notifier_test_and_clear(notifier);
+}
+
+static bool io_poll_true(void *opaque)
+{
+ fprintf(stderr, "%s %p\n", __func__, opaque);
+ return true;
+}
+
+static bool io_poll_false(void *opaque)
+{
+ fprintf(stderr, "%s %p\n", __func__, opaque);
+ return false;
+}
+
+static void io_poll_ready(EventNotifier *notifier)
+{
+ TestData *td = container_of(notifier, TestData, poll_notifier);
+
+ fprintf(stderr, "> %s\n", __func__);
+
+ g_assert(!td->nested);
+ td->nested = true;
+
+ /* Wake the following nested aio_poll() call */
+ event_notifier_set(&td->dummy_notifier);
+
+ /* This nested event loop must not call io_poll()/io_poll_ready() */
+ g_assert(aio_poll(td->ctx, true));
+
+ td->nested = false;
+
+ fprintf(stderr, "< %s\n", __func__);
+}
+
+/* dummy_notifier never triggers */
+static void io_poll_never_ready(EventNotifier *notifier)
+{
+ g_assert_not_reached();
+}
+
+static void test(void)
+{
+ TestData td = {
+ .ctx = aio_context_new(&error_abort),
+ };
+
+ qemu_set_current_aio_context(td.ctx);
+
+ /* Enable polling */
+ aio_context_set_poll_params(td.ctx, 1000000, 2, 2, &error_abort);
+
+ /*
+ * The GSource is unused but this has the side-effect of changing the fdmon
+ * that AioContext uses.
+ */
+ aio_get_g_source(td.ctx);
+
+ /* Make the event notifier active (set) right away */
+ event_notifier_init(&td.poll_notifier, 1);
+ aio_set_event_notifier(td.ctx, &td.poll_notifier, false,
+ io_read, io_poll_true, io_poll_ready);
+
+ /* This event notifier will be used later */
+ event_notifier_init(&td.dummy_notifier, 0);
+ aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
+ io_read, io_poll_false, io_poll_never_ready);
+
+ /* Consume aio_notify() */
+ g_assert(!aio_poll(td.ctx, false));
+
+ /*
+ * Run the io_read() handler. This has the side-effect of activating
+ * polling in future aio_poll() calls.
+ */
+ g_assert(aio_poll(td.ctx, true));
+
+ /* The second time around the io_poll()/io_poll_ready() handler runs */
+ g_assert(aio_poll(td.ctx, true));
+
+ /* Run io_poll()/io_poll_ready() one more time to show it keeps working */
+ g_assert(aio_poll(td.ctx, true));
+
+ aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
+ NULL, NULL, NULL);
+ aio_set_event_notifier(td.ctx, &td.poll_notifier, false, NULL, NULL, NULL);
+ event_notifier_cleanup(&td.dummy_notifier);
+ event_notifier_cleanup(&td.poll_notifier);
+ aio_context_unref(td.ctx);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ g_test_add_func("/nested-aio-poll", test);
+ return g_test_run();
+}
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 3bc78d8660..a314f82baa 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -67,6 +67,7 @@ if have_block
'test-coroutine': [testblock],
'test-aio': [testblock],
'test-aio-multithread': [testblock],
+ 'test-nested-aio-poll': [testblock],
'test-throttle': [testblock],
'test-thread-pool': [testblock],
'test-hbitmap': [testblock],
--
2.40.1
^ permalink raw reply related [flat|nested] 4+ messages in thread