Kernel KVM virtualization development
 help / color / mirror / Atom feed
From: Bin Guo <guobin@linux.alibaba.com>
To: Paolo Bonzini <pbonzini@redhat.com>,
	Hyman Huang <infra.ai.cloud@bitdeer.com>
Cc: kvm@vger.kernel.org, qemu-devel@nongnu.org
Subject: [PATCH v2] accel/kvm: event-driven wakeup for dirty ring reaper thread
Date: Thu,  4 Jun 2026 15:00:29 +0800	[thread overview]
Message-ID: <20260604070029.34982-1-guobin@linux.alibaba.com> (raw)

The dirty ring reaper thread polls with sleep(1) (marked TODO in the
code), so it only notices dirty-limit teardown at the next 1s tick.

Replace the sleep with qemu_poll_ns() on an EventNotifier, kicked
from dirtylimit_state_finalize() after dirtylimit_state has been
cleared.  The kick must follow the NULL assignment: kicking earlier
wakes the reaper while dirtylimit_in_service() still returns true,
so it just loops back to wait.  A 1s fallback timeout remains as a
liveness backstop.

Over 20 set-vcpu-dirty-limit/cancel-vcpu-dirty-limit cycles driven via
QMP, the latency |reaper wakeup - cancel ack|, measured with strace on
the reaper TID, was:

    before: median 255 ms, max 502 ms
    after:  median 0.6 ms, max  27 ms

kvm_dirty_ring_reaper_init() returns int again (was made void in
commit 43a5e377f4) to propagate event_notifier_init() failure.

Signed-off-by: Bin Guo <guobin@linux.alibaba.com>
---
Changes in v2:
- Fix compilation error: kvm_init() has no Error **errp parameter, so
  kvm_dirty_ring_reaper_init() now uses error_report() directly and
  no longer accepts an errp argument.
- Remove the kick from kvm_cpu_exec() KVM_EXIT_DIRTY_RING_FULL handler:
  it already reaps synchronously (all vCPUs or the ring-full one), so
  a background reaper kick would only be redundant or a no-op.
- Move kick site from dirtylimit_change(false) to
  dirtylimit_state_finalize() after dirtylimit_state = NULL, ensuring
  the reaper actually proceeds past the dirtylimit_in_service() check.

 accel/kvm/kvm-all.c      | 42 ++++++++++++++++++++++++++++++++++------
 accel/stubs/kvm-stub.c   |  4 ++++
 include/system/kvm.h     |  7 +++++++
 include/system/kvm_int.h |  3 +++
 system/dirtylimit.c      |  2 ++
 5 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 96f90ebb24..be005832bc 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1754,6 +1754,8 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
     } while (size);
 }
 
+#define KVM_DIRTY_RING_REAPER_FALLBACK_NS  (1 * NANOSECONDS_PER_SECOND)
+
 static void *kvm_dirty_ring_reaper_thread(void *data)
 {
     KVMState *s = data;
@@ -1764,12 +1766,18 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
     trace_kvm_dirty_ring_reaper("init");
 
     while (true) {
+        GPollFD pfd = {
+            .fd = event_notifier_get_fd(&r->reaper_notifier),
+            .events = G_IO_IN,
+        };
+
         r->reaper_state = KVM_DIRTY_RING_REAPER_WAIT;
         trace_kvm_dirty_ring_reaper("wait");
-        /*
-         * TODO: provide a smarter timeout rather than a constant?
-         */
-        sleep(1);
+
+        qemu_poll_ns(&pfd, 1, KVM_DIRTY_RING_REAPER_FALLBACK_NS);
+
+        /* Drain unconditionally so a stale event can't spin the next loop. */
+        event_notifier_test_and_clear(&r->reaper_notifier);
 
         /* keep sleeping so that dirtylimit not be interfered by reaper */
         if (dirtylimit_in_service()) {
@@ -1789,13 +1797,32 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
     g_assert_not_reached();
 }
 
-static void kvm_dirty_ring_reaper_init(KVMState *s)
+static int kvm_dirty_ring_reaper_init(KVMState *s)
 {
     struct KVMDirtyRingReaper *r = &s->reaper;
+    int ret;
+
+    ret = event_notifier_init(&r->reaper_notifier, 0);
+    if (ret < 0) {
+        error_report("Failed to initialize dirty ring reaper notifier: %s",
+                     strerror(-ret));
+        return ret;
+    }
 
     qemu_thread_create(&r->reaper_thr, "kvm-reaper",
                        kvm_dirty_ring_reaper_thread,
                        s, QEMU_THREAD_JOINABLE);
+    return 0;
+}
+
+void kvm_dirty_ring_reaper_kick(void)
+{
+    KVMState *s = kvm_state;
+
+    if (!s || !s->kvm_dirty_ring_size) {
+        return;
+    }
+    event_notifier_set(&s->reaper.reaper_notifier);
 }
 
 static int kvm_dirty_ring_init(KVMState *s)
@@ -3097,7 +3124,10 @@ static int kvm_init(AccelState *as, MachineState *ms)
     }
 
     if (s->kvm_dirty_ring_size) {
-        kvm_dirty_ring_reaper_init(s);
+        ret = kvm_dirty_ring_reaper_init(s);
+        if (ret < 0) {
+            goto err;
+        }
     }
 
     if (kvm_check_extension(kvm_state, KVM_CAP_BINARY_STATS_FD)) {
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index c4617caac6..b878598552 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -134,6 +134,10 @@ uint32_t kvm_dirty_ring_size(void)
     return 0;
 }
 
+void kvm_dirty_ring_reaper_kick(void)
+{
+}
+
 bool kvm_hwpoisoned_mem(void)
 {
     return false;
diff --git a/include/system/kvm.h b/include/system/kvm.h
index 5fa33eddda..e127a5eb37 100644
--- a/include/system/kvm.h
+++ b/include/system/kvm.h
@@ -553,6 +553,13 @@ bool kvm_dirty_ring_enabled(void);
 
 uint32_t kvm_dirty_ring_size(void);
 
+/**
+ * kvm_dirty_ring_reaper_kick - wake the background dirty ring reaper.
+ *
+ * Safe from any thread; no-op when the dirty ring is not in use.
+ */
+void kvm_dirty_ring_reaper_kick(void);
+
 void kvm_mark_guest_state_protected(void);
 
 /**
diff --git a/include/system/kvm_int.h b/include/system/kvm_int.h
index 0876aac938..c14ebc927f 100644
--- a/include/system/kvm_int.h
+++ b/include/system/kvm_int.h
@@ -12,6 +12,7 @@
 #include "system/memory.h"
 #include "qapi/qapi-types-common.h"
 #include "qemu/accel.h"
+#include "qemu/event_notifier.h"
 #include "qemu/queue.h"
 #include "system/kvm.h"
 #include "accel/accel-ops.h"
@@ -100,6 +101,8 @@ struct KVMDirtyRingReaper {
     QemuThread reaper_thr;
     volatile uint64_t reaper_iteration; /* iteration number of reaper thr */
     volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */
+    /* Wakeup channel: kicked when dirty-limit is torn down. */
+    EventNotifier reaper_notifier;
 };
 struct KVMState
 {
diff --git a/system/dirtylimit.c b/system/dirtylimit.c
index c934ceb0de..a33256ade7 100644
--- a/system/dirtylimit.c
+++ b/system/dirtylimit.c
@@ -239,6 +239,8 @@ void dirtylimit_state_finalize(void)
     g_free(dirtylimit_state);
     dirtylimit_state = NULL;
 
+    kvm_dirty_ring_reaper_kick();
+
     trace_dirtylimit_state_finalize();
 }
 
-- 
2.50.1 (Apple Git-155)


             reply	other threads:[~2026-06-04  7:00 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04  7:00 Bin Guo [this message]
2026-06-05  8:22 ` [PATCH v2] accel/kvm: event-driven wakeup for dirty ring reaper thread Hy Man
2026-06-18  0:58   ` Hy Man

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260604070029.34982-1-guobin@linux.alibaba.com \
    --to=guobin@linux.alibaba.com \
    --cc=infra.ai.cloud@bitdeer.com \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox