From: Peter Xu <peterx@redhat.com>
To: qemu-devel@nongnu.org
Cc: Alexey Perevalov <a.perevalov@samsung.com>,
Juraj Marcin <jmarcin@redhat.com>,
"Dr . David Alan Gilbert" <dave@treblig.org>,
peterx@redhat.com, Fabiano Rosas <farosas@suse.de>
Subject: [PATCH v2 10/13] migration/postcopy: Cache the tid->vcpu mapping for blocktime
Date: Mon, 9 Jun 2025 15:12:56 -0400
Message-ID: <20250609191259.9053-11-peterx@redhat.com>
In-Reply-To: <20250609191259.9053-1-peterx@redhat.com>
Looking up the vCPU index for each fault can be expensive when there
are hundreds of vCPUs. Provide a tid->vcpu cache in the form of a hash
table instead, and look vCPUs up from there.

While at it, add another counter to record how many non-vCPU faults are
hit. For example, the main thread can also access a guest page that was
missing. Such faults have not been accounted for by blocktime so far.
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
migration/postcopy-ram.c | 68 ++++++++++++++++++++++++++++++++++------
migration/trace-events | 3 +-
2 files changed, 59 insertions(+), 12 deletions(-)
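
The hash values need an indirection because of the usual GLib pitfall:
a value of 0 stored as a direct pointer is indistinguishable from a
failed lookup, which returns NULL. A minimal, self-contained sketch of
the same pattern (plain GLib with hypothetical tid values, not QEMU
code):

#include <glib.h>
#include <stdio.h>

int main(void)
{
    /* Same setup as the patch: direct (pointer-sized) keys, and
     * heap-allocated int values freed automatically on destroy. */
    GHashTable *tbl = g_hash_table_new_full(g_direct_hash, g_direct_equal,
                                            NULL, g_free);
    guint32 tid = 12345;          /* hypothetical vCPU thread id */
    int *boxed = g_new(int, 1);

    *boxed = 0;                   /* vCPU index 0 is a valid index */
    g_hash_table_insert(tbl, GUINT_TO_POINTER(tid), boxed);

    /* Boxing makes index 0 distinguishable from "not found" (NULL)... */
    int *found = g_hash_table_lookup(tbl, GUINT_TO_POINTER(tid));
    printf("tid %u -> vcpu %d\n", tid, found ? *found : -1);   /* 0 */

    /* ...while an unknown tid still yields NULL, i.e. not a vCPU. */
    found = g_hash_table_lookup(tbl, GUINT_TO_POINTER(99999u));
    printf("unknown tid -> %d\n", found ? *found : -1);        /* -1 */

    g_hash_table_destroy(tbl);    /* also frees the boxed values */
    return 0;
}

Had cpu_index been stored directly via GINT_TO_POINTER(), the first
lookup above would also return NULL, and vCPU 0's faults would be
misreported as non-vCPU faults.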
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 6ed4546744..494bfbab71 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -127,6 +127,17 @@ typedef struct PostcopyBlocktimeContext {
/* number of vCPU are suspended */
int smp_cpus_down;
+ /*
+ * Fast path for looking up vcpu_index from tid. NOTE: this result
+ * only reflects the vcpu setup when postcopy is running. It may not
+ * always match the current vcpu setup, because vcpus can be hot
+ * attached/detached after migration completes. However, it should be
+ * stable for as long as blocktime is using the structure.
+ */
+ GHashTable *tid_to_vcpu_hash;
+ /* Count of non-vCPU faults. This is only for debugging purposes. */
+ uint64_t non_vcpu_faults;
+
/*
* Handler for exit event, necessary for
* releasing whole blocktime_ctx
@@ -136,6 +147,7 @@ typedef struct PostcopyBlocktimeContext {
static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
{
+ g_hash_table_destroy(ctx->tid_to_vcpu_hash);
g_free(ctx->vcpu_blocktime_start);
g_free(ctx->vcpu_blocktime_total);
g_free(ctx->vcpu_faults_count);
@@ -150,6 +162,36 @@ static void migration_exit_cb(Notifier *n, void *data)
destroy_blocktime_context(ctx);
}
+static GHashTable *blocktime_init_tid_to_vcpu_hash(void)
+{
+ /*
+ * TID as an unsigned int can be used directly as the key. However,
+ * the CPU index can NOT be used directly as the value, because index
+ * 0 would be stored as a NULL pointer, so a lookup could never tell
+ * "index 0" apart from "not found". Hence use an indirection (a
+ * heap-allocated int) for the CPU index.
+ */
+ GHashTable *table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, g_free);
+ CPUState *cpu;
+
+ /*
+ * Initialize the tid->cpu_id mapping for lookups. The caller needs to
+ * make sure that, by the time we reach here, the CPU topology is
+ * frozen and will stay stable for the whole blocktime trapping period.
+ */
+ CPU_FOREACH(cpu) {
+ int *value = g_new(int, 1);
+
+ *value = cpu->cpu_index;
+ g_hash_table_insert(table,
+ GUINT_TO_POINTER((uint32_t)cpu->thread_id),
+ value);
+ trace_postcopy_blocktime_tid_cpu_map(cpu->cpu_index, cpu->thread_id);
+ }
+
+ return table;
+}
+
static struct PostcopyBlocktimeContext *blocktime_context_new(void)
{
MachineState *ms = MACHINE(qdev_get_machine());
@@ -160,6 +202,8 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void)
ctx->vcpu_blocktime_total = g_new0(uint64_t, smp_cpus);
ctx->vcpu_faults_count = g_new0(uint64_t, smp_cpus);
ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
+ ctx->tid_to_vcpu_hash = blocktime_init_tid_to_vcpu_hash();
+
ctx->exit_notifier.notify = migration_exit_cb;
qemu_add_exit_notifier(&ctx->exit_notifier);
@@ -826,18 +870,21 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
return 0;
}
-static int get_mem_fault_cpu_index(uint32_t pid)
+static int blocktime_get_vcpu(PostcopyBlocktimeContext *ctx, uint32_t tid)
{
- CPUState *cpu_iter;
+ int *found;
- CPU_FOREACH(cpu_iter) {
- if (cpu_iter->thread_id == pid) {
- trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
- return cpu_iter->cpu_index;
- }
+ found = g_hash_table_lookup(ctx->tid_to_vcpu_hash, GUINT_TO_POINTER(tid));
+ if (!found) {
+ /*
+ * NOTE: this is possible because QEMU's non-vCPU threads can
+ * also access a missing page. Or, when KVM async page faults are
+ * enabled, a fault can even come from a kworker.
+ */
+ return -1;
}
- trace_get_mem_fault_cpu_index(-1, pid);
- return -1;
+
+ return *found;
}
static uint64_t get_current_us(void)
@@ -864,8 +911,9 @@ void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
if (!dc || ptid == 0) {
return;
}
- cpu = get_mem_fault_cpu_index(ptid);
+ cpu = blocktime_get_vcpu(dc, ptid);
if (cpu < 0) {
+ dc->non_vcpu_faults++;
return;
}
diff --git a/migration/trace-events b/migration/trace-events
index 02cdb6e7cc..9c1f3b7044 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -310,8 +310,7 @@ postcopy_preempt_tls_handshake(void) ""
postcopy_preempt_new_channel(void) ""
postcopy_preempt_thread_entry(void) ""
postcopy_preempt_thread_exit(void) ""
-
-get_mem_fault_cpu_index(int cpu, uint32_t pid) "cpu: %d, pid: %u"
+postcopy_blocktime_tid_cpu_map(int cpu, uint32_t tid) "cpu: %d, tid: %u"
# exec.c
migration_exec_outgoing(const char *cmd) "cmd=%s"
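
As a usage note, on builds with the "log" or "simple" trace backends
the renamed event can typically be enabled on the destination side with
something like (the exact syntax depends on the trace backend compiled
in, so treat this as a sketch):

    qemu-system-x86_64 ... -trace 'postcopy_blocktime_tid_cpu_map'

which prints one cpu/tid pair per vCPU when the blocktime context is
created at listen time.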
--
2.49.0
Thread overview:
2025-06-09 19:12 [PATCH v2 00/13] migration/postcopy: Blocktime tracking overhaul Peter Xu
2025-06-09 19:12 ` [PATCH v2 01/13] migration: Add option to set postcopy-blocktime Peter Xu
2025-06-09 19:12 ` [PATCH v2 02/13] migration/postcopy: Push blocktime start/end into page req mutex Peter Xu
2025-06-09 19:12 ` [PATCH v2 03/13] migration/postcopy: Drop all atomic ops in blocktime feature Peter Xu
2025-06-09 19:12 ` [PATCH v2 04/13] migration/postcopy: Make all blocktime vars 64bits Peter Xu
2025-06-09 19:12 ` [PATCH v2 05/13] migration/postcopy: Drop PostcopyBlocktimeContext.start_time Peter Xu
2025-06-09 19:12 ` [PATCH v2 06/13] migration/postcopy: Bring blocktime layer to us level Peter Xu
2025-06-09 19:12 ` [PATCH v2 07/13] migration/postcopy: Add blocktime fault counts per-vcpu Peter Xu
2025-06-09 19:12 ` [PATCH v2 08/13] migration/postcopy: Report fault latencies in blocktime Peter Xu
2025-06-09 22:05 ` Peter Xu
2025-06-09 22:25 ` Peter Xu
2025-06-10 0:08 ` Dr. David Alan Gilbert
2025-06-10 13:39 ` Peter Xu
2025-06-10 13:53 ` Dr. David Alan Gilbert
2025-06-10 14:08 ` Peter Xu
2025-06-09 19:12 ` [PATCH v2 09/13] migration/postcopy: Initialize blocktime context only until listen Peter Xu
2025-06-09 19:12 ` Peter Xu [this message]
2025-06-09 19:12 ` [PATCH v2 11/13] migration/postcopy: Cleanup the total blocktime accounting Peter Xu
2025-06-09 19:12 ` [PATCH v2 12/13] migration/postcopy: Optimize blocktime fault tracking with hashtable Peter Xu
2025-06-09 19:12 ` [PATCH v2 13/13] migration/postcopy: blocktime allows track / report non-vCPU faults Peter Xu