[PATCH v3 4/4] KVM: selftests: loongarch: Add PV TLB flush performance test

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Tao Cui <cui.tao@linux.dev>
To: maobibo@loongson.cn, zhaotianrui@loongson.cn,
	chenhuacai@kernel.org, loongarch@lists.linux.dev
Cc: kernel@xen0n.name, kvm@vger.kernel.org, Tao Cui <cuitao@kylinos.cn>
Subject: [PATCH v3 4/4] KVM: selftests: loongarch: Add PV TLB flush performance test
Date: Tue,  2 Jun 2026 10:18:19 +0800	[thread overview]
Message-ID: <20260602021819.2373404-5-cui.tao@linux.dev> (raw)
In-Reply-To: <20260602021819.2373404-1-cui.tao@linux.dev>

From: Tao Cui <cuitao@kylinos.cn>

Add a multi-threaded benchmark to measure PV TLB flush performance
inside LoongArch KVM guests.

The test spawns flusher threads that repeatedly mmap/munmap to trigger
TLB shootdown IPIs, alongside idle threads that either sleep or
busy-spin. With PV TLB flush enabled, IPIs to preempted vCPUs are
replaced by deferred flags in the steal-time shared page.

Usage (inside guest):
  ./pv_tlb_flush_test [flushers] [idle] [iterations] [busy_idle]
  busy_idle=0: idle threads sleep (PV can skip IPIs to preempted vCPUs)
  busy_idle=1: idle threads spin (all vCPUs active, PV cannot optimize)

Signed-off-by: Tao Cui <cuitao@kylinos.cn>
---
 .../kvm/loongarch/pv_tlb_flush_test.c         | 194 ++++++++++++++++++
 1 file changed, 194 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/loongarch/pv_tlb_flush_test.c

diff --git a/tools/testing/selftests/kvm/loongarch/pv_tlb_flush_test.c b/tools/testing/selftests/kvm/loongarch/pv_tlb_flush_test.c
new file mode 100644
index 000000000000..63efaf9ef1cd
--- /dev/null
+++ b/tools/testing/selftests/kvm/loongarch/pv_tlb_flush_test.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch PV TLB Flush Performance Test
+ *
+ * Measure the overhead of remote TLB flushes in a KVM guest by spawning
+ * flusher threads that repeatedly mmap/munmap (triggering TLB shootdown
+ * IPIs) alongside idle threads that either sleep or busy-spin.
+ *
+ * With PV TLB flush enabled, IPIs to preempted vCPUs are replaced by
+ * deferred flags in the steal-time shared page, reducing flush latency.
+ *
+ * Usage:
+ *   Compile on LoongArch guest:
+ *     gcc -O2 -static -pthread -o pv_tlb_flush_test pv_tlb_flush_test.c
+ *   Run (inside KVM guest):
+ *     ./pv_tlb_flush_test [flushers] [idle] [iterations] [busy_idle]
+ *   Examples:
+ *     ./pv_tlb_flush_test 1 31 50000 0   # 1 flusher, 31 sleep, PV helps
+ *     ./pv_tlb_flush_test 1 31 50000 1   # 1 flusher, 31 busy-spin, PV cannot help
+ *
+ *   busy_idle=0: idle threads sleep, vCPUs get preempted, PV TLB flush
+ *                can skip IPIs to them
+ *   busy_idle=1: idle threads spin, all vCPUs stay active, PV TLB flush
+ *                cannot optimize (baseline for comparison)
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <unistd.h>
+
+#define MEM_SIZE (2*1024*1024)	/* bytes mapped and unmapped per flusher iteration */
+#define DEFAULT_ITERS 50000	/* iterations per flusher when none is given on the command line */
+#define MAX_THREADS 64	/* upper bound on flusher + idle threads; sizes the per-run arrays */
+
+static int nr_iters = DEFAULT_ITERS;	/* iterations each flusher attempts */
+static volatile int start_barrier;	/* main() stores 1 to release threads spinning at startup */
+static volatile int stop_flag;	/* main() stores 1 to make idle and flusher loops exit */
+static int busy_idle = 0;	/* non-zero: idle threads spin; zero: idle threads usleep() */
+
+struct thread_args {	/* per-thread parameters handed to idle_thread()/flush_thread() */
+	int cpu;	/* CPU to pin the thread to; negative means do not pin */
+	unsigned long *result;	/* out: elapsed nanoseconds (flushers); NULL for idle threads */
+	int *completed;	/* out: iterations completed (flushers); NULL for idle threads */
+};
+
+/*
+ * Return the current CLOCK_MONOTONIC reading folded into a single
+ * nanosecond count.  The status of clock_gettime() is not checked.
+ */
+static inline unsigned long clock_ns(void)
+{
+	struct timespec now;
+	unsigned long ns;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	ns = (unsigned long)now.tv_sec * 1000000000UL;
+	ns += now.tv_nsec;
+	return ns;
+}
+
+/*
+ * Restrict the calling thread to a single CPU.
+ *
+ * @cpu: CPU number to pin to; a negative value leaves the affinity alone.
+ *
+ * A failure to set the affinity is reported on stderr and otherwise
+ * ignored: the benchmark keeps running, but the user can see that the
+ * thread placement is not the requested one and the numbers may be skewed.
+ */
+static void pin_cpu(int cpu)
+{
+	cpu_set_t set;
+
+	if (cpu < 0)
+		return;
+
+	CPU_ZERO(&set);
+	CPU_SET(cpu, &set);
+	/* A pid of 0 applies the mask to the caller. */
+	if (sched_setaffinity(0, sizeof(set), &set))
+		fprintf(stderr, "warning: cannot pin thread to CPU %d: %s\n",
+			cpu, strerror(errno));
+}
+
+/*
+ * Thread body for an idle (non-flushing) thread.
+ *
+ * @arg: struct thread_args *; only ->cpu is read.
+ *
+ * Pins itself, spins until start_barrier becomes non-zero, and then either
+ * spins (busy_idle != 0) or sleeps in usleep(1000) naps (busy_idle == 0)
+ * until stop_flag becomes non-zero.  Always returns NULL.
+ */
+static void *idle_thread(void *arg)
+{
+	struct thread_args *args = arg;
+	/* volatile, so every increment in the spin loop is really performed */
+	volatile long spins = 0;
+
+	pin_cpu(args->cpu);
+
+	/* Hold here until main() opens the start barrier. */
+	while (__atomic_load_n(&start_barrier, __ATOMIC_ACQUIRE) == 0)
+		;
+
+	if (!busy_idle) {
+		while (__atomic_load_n(&stop_flag, __ATOMIC_ACQUIRE) == 0)
+			usleep(1000);
+		return NULL;
+	}
+
+	while (__atomic_load_n(&stop_flag, __ATOMIC_ACQUIRE) == 0)
+		spins++;
+	return NULL;
+}
+
+/*
+ * Thread body for a flusher thread.
+ *
+ * @arg: struct thread_args *; ->cpu is read, *->result and *->completed
+ *       are written just before returning.
+ *
+ * Once start_barrier is non-zero, performs up to nr_iters rounds of
+ * mmap() / touch / munmap() on a MEM_SIZE anonymous private mapping.  The
+ * loop ends early if stop_flag is set or if mmap()/munmap() fails; a
+ * failure is reported on stderr so a shortened run is not mistaken for a
+ * complete one (the elapsed time of such a run includes the diagnostic).
+ *
+ * Stores the elapsed CLOCK_MONOTONIC time in nanoseconds in *->result and
+ * the number of fully completed rounds in *->completed.  Always returns
+ * NULL.
+ */
+static void *flush_thread(void *arg)
+{
+	struct thread_args *ta = arg;
+	const size_t mem_size = MEM_SIZE;
+	/*
+	 * NOTE(review): one byte is written every 64 KiB, so with a smaller
+	 * page size only some pages of the mapping are touched - confirm
+	 * that this is intended.
+	 */
+	const size_t touch_stride = 65536;
+	unsigned long start, end;
+	size_t off;
+	int i;
+
+	pin_cpu(ta->cpu);
+	while (!__atomic_load_n(&start_barrier, __ATOMIC_ACQUIRE))
+		;
+
+	start = clock_ns();
+	for (i = 0; i < nr_iters; i++) {
+		void *p;
+
+		if (__atomic_load_n(&stop_flag, __ATOMIC_ACQUIRE))
+			break;
+
+		p = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (p == MAP_FAILED) {
+			perror("mmap");
+			break;
+		}
+
+		for (off = 0; off < mem_size; off += touch_stride)
+			((volatile char *)p)[off] = 0;
+
+		/* A round whose unmap failed is not counted as completed. */
+		if (munmap(p, mem_size)) {
+			perror("munmap");
+			break;
+		}
+	}
+	end = clock_ns();
+
+	*ta->result = end - start;
+	*ta->completed = i;
+	return NULL;
+}
+
+/*
+ * Parse @str as a base-10 integer within [@min, @max].
+ *
+ * Returns 0 and stores the value in *@out on success.  Returns -1 and
+ * leaves *@out untouched if @str holds no digits, has trailing characters,
+ * or is out of range.
+ */
+static int parse_int_arg(const char *str, long min, long max, int *out)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(str, &end, 10);
+	if (end == str || *end != '\0' || errno == ERANGE)
+		return -1;
+	if (val < min || val > max)
+		return -1;
+
+	*out = (int)val;
+	return 0;
+}
+
+/* Print the command-line synopsis for @prog on stderr. */
+static void print_usage(const char *prog)
+{
+	fprintf(stderr, "Usage: %s [flushers(1-%d)] [idle(0-%d)] [iters] [busy_idle]\n",
+		prog, MAX_THREADS, MAX_THREADS);
+}
+
+/*
+ * Execute benchmark run number @run and print its results.
+ *
+ * @run:      run index, used only in the output.
+ * @nr_flush: number of flusher threads to start.
+ * @nr_idle:  number of idle threads to start.
+ * @ncpus:    CPU count; threads are pinned only if all of them fit.
+ *
+ * The caller guarantees nr_flush + nr_idle <= MAX_THREADS.  Thread slots
+ * [0, nr_idle) hold idle threads and [nr_idle, nr_flush + nr_idle) hold
+ * flushers.  When pinning, flusher i goes to CPU i and idle thread i to CPU
+ * nr_flush + i.
+ * NOTE(review): this presumes CPUs 0..total-1 are online and allowed for
+ * this process - confirm for guests with hotplugged or restricted CPUs.
+ *
+ * Returns 0 on success, or -1 if a thread could not be created; in that
+ * case the threads already started are released, stopped and joined first.
+ */
+static int run_once(int run, int nr_flush, int nr_idle, int ncpus)
+{
+	const int total = nr_flush + nr_idle;
+	const int do_pin = (total <= ncpus);
+	pthread_t threads[MAX_THREADS];
+	unsigned long results[MAX_THREADS];
+	int completed[MAX_THREADS];
+	struct thread_args args[MAX_THREADS];
+	unsigned long total_ns = 0;
+	unsigned long total_done = 0;
+	int created = 0;
+	int err, i;
+
+	start_barrier = 0;
+	stop_flag = 0;
+
+	for (i = 0; i < nr_idle; i++) {
+		args[i].cpu = do_pin ? nr_flush + i : -1;
+		args[i].result = NULL;
+		args[i].completed = NULL;
+		/* pthread_create() returns the error number; errno is not set. */
+		err = pthread_create(&threads[i], NULL, idle_thread, &args[i]);
+		if (err) {
+			fprintf(stderr, "pthread_create idle: %s\n", strerror(err));
+			goto abort_run;
+		}
+		created++;
+	}
+	for (i = nr_idle; i < total; i++) {
+		results[i] = 0;
+		completed[i] = 0;
+		args[i].cpu = do_pin ? i - nr_idle : -1;
+		args[i].result = &results[i];
+		args[i].completed = &completed[i];
+		err = pthread_create(&threads[i], NULL, flush_thread, &args[i]);
+		if (err) {
+			fprintf(stderr, "pthread_create flush: %s\n", strerror(err));
+			goto abort_run;
+		}
+		created++;
+	}
+
+	/* Short grace period before all threads are released together. */
+	usleep(10000);
+	__atomic_store_n(&start_barrier, 1, __ATOMIC_RELEASE);
+
+	/* Flushers end on their own; idle threads need stop_flag to exit. */
+	for (i = nr_idle; i < total; i++)
+		pthread_join(threads[i], NULL);
+	__atomic_store_n(&stop_flag, 1, __ATOMIC_RELEASE);
+	for (i = 0; i < nr_idle; i++)
+		pthread_join(threads[i], NULL);
+
+	for (i = 0; i < nr_flush; i++) {
+		const int idx = nr_idle + i;
+		unsigned long done = completed[idx];
+
+		if (done == 0) {
+			printf("  Run %d flusher %d: no iterations completed\n", run, i);
+			continue;
+		}
+		printf("  Run %d flusher %d: %lu ns/flush (%lu iters)\n",
+		       run, i, results[idx] / done, done);
+		total_ns += results[idx];
+		total_done += done;
+	}
+	if (total_done > 0)
+		printf("  Run %d Avg: %lu ns/flush\n", run, total_ns / total_done);
+	return 0;
+
+abort_run:
+	/* Open both gates so every thread started so far can terminate. */
+	__atomic_store_n(&start_barrier, 1, __ATOMIC_RELEASE);
+	__atomic_store_n(&stop_flag, 1, __ATOMIC_RELEASE);
+	for (i = 0; i < created; i++)
+		pthread_join(threads[i], NULL);
+	return -1;
+}
+
+/*
+ * Entry point: pv_tlb_flush_test [flushers] [idle] [iterations] [busy_idle]
+ *
+ * All arguments are optional and default to 1 flusher, 3 idle threads,
+ * DEFAULT_ITERS iterations and sleeping idle threads.  Runs the benchmark
+ * three times.  Returns 0 on success and 1 on invalid arguments or if a
+ * thread could not be created.
+ */
+int main(int argc, char **argv)
+{
+	const int nr_runs = 3;
+	int nr_flush = 1, nr_idle = 3, run;
+	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	int bad_args = 0;
+
+	if (argc > 1 && parse_int_arg(argv[1], INT_MIN, INT_MAX, &nr_flush))
+		bad_args = 1;
+	if (argc > 2 && parse_int_arg(argv[2], INT_MIN, INT_MAX, &nr_idle))
+		bad_args = 1;
+	if (argc > 3 && parse_int_arg(argv[3], INT_MIN, INT_MAX, &nr_iters))
+		bad_args = 1;
+	if (argc > 4 && parse_int_arg(argv[4], INT_MIN, INT_MAX, &busy_idle))
+		bad_args = 1;
+
+	/*
+	 * Bound each count before combining them, so that the sum check
+	 * cannot overflow a signed int for very large arguments.
+	 */
+	if (bad_args || nr_flush < 1 || nr_idle < 0 ||
+	    nr_flush > MAX_THREADS || nr_idle > MAX_THREADS - nr_flush) {
+		print_usage(argv[0]);
+		return 1;
+	}
+	if (nr_iters <= 0) {
+		fprintf(stderr, "Error: iterations must be positive\n");
+		return 1;
+	}
+
+	printf("=== TLB Flush Benchmark ===\n");
+	printf("CPUs: %d  Flushers: %d  Idle: %d  Iters: %d  Mode: %s\n",
+	       ncpus, nr_flush, nr_idle, nr_iters,
+	       busy_idle ? "busy-spin" : "sleep");
+
+	for (run = 0; run < nr_runs; run++) {
+		if (run_once(run, nr_flush, nr_idle, ncpus))
+			return 1;
+	}
+	return 0;
+}
-- 
2.43.0

next prev parent reply	other threads:[~2026-06-02  2:18 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-02  2:18 [PATCH v3 0/4] LoongArch: KVM: Add PV TLB flush support Tao Cui
2026-06-02  2:18 ` [PATCH v3 1/4] LoongArch: KVM: Preserve auto-enabled PV features on userspace override Tao Cui
2026-06-02  2:26   ` sashiko-bot
2026-06-02  2:18 ` [PATCH v3 2/4] LoongArch: KVM: Add PV TLB flush support via steal-time shared memory Tao Cui
2026-06-02  2:37   ` sashiko-bot
2026-06-02  2:18 ` [PATCH v3 3/4] LoongArch: KVM: Implement guest-side PV TLB flush Tao Cui
2026-06-02  2:46   ` sashiko-bot
2026-06-02  2:18 ` Tao Cui [this message]
2026-06-02  2:52   ` [PATCH v3 4/4] KVM: selftests: loongarch: Add PV TLB flush performance test sashiko-bot

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:63efaf9ef1c )
 OR (
bs:"[PATCH v3 4/4] KVM: selftests: loongarch: Add PV TLB flush performance test" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260602021819.2373404-5-cui.tao@linux.dev \
    --to=cui.tao@linux.dev \
    --cc=chenhuacai@kernel.org \
    --cc=cuitao@kylinos.cn \
    --cc=kernel@xen0n.name \
    --cc=kvm@vger.kernel.org \
    --cc=loongarch@lists.linux.dev \
    --cc=maobibo@loongson.cn \
    --cc=zhaotianrui@loongson.cn \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.