From: David Matlack <dmatlack@google.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Wanpeng Li <wanpengli@tencent.com>,
Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
David Matlack <dmatlack@google.com>,
Ben Gardon <bgardon@google.com>,
Zhenzhong Duan <zhenzhong.duan@intel.com>,
"open list:KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)"
<kvm@vger.kernel.org>, Peter Xu <peterx@redhat.com>
Subject: [PATCH 1/3] KVM: selftests: Introduce a selftest to measure execution performance
Date: Fri, 1 Apr 2022 23:37:35 +0000 [thread overview]
Message-ID: <20220401233737.3021889-2-dmatlack@google.com> (raw)
In-Reply-To: <20220401233737.3021889-1-dmatlack@google.com>
Introduce a new selftest, execute_perf_test, that uses the
perf_test_util framework to measure the performance of executing code
within a VM. This test is similar to the other perf_test_util-based
tests in that it spins up a variable number of vCPUs and runs them
concurrently, accessing memory.
In order to support executiong, extend perf_test_util to populate guest
memory with return instructions rather than random garbage. This way
memory can be execute simply by calling it.
Currently only x86-64 supports execution, but other architectures can be
easily added by providing their return code instruction.
Signed-off-by: David Matlack <dmatlack@google.com>
---
tools/testing/selftests/kvm/.gitignore | 1 +
tools/testing/selftests/kvm/Makefile | 1 +
.../testing/selftests/kvm/execute_perf_test.c | 188 ++++++++++++++++++
.../selftests/kvm/include/perf_test_util.h | 2 +
.../selftests/kvm/lib/perf_test_util.c | 25 ++-
5 files changed, 215 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/kvm/execute_perf_test.c
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 1f1b6c978bf7..3647ddacb103 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -56,6 +56,7 @@
/demand_paging_test
/dirty_log_test
/dirty_log_perf_test
+/execute_perf_test
/hardware_disable_test
/kvm_create_max_vcpus
/kvm_page_table_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index c9cdbd248727..3c67346b0766 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -92,6 +92,7 @@ TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += execute_perf_test
TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += kvm_page_table_test
diff --git a/tools/testing/selftests/kvm/execute_perf_test.c b/tools/testing/selftests/kvm/execute_perf_test.c
new file mode 100644
index 000000000000..fa78facf44e7
--- /dev/null
+++ b/tools/testing/selftests/kvm/execute_perf_test.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
+
+/* Global variable used to synchronize all of the vCPU threads. */
+static int iteration;
+
+/* Set to true when vCPU threads should exit. */
+static bool done;
+
+/* The iteration that was last completed by each vCPU. */
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+/* Whether to overlap the regions of memory vCPUs access. */
+static bool overlap_memory_access;
+
+struct test_params {
+ /* The backing source for the region of memory. */
+ enum vm_mem_backing_src_type backing_src;
+
+ /* The amount of memory to allocate for each vCPU. */
+ uint64_t vcpu_memory_bytes;
+
+ /* The number of vCPUs to create in the VM. */
+ int vcpus;
+};
+
+static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id,
+ uint64_t expected_ucall)
+{
+ struct ucall uc;
+ uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc);
+
+ TEST_ASSERT(expected_ucall == actual_ucall,
+ "Guest exited unexpectedly (expected ucall %" PRIu64
+ ", got %" PRIu64 ")",
+ expected_ucall, actual_ucall);
+}
+
+static bool spin_wait_for_next_iteration(int *current_iteration)
+{
+ int last_iteration = *current_iteration;
+
+ do {
+ if (READ_ONCE(done))
+ return false;
+
+ *current_iteration = READ_ONCE(iteration);
+ } while (last_iteration == *current_iteration);
+
+ return true;
+}
+
+static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args)
+{
+ struct kvm_vm *vm = perf_test_args.vm;
+ int vcpu_id = vcpu_args->vcpu_id;
+ int current_iteration = 0;
+
+ while (spin_wait_for_next_iteration(¤t_iteration)) {
+ vcpu_run(vm, vcpu_id);
+ assert_ucall(vm, vcpu_id, UCALL_SYNC);
+ vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+ }
+}
+
+static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
+{
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+ target_iteration) {
+ continue;
+ }
+}
+
+static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ int next_iteration;
+ int vcpu_id;
+
+ /* Kick off the vCPUs by incrementing iteration. */
+ next_iteration = ++iteration;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ /* Wait for all vCPUs to finish the iteration. */
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
+ spin_wait_for_vcpu(vcpu_id, next_iteration);
+
+ ts_elapsed = timespec_elapsed(ts_start);
+ pr_info("%-30s: %ld.%09lds\n",
+ description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *params = arg;
+ struct kvm_vm *vm;
+ int vcpus = params->vcpus;
+
+ vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1,
+ params->backing_src, !overlap_memory_access);
+
+ perf_test_start_vcpu_threads(vcpus, vcpu_thread_main);
+
+ pr_info("\n");
+
+ perf_test_set_wr_fract(vm, 1);
+ run_iteration(vm, vcpus, "Populating memory");
+
+ perf_test_set_execute(vm, true);
+ run_iteration(vm, vcpus, "Executing from memory");
+
+ /* Set done to signal the vCPU threads to exit */
+ done = true;
+
+ perf_test_join_vcpu_threads(vcpus);
+ perf_test_destroy_vm(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n",
+ name);
+ puts("");
+ printf(" -h: Display this help message.");
+ guest_modes_help();
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
+ backing_src_help("-s");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct test_params params = {
+ .backing_src = DEFAULT_VM_MEM_SRC,
+ .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
+ .vcpus = 1,
+ };
+ int opt;
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'b':
+ params.vcpu_memory_bytes = parse_size(optarg);
+ break;
+ case 'v':
+ params.vcpus = atoi(optarg);
+ break;
+ case 'o':
+ overlap_memory_access = true;
+ break;
+ case 's':
+ params.backing_src = parse_backing_src_type(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ for_each_guest_mode(run_test, ¶ms);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index a86f953d8d36..0a5a56539aff 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -33,6 +33,7 @@ struct perf_test_args {
uint64_t gpa;
uint64_t guest_page_size;
int wr_fract;
+ bool execute;
struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
};
@@ -46,6 +47,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
void perf_test_destroy_vm(struct kvm_vm *vm);
void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
+void perf_test_set_execute(struct kvm_vm *vm, bool execute);
void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
void perf_test_join_vcpu_threads(int vcpus);
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 722df3a28791..1a5eb60b59da 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -36,6 +36,16 @@ static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *);
/* Set to true once all vCPU threads are up and running. */
static bool all_vcpu_threads_running;
+/*
+ * When writing to guest memory, write the opcode for the `ret` instruction so
+ * that subsequent iteractions can exercise instruction fetch by calling the
+ * memory.
+ *
+ * NOTE: Non-x86 architectures would to use different values here to support
+ * execute.
+ */
+#define RETURN_OPCODE 0xC3
+
/*
* Continuously write to the first 8 bytes of each page in the
* specified region.
@@ -58,8 +68,10 @@ static void guest_code(uint32_t vcpu_id)
for (i = 0; i < pages; i++) {
uint64_t addr = gva + (i * pta->guest_page_size);
- if (i % pta->wr_fract == 0)
- *(uint64_t *)addr = 0x0123456789ABCDEF;
+ if (pta->execute)
+ ((void (*)(void)) addr)();
+ else if (i % pta->wr_fract == 0)
+ *(uint64_t *)addr = RETURN_OPCODE;
else
READ_ONCE(*(uint64_t *)addr);
}
@@ -198,6 +210,15 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
sync_global_to_guest(vm, perf_test_args);
}
+void perf_test_set_execute(struct kvm_vm *vm, bool execute)
+{
+#ifndef __x86_64__
+ TEST_ASSERT(false, "Execute not supported on this architure; see RETURN_OPCODE.");
+#endif
+ perf_test_args.execute = execute;
+ sync_global_to_guest(vm, perf_test_args);
+}
+
static void *vcpu_thread_main(void *data)
{
struct vcpu_thread *vcpu = data;
base-commit: d1fb6a1ca3e535f89628193ab94203533b264c8c
--
2.35.1.1094.g7c7d902a7c-goog
next prev parent reply other threads:[~2022-04-01 23:37 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-01 23:37 [PATCH 0/3] KVM: Split huge pages mapped by the TDP MMU on fault David Matlack
2022-04-01 23:37 ` David Matlack [this message]
2022-04-04 18:13 ` [PATCH 1/3] KVM: selftests: Introduce a selftest to measure execution performance Ben Gardon
2022-04-05 20:59 ` David Matlack
2022-04-05 21:00 ` David Matlack
2022-04-01 23:37 ` [PATCH 2/3] KVM: x86/mmu: Pass account_nx to tdp_mmu_split_huge_page() David Matlack
2022-04-04 18:26 ` Ben Gardon
2022-04-01 23:37 ` [PATCH 3/3] KVM: x86/mmu: Split huge pages mapped by the TDP MMU on fault David Matlack
2022-04-04 18:48 ` Ben Gardon
2022-04-05 19:02 ` David Matlack
2022-04-07 19:39 ` Sean Christopherson
2022-04-07 20:52 ` David Matlack
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220401233737.3021889-2-dmatlack@google.com \
--to=dmatlack@google.com \
--cc=bgardon@google.com \
--cc=jmattson@google.com \
--cc=joro@8bytes.org \
--cc=kvm@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=seanjc@google.com \
--cc=vkuznets@redhat.com \
--cc=wanpengli@tencent.com \
--cc=zhenzhong.duan@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.