From: James Houghton <jthoughton@google.com>
To: Paolo Bonzini <pbonzini@redhat.com>,
Sean Christopherson <seanjc@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>, Marc Zyngier <maz@kernel.org>,
Oliver Upton <oliver.upton@linux.dev>,
Yan Zhao <yan.y.zhao@intel.com>,
James Houghton <jthoughton@google.com>,
Nikita Kalyazin <kalyazin@amazon.com>,
Anish Moorthy <amoorthy@google.com>,
Peter Gonda <pgonda@google.com>, Peter Xu <peterx@redhat.com>,
David Matlack <dmatlack@google.com>,
"Wang, Wei W" <wei.w.wang@intel.com>,
kvm@vger.kernel.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev
Subject: [PATCH v1 10/13] KVM: selftests: Add KVM Userfault mode to demand_paging_test
Date: Wed, 4 Dec 2024 19:13:45 +0000 [thread overview]
Message-ID: <20241204191349.1730936-11-jthoughton@google.com> (raw)
In-Reply-To: <20241204191349.1730936-1-jthoughton@google.com>
Add a way for the KVM_RUN loop to handle -EFAULT exits when they are for
KVM_MEMORY_EXIT_FLAG_USERFAULT. In this case, preemptively handle the
UFFDIO_COPY or UFFDIO_CONTINUE if userfaultfd is also in use. This saves
the trip through the userfaultfd poll/read/WAKE loop.
When preemptively handling UFFDIO_COPY/CONTINUE, do so with
MODE_DONTWAKE, as there will not be a thread to wake. If a thread *does*
take the userfaultfd slow path, we will get a regular userfault, and we
will call handle_uffd_page_request() which will do a full wake-up. In
the EEXIST case, a wake-up will not occur. Make sure to call UFFDIO_WAKE
explicitly in this case.
When handling KVM userfaults, make sure to set the bitmap with
memory_order_release. Although it wouldn't affect the functionality of
the test (because memstress doesn't actually require any particular
guest memory contents), it is what userspace normally needs to do.
Add `-k` to set the test to use KVM Userfault.
Add the vm_mem_region_set_flags_userfault() helper for setting
`userfault_bitmap` and KVM_MEM_USERFAULT at the same time.
Signed-off-by: James Houghton <jthoughton@google.com>
---
.../selftests/kvm/demand_paging_test.c | 139 +++++++++++++++++-
.../testing/selftests/kvm/include/kvm_util.h | 5 +
tools/testing/selftests/kvm/lib/kvm_util.c | 40 ++++-
3 files changed, 176 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 315f5c9037b4..e7ea1c57264d 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -12,7 +12,9 @@
#include <time.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
+#include <linux/bitmap.h>
#include <sys/syscall.h>
+#include <stdatomic.h>
#include "kvm_util.h"
#include "test_util.h"
@@ -24,11 +26,21 @@
#ifdef __NR_userfaultfd
static int nr_vcpus = 1;
+static int num_uffds;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static size_t demand_paging_size;
+static size_t host_page_size;
static char *guest_data_prototype;
+static struct {
+ bool enabled;
+ int uffd_mode; /* set if userfaultfd is also in use */
+ struct uffd_desc **uffd_descs;
+} kvm_userfault_data;
+
+static void resolve_kvm_userfault(u64 gpa, u64 size);
+
static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
struct kvm_vcpu *vcpu = vcpu_args->vcpu;
@@ -41,8 +53,22 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
clock_gettime(CLOCK_MONOTONIC, &start);
/* Let the guest access its memory */
+restart:
ret = _vcpu_run(vcpu);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
+ if (ret < 0 && errno == EFAULT && kvm_userfault_data.enabled) {
+ /* Check for userfault. */
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_MEMORY_FAULT,
+ "Got invalid exit reason: %x", run->exit_reason);
+ TEST_ASSERT(run->memory_fault.flags ==
+ KVM_MEMORY_EXIT_FLAG_USERFAULT,
+ "Got invalid memory fault exit: %llx",
+ run->memory_fault.flags);
+ resolve_kvm_userfault(run->memory_fault.gpa,
+ run->memory_fault.size);
+ goto restart;
+ } else
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
+
if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
TEST_ASSERT(false,
"Invalid guest sync status: exit_reason=%s",
@@ -54,11 +80,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
ts_diff.tv_sec, ts_diff.tv_nsec);
}
-static int handle_uffd_page_request(int uffd_mode, int uffd,
- struct uffd_msg *msg)
+static int resolve_uffd_page_request(int uffd_mode, int uffd, uint64_t addr,
+ bool wake)
{
pid_t tid = syscall(__NR_gettid);
- uint64_t addr = msg->arg.pagefault.address;
struct timespec start;
struct timespec ts_diff;
int r;
@@ -71,7 +96,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
copy.src = (uint64_t)guest_data_prototype;
copy.dst = addr;
copy.len = demand_paging_size;
- copy.mode = 0;
+ copy.mode = wake ? 0 : UFFDIO_COPY_MODE_DONTWAKE;
r = ioctl(uffd, UFFDIO_COPY, &copy);
/*
@@ -96,6 +121,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
cont.range.start = addr;
cont.range.len = demand_paging_size;
+ cont.mode = wake ? 0 : UFFDIO_CONTINUE_MODE_DONTWAKE;
r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
/*
@@ -119,6 +145,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
TEST_FAIL("Invalid uffd mode %d", uffd_mode);
}
+ if (r < 0 && wake) {
+ /*
+ * No wake-up occurs when UFFDIO_COPY/CONTINUE fails, but we
+ * have a thread waiting. Wake it up.
+ */
+ struct uffdio_range range = {0};
+
+ range.start = addr;
+ range.len = demand_paging_size;
+
+ TEST_ASSERT(ioctl(uffd, UFFDIO_WAKE, &range) == 0,
+ "UFFDIO_WAKE failed: 0x%lx", addr);
+ }
+
ts_diff = timespec_elapsed(start);
PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
@@ -129,6 +169,58 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
return 0;
}
+static int handle_uffd_page_request(int uffd_mode, int uffd,
+ struct uffd_msg *msg)
+{
+ uint64_t addr = msg->arg.pagefault.address;
+
+ return resolve_uffd_page_request(uffd_mode, uffd, addr, true);
+}
+
+static void resolve_kvm_userfault(u64 gpa, u64 size)
+{
+ struct kvm_vm *vm = memstress_args.vm;
+ struct userspace_mem_region *region;
+ unsigned long *bitmap_chunk;
+ u64 page, gpa_offset;
+
+ region = (struct userspace_mem_region *) userspace_mem_region_find(
+ vm, gpa, (gpa + size - 1));
+
+ if (kvm_userfault_data.uffd_mode) {
+ /*
+ * Resolve userfaults early, without needing to read them
+ * off the userfaultfd.
+ */
+ uint64_t hva = (uint64_t)addr_gpa2hva(vm, gpa);
+ struct uffd_desc **descs = kvm_userfault_data.uffd_descs;
+ int i, fd;
+
+ for (i = 0; i < num_uffds; ++i)
+ if (hva >= (uint64_t)descs[i]->va_start &&
+ hva < (uint64_t)descs[i]->va_end)
+ break;
+
+ TEST_ASSERT(i < num_uffds,
+ "Did not find userfaultfd for hva: %lx", hva);
+
+ fd = kvm_userfault_data.uffd_descs[i]->uffd;
+ resolve_uffd_page_request(kvm_userfault_data.uffd_mode, fd,
+ hva, false);
+ } else {
+ uint64_t hva = (uint64_t)addr_gpa2hva(vm, gpa);
+
+ memcpy((char *)hva, guest_data_prototype, demand_paging_size);
+ }
+
+ gpa_offset = gpa - region->region.guest_phys_addr;
+ page = gpa_offset / host_page_size;
+ bitmap_chunk = (unsigned long *)region->region.userfault_bitmap +
+ page / BITS_PER_LONG;
+ atomic_fetch_and_explicit(bitmap_chunk,
+ ~(1ul << (page % BITS_PER_LONG)), memory_order_release);
+}
+
struct test_params {
int uffd_mode;
bool single_uffd;
@@ -136,6 +228,7 @@ struct test_params {
int readers_per_uffd;
enum vm_mem_backing_src_type src_type;
bool partition_vcpu_memory_access;
+ bool kvm_userfault;
};
static void prefault_mem(void *alias, uint64_t len)
@@ -149,6 +242,25 @@ static void prefault_mem(void *alias, uint64_t len)
}
}
+static void enable_userfault(struct kvm_vm *vm, int slots)
+{
+ for (int i = 0; i < slots; ++i) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+ struct userspace_mem_region *region;
+ unsigned long *userfault_bitmap;
+ int flags = KVM_MEM_USERFAULT;
+
+ region = memslot2region(vm, slot);
+ userfault_bitmap = bitmap_zalloc(region->mmap_size /
+ host_page_size);
+ /* everything is userfault initially */
+ memset(userfault_bitmap, -1, region->mmap_size / host_page_size / CHAR_BIT);
+ printf("Setting bitmap: %p\n", userfault_bitmap);
+ vm_mem_region_set_flags_userfault(vm, slot, flags,
+ userfault_bitmap);
+ }
+}
+
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct memstress_vcpu_args *vcpu_args;
@@ -159,12 +271,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct timespec ts_diff;
double vcpu_paging_rate;
struct kvm_vm *vm;
- int i, num_uffds = 0;
+ int i;
vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
p->src_type, p->partition_vcpu_memory_access);
demand_paging_size = get_backing_src_pagesz(p->src_type);
+ host_page_size = getpagesize();
guest_data_prototype = malloc(demand_paging_size);
TEST_ASSERT(guest_data_prototype,
@@ -208,6 +321,14 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
}
+ if (p->kvm_userfault) {
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_USERFAULT));
+ kvm_userfault_data.enabled = true;
+ kvm_userfault_data.uffd_mode = p->uffd_mode;
+ kvm_userfault_data.uffd_descs = uffd_descs;
+ enable_userfault(vm, 1);
+ }
+
pr_info("Finished creating vCPUs and starting uffd threads\n");
clock_gettime(CLOCK_MONOTONIC, &start);
@@ -265,6 +386,7 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
+ printf(" -k: Use KVM Userfault\n");
puts("");
exit(0);
}
@@ -283,7 +405,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) {
+ while ((opt = getopt(argc, argv, "ahokm:u:d:b:s:v:c:r:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -326,6 +448,9 @@ int main(int argc, char *argv[])
"Invalid number of readers per uffd %d: must be >=1",
p.readers_per_uffd);
break;
+ case 'k':
+ p.kvm_userfault = true;
+ break;
case 'h':
default:
help(argv[0]);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index bc7c242480d6..7fec3559aa64 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -582,6 +582,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
uint64_t guest_paddr, uint32_t slot, uint64_t npages,
uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
+struct userspace_mem_region *
+userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end);
#ifndef vm_arch_has_protected_memory
static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
@@ -591,6 +593,9 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
#endif
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_set_flags_userfault(struct kvm_vm *vm, uint32_t slot,
+ uint32_t flags,
+ unsigned long *userfault_bitmap);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 9603f99d3247..7195dd3db5df 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -634,7 +634,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
* of the regions is returned. Null is returned only when no overlapping
* region exists.
*/
-static struct userspace_mem_region *
+struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
struct rb_node *node;
@@ -1149,6 +1149,44 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
ret, errno, slot, flags);
}
+/*
+ * VM Memory Region Flags Set with a userfault bitmap
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * flags - Flags for the memslot
+ * userfault_bitmap - The bitmap to use for KVM_MEM_USERFAULT
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the flags of the memory region specified by the value of slot,
+ * to the values given by flags. This helper adds a way to provide a
+ * userfault_bitmap.
+ */
+void vm_mem_region_set_flags_userfault(struct kvm_vm *vm, uint32_t slot,
+ uint32_t flags,
+ unsigned long *userfault_bitmap)
+{
+ int ret;
+ struct userspace_mem_region *region;
+
+ region = memslot2region(vm, slot);
+
+ TEST_ASSERT(!userfault_bitmap ^ (flags & KVM_MEM_USERFAULT),
+ "KVM_MEM_USERFAULT must be specified with a bitmap");
+
+ region->region.flags = flags;
+ region->region.userfault_bitmap = (__u64)userfault_bitmap;
+
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
+ " rc: %i errno: %i slot: %u flags: 0x%x",
+ ret, errno, slot, flags);
+}
+
/*
* VM Memory Region Move
*
--
2.47.0.338.g60cca15819-goog
next prev parent reply other threads:[~2024-12-04 19:14 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-04 19:13 [PATCH v1 00/13] KVM: Introduce KVM Userfault James Houghton
2024-12-04 19:13 ` [PATCH v1 01/13] KVM: Add KVM_MEM_USERFAULT memslot flag and bitmap James Houghton
2024-12-05 11:52 ` kernel test robot
2024-12-05 14:22 ` kernel test robot
2024-12-06 22:46 ` James Houghton
2024-12-04 19:13 ` [PATCH v1 02/13] KVM: Add KVM_MEMORY_EXIT_FLAG_USERFAULT James Houghton
2024-12-04 19:13 ` [PATCH v1 03/13] KVM: Allow late setting of KVM_MEM_USERFAULT on guest_memfd memslot James Houghton
2024-12-04 19:13 ` [PATCH v1 04/13] KVM: Advertise KVM_CAP_USERFAULT in KVM_CHECK_EXTENSION James Houghton
2024-12-04 19:13 ` [PATCH v1 05/13] KVM: x86/mmu: Add support for KVM_MEM_USERFAULT James Houghton
2024-12-04 19:13 ` [PATCH v1 06/13] KVM: arm64: " James Houghton
2024-12-04 23:07 ` Oliver Upton
2024-12-05 23:31 ` James Houghton
2024-12-06 0:45 ` Oliver Upton
2024-12-04 19:13 ` [PATCH v1 07/13] KVM: selftests: Fix vm_mem_region_set_flags docstring James Houghton
2024-12-04 19:13 ` [PATCH v1 08/13] KVM: selftests: Fix prefault_mem logic James Houghton
2024-12-04 19:13 ` [PATCH v1 09/13] KVM: selftests: Add va_start/end into uffd_desc James Houghton
2024-12-04 19:13 ` James Houghton [this message]
2024-12-14 22:46 ` [PATCH v1 10/13] KVM: selftests: Add KVM Userfault mode to demand_paging_test kernel test robot
2024-12-04 19:13 ` [PATCH v1 11/13] KVM: selftests: Inform set_memory_region_test of KVM_MEM_USERFAULT James Houghton
2024-12-04 19:13 ` [PATCH v1 12/13] KVM: selftests: Add KVM_MEM_USERFAULT + guest_memfd toggle tests James Houghton
2024-12-04 19:13 ` [PATCH v1 13/13] KVM: Documentation: Add KVM_CAP_USERFAULT and KVM_MEM_USERFAULT details James Houghton
2024-12-07 1:38 ` Bagas Sanjaya
2024-12-24 21:07 ` [PATCH v1 00/13] KVM: Introduce KVM Userfault Peter Xu
2025-01-02 17:53 ` James Houghton
2025-01-16 20:19 ` Peter Xu
2025-01-16 20:32 ` Peter Xu
2025-01-16 22:16 ` Sean Christopherson
2025-01-16 23:04 ` James Houghton
2025-01-16 23:17 ` Peter Xu
2025-01-16 23:46 ` Sean Christopherson
2025-01-16 22:51 ` James Houghton
2025-01-16 23:31 ` Peter Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241204191349.1730936-11-jthoughton@google.com \
--to=jthoughton@google.com \
--cc=Wang@google.com \
--cc=amoorthy@google.com \
--cc=corbet@lwn.net \
--cc=dmatlack@google.com \
--cc=kalyazin@amazon.com \
--cc=kvm@vger.kernel.org \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maz@kernel.org \
--cc=oliver.upton@linux.dev \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=pgonda@google.com \
--cc=seanjc@google.com \
--cc=wei.w.wang@intel.com \
--cc=yan.y.zhao@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox