* [PATCH v8 37/46] KVM: selftests: Test that shared/private status is consistent across processes
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Sean Christopherson <seanjc@google.com>
Add a test to verify that a guest_memfd's shared/private status is
consistent across processes, and that any shared pages previously mapped in
any process are unmapped from all processes.
The test forks a child process after creating the shared guest_memfd
region so that the second process exists alongside the main process for the
entire test.
The processes then take turns to access memory to check that the
shared/private status is consistent across processes.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
.../kvm/x86/guest_memfd_conversions_test.c | 118 +++++++++++++++++++++
1 file changed, 118 insertions(+)
diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
index f03af2c46426f..99b0023609670 100644
--- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
@@ -2,6 +2,8 @@
/*
* Copyright (c) 2024, Google LLC.
*/
+#include <pthread.h>
+#include <time.h>
#include <sys/mman.h>
#include <unistd.h>
@@ -323,6 +325,122 @@ GMEM_CONVERSION_TEST_INIT_SHARED(truncate)
test_private(t, 0, 0, 'A');
}
+/* Test that shared/private memory protections work and are seen from any process. */
+GMEM_CONVERSION_TEST_INIT_SHARED(forked_accesses)
+{
+ enum test_state {
+ STATE_INIT,
+ STATE_CHECK_SHARED,
+ STATE_DONE_CHECKING_SHARED,
+ STATE_CHECK_PRIVATE,
+ STATE_DONE_CHECKING_PRIVATE,
+ };
+
+ struct sync_state {
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ enum test_state step;
+ } *sync;
+
+ pthread_mutexattr_t mattr;
+ pthread_condattr_t cattr;
+ pid_t child_pid, parent_pid;
+ int status;
+
+ sync = kvm_mmap(sizeof(*sync), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1);
+
+ pthread_mutexattr_init(&mattr);
+ pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED);
+ pthread_mutex_init(&sync->mutex, &mattr);
+ pthread_mutexattr_destroy(&mattr);
+
+ pthread_condattr_init(&cattr);
+ pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
+ pthread_cond_init(&sync->cond, &cattr);
+ pthread_condattr_destroy(&cattr);
+
+ sync->step = STATE_INIT;
+
+#define TEST_STATE_AWAIT(__state) \
+ do { \
+ pthread_mutex_lock(&sync->mutex); \
+ while (sync->step != (__state)) { \
+ struct timespec ts, stop; \
+ int ret; \
+ /* Wake every 100ms so that a dead peer is noticed instead of hanging. */ \
+ clock_gettime(CLOCK_REALTIME, &ts); \
+ stop = timespec_add_ns(ts, 100 * 1000000UL); \
+ /* A timeout can race with the peer's last update + exit: check step first. */ \
+ ret = pthread_cond_timedwait(&sync->cond, &sync->mutex, &stop); \
+ if (ret == ETIMEDOUT && sync->step != (__state)) { \
+ bool alive = (child_pid == 0) ? \
+ (getppid() == parent_pid) : \
+ (waitpid(child_pid, NULL, WNOHANG) == 0); \
+ TEST_ASSERT(alive, "Other process exited prematurely"); \
+ } else { \
+ TEST_ASSERT(!ret || ret == ETIMEDOUT, "pthread_cond_timedwait failed"); \
+ } \
+ } \
+ pthread_mutex_unlock(&sync->mutex); \
+ } while (0)
+
+#define TEST_STATE_SET(__state) \
+ do { \
+ pthread_mutex_lock(&sync->mutex); \
+ sync->step = (__state); \
+ pthread_cond_broadcast(&sync->cond); \
+ pthread_mutex_unlock(&sync->mutex); \
+ } while (0)
+
+ parent_pid = getpid();
+ child_pid = fork();
+ TEST_ASSERT(child_pid != -1, "fork failed");
+
+ if (child_pid == 0) {
+ const char inconsequential = 0xdd;
+
+ TEST_STATE_AWAIT(STATE_CHECK_SHARED);
+
+ /*
+ * This maps the pages into the child process as well, and tests
+ * that the conversion process will unmap the guest_memfd memory
+ * from all processes.
+ */
+ host_do_rmw(t->mem, 0, 0xB, 0xC);
+
+ TEST_STATE_SET(STATE_DONE_CHECKING_SHARED);
+ TEST_STATE_AWAIT(STATE_CHECK_PRIVATE);
+
+ TEST_EXPECT_SIGBUS(READ_ONCE(t->mem[0]));
+ TEST_EXPECT_SIGBUS(WRITE_ONCE(t->mem[0], inconsequential));
+
+ TEST_STATE_SET(STATE_DONE_CHECKING_PRIVATE);
+ exit(0);
+ }
+
+ test_shared(t, 0, 0, 0xA, 0xB);
+
+ TEST_STATE_SET(STATE_CHECK_SHARED);
+ TEST_STATE_AWAIT(STATE_DONE_CHECKING_SHARED);
+
+ test_convert_to_private(t, 0, 0xC, 0xD);
+
+ TEST_STATE_SET(STATE_CHECK_PRIVATE);
+ TEST_STATE_AWAIT(STATE_DONE_CHECKING_PRIVATE);
+
+ TEST_ASSERT_EQ(waitpid(child_pid, &status, 0), child_pid);
+ TEST_ASSERT(WIFEXITED(status) && WEXITSTATUS(status) == 0,
+ "Child exited with unexpected status");
+
+ pthread_mutex_destroy(&sync->mutex);
+ pthread_cond_destroy(&sync->cond);
+ kvm_munmap(sync, sizeof(*sync));
+
+#undef TEST_STATE_SET
+#undef TEST_STATE_AWAIT
+}
+
int main(int argc, char *argv[])
{
TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 36/46] KVM: selftests: Test that truncation does not change shared/private status
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Add a test to verify that deallocating a page in a guest memfd region via
fallocate() with FALLOC_FL_PUNCH_HOLE does not alter the shared or private
status of the corresponding memory range.
When a page backing a guest memfd mapping is deallocated, e.g., by punching
a hole or truncating the file, and then subsequently faulted back in, the
new page must inherit the correct shared/private status tracked by
guest_memfd.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
.../selftests/kvm/x86/guest_memfd_conversions_test.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
index 0b024fb7227f0..f03af2c46426f 100644
--- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
@@ -10,6 +10,7 @@
#include <linux/sizes.h>
#include "kvm_util.h"
+#include "kvm_syscalls.h"
#include "kselftest_harness.h"
#include "test_util.h"
#include "ucall_common.h"
@@ -309,6 +310,19 @@ GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(unallocated_folios, 8)
test_convert_to_shared(t, i, 'B', 'C', 'D');
}
+/* Truncation should not affect shared/private status. */
+GMEM_CONVERSION_TEST_INIT_SHARED(truncate)
+{
+ host_do_rmw(t->mem, 0, 0, 'A');
+ kvm_fallocate(t->gmem_fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+ host_do_rmw(t->mem, 0, 0, 'A');
+
+ test_convert_to_private(t, 0, 'A', 'B');
+
+ kvm_fallocate(t->gmem_fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+ test_private(t, 0, 0, 'A');
+}
+
int main(int argc, char *argv[])
{
TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 35/46] KVM: selftests: Convert with allocated folios in different layouts
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Add a guest_memfd selftest to verify that memory conversions work
correctly with allocated folios in different layouts.
By iterating through which pages are initially faulted, the test covers
various layouts of contiguous allocated and unallocated regions, exercising
conversion with different range layouts.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
.../kvm/x86/guest_memfd_conversions_test.c | 30 ++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
index b43ac196330f1..0b024fb7227f0 100644
--- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
@@ -279,6 +279,36 @@ GMEM_CONVERSION_TEST_INIT_PRIVATE(before_allocation_private)
test_convert_to_shared(t, 0, 0, 'A', 'B');
}
+/*
+ * Test that when some of the folios in the conversion range are allocated,
+ * conversion requests are handled correctly in guest_memfd. Vary the ranges
+ * allocated before conversion, using test_page, to cover various layouts of
+ * contiguous allocated and unallocated regions.
+ */
+GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(unallocated_folios, 8)
+{
+ const int second_page_to_fault = 4;
+ int i;
+
+ /*
+ * Fault in two pages to test filemap range operations; only one page is
+ * faulted when test_page == second_page_to_fault.
+ */
+ host_do_rmw(t->mem, test_page, 0, 'A');
+ if (test_page != second_page_to_fault)
+ host_do_rmw(t->mem, second_page_to_fault, 0, 'A');
+
+ gmem_set_private(t->gmem_fd, 0, nr_pages * page_size);
+ for (i = 0; i < nr_pages; ++i) {
+ char expected = (i == test_page || i == second_page_to_fault) ? 'A' : 0;
+
+ test_private(t, i, expected, 'B');
+ }
+
+ for (i = 0; i < nr_pages; ++i)
+ test_convert_to_shared(t, i, 'B', 'C', 'D');
+}
+
int main(int argc, char *argv[])
{
TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 34/46] KVM: selftests: Test conversion before allocation
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Add two test cases to the guest_memfd conversions selftest to cover
the scenario where a conversion is requested before any memory has been
allocated in the guest_memfd region.
The KVM_SET_MEMORY_ATTRIBUTES2 ioctl can be called on a memory region at
any time. If the guest had not yet faulted in any pages for that region,
the kernel must record the conversion request and apply the requested state
when the pages are eventually allocated.
The new tests cover both conversion directions.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
.../selftests/kvm/x86/guest_memfd_conversions_test.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
index 8e17d5c08aeb8..b43ac196330f1 100644
--- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
@@ -265,6 +265,20 @@ GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(indexing, 4)
#undef combine
}
+/*
+ * Test that even if there are no folios yet, conversion requests are recorded
+ * in guest_memfd.
+ */
+GMEM_CONVERSION_TEST_INIT_SHARED(before_allocation_shared)
+{
+ test_convert_to_private(t, 0, 0, 'A');
+}
+
+GMEM_CONVERSION_TEST_INIT_PRIVATE(before_allocation_private)
+{
+ test_convert_to_shared(t, 0, 0, 'A', 'B');
+}
+
int main(int argc, char *argv[])
{
TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 33/46] KVM: selftests: Test conversion precision in guest_memfd
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
The existing guest_memfd conversion tests only use single-page memory
regions. This provides no coverage for multi-page guest_memfd objects,
specifically whether KVM correctly handles the page index for conversion
operations. An incorrect implementation could, for example, always operate
on the first page regardless of the index provided.
Add a new test case to verify that conversions between private and shared
memory correctly target the specified page within a multi-page guest_memfd.
This test also verifies the precision of memory conversions by converting a
single page and then iterating through all other pages to ensure they remain in
their original state.
To support this test, add a new GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED
macro that handles setting up and tearing down the VM for each page
iteration. The teardown logic is adjusted to prevent a double-free in this
new scenario.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
.../kvm/x86/guest_memfd_conversions_test.c | 66 ++++++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
index 5b070d3374eae..8e17d5c08aeb8 100644
--- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
@@ -61,8 +61,13 @@ static void gmem_conversions_do_setup(test_data_t *t, int nr_pages,
static void gmem_conversions_do_teardown(test_data_t *t)
{
+ /* vcpu is NULL if the VM was already freed (multipage tests); skip. */
+ if (!t->vcpu)
+ return;
+
/* No need to close gmem_fd, it's owned by the VM structure. */
kvm_vm_free(t->vcpu->vm);
+ t->vcpu = NULL;
}
FIXTURE_TEARDOWN(gmem_conversions)
@@ -101,6 +106,29 @@ static void __gmem_conversions_##test(test_data_t *t, int nr_pages) \
#define GMEM_CONVERSION_TEST_INIT_SHARED(test) \
__GMEM_CONVERSION_TEST_INIT_SHARED(test, 1)
+/*
+ * Repeats test over nr_pages in a guest_memfd of size nr_pages, providing each
+ * test iteration with test_page, the index of the page under test in
+ * guest_memfd. test_page takes values 0..(nr_pages - 1) inclusive.
+ */
+#define GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(test, __nr_pages) \
+static void __gmem_conversions_multipage_##test(test_data_t *t, int nr_pages, \
+ const int test_page); \
+ \
+TEST_F(gmem_conversions, test) \
+{ \
+ const u64 flags = GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED; \
+ int i; \
+ \
+ for (i = 0; i < __nr_pages; ++i) { \
+ gmem_conversions_do_setup(self, __nr_pages, flags); \
+ __gmem_conversions_multipage_##test(self, __nr_pages, i); \
+ gmem_conversions_do_teardown(self); \
+ } \
+} \
+static void __gmem_conversions_multipage_##test(test_data_t *t, int nr_pages, \
+ const int test_page)
+
struct guest_check_data {
void *mem;
char expected_val;
@@ -199,6 +227,44 @@ GMEM_CONVERSION_TEST_INIT_SHARED(init_shared)
test_convert_to_shared(t, 0, 'C', 'D', 'E');
}
+GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(indexing, 4)
+{
+ int i;
+
+ /* Get a char that varies with both the page index x and the step n. */
+#define combine(x, n) (((x) << 4) + (n))
+#define i_(n) (combine(i, n))
+#define t_(n) (combine(test_page, n))
+
+ /*
+ * Start with the highest index, to catch any errors when, perhaps, the
+ * first page is returned even for the last index.
+ */
+ for (i = nr_pages - 1; i >= 0; --i)
+ test_shared(t, i, 0, i_(0), i_(2));
+
+ test_convert_to_private(t, test_page, t_(2), t_(3));
+
+ for (i = 0; i < nr_pages; ++i) {
+ if (i == test_page)
+ test_private(t, test_page, t_(3), t_(4));
+ else
+ test_shared(t, i, i_(2), i_(3), i_(4));
+ }
+
+ test_convert_to_shared(t, test_page, t_(4), t_(5), t_(6));
+
+ for (i = 0; i < nr_pages; ++i) {
+ char expected = i == test_page ? t_(6) : i_(4);
+
+ test_shared(t, i, expected, i_(7), i_(8));
+ }
+
+#undef t_
+#undef i_
+#undef combine
+}
+
int main(int argc, char *argv[])
{
TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 31/46] KVM: selftests: Test basic single-page conversion flow
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Add a selftest for the guest_memfd memory attribute conversion ioctls.
The test starts the guest_memfd as all-private (the default state), and
verifies the basic flow of converting a single page to shared and then back
to private.
Add infrastructure that supports extensions to other conversion flow
tests. This infrastructure will be used in upcoming patches for other
conversion tests.
Add the test as an x86-specific test since guest_memfd's testing
vehicle (KVM_X86_SW_PROTECTED_VM) is x86-specific.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../kvm/x86/guest_memfd_conversions_test.c | 199 +++++++++++++++++++++
2 files changed, 200 insertions(+)
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 4ace12606e937..b0e64a6dde21a 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -152,6 +152,7 @@ TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
TEST_GEN_PROGS_x86 += x86/aperfmperf_test
+TEST_GEN_PROGS_x86 += x86/guest_memfd_conversions_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
new file mode 100644
index 0000000000000..8e09e241723e5
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Google LLC.
+ */
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <linux/align.h>
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "kselftest_harness.h"
+#include "test_util.h"
+#include "ucall_common.h"
+
+FIXTURE(gmem_conversions) {
+ struct kvm_vcpu *vcpu;
+ int gmem_fd;
+ /* HVA of the first byte of the memory mmap()-ed from gmem_fd. */
+ char *mem;
+};
+
+typedef FIXTURE_DATA(gmem_conversions) test_data_t;
+
+FIXTURE_SETUP(gmem_conversions) { }
+
+static size_t page_size;
+
+static void guest_do_rmw(void);
+#define GUEST_MEMFD_SHARING_TEST_GVA 0x90000000ULL
+
+/*
+ * Defer setup until the individual test is invoked so that tests can specify
+ * the number of pages and flags for the guest_memfd instance.
+ */
+static void gmem_conversions_do_setup(test_data_t *t, int nr_pages,
+ int gmem_flags)
+{
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+ };
+ /*
+ * Use high GPA above APIC_DEFAULT_PHYS_BASE to avoid clashing with
+ * APIC_DEFAULT_PHYS_BASE.
+ */
+ const gpa_t gpa = SZ_4G;
+ const u32 slot = 1;
+ struct kvm_vm *vm;
+
+ vm = __vm_create_shape_with_one_vcpu(shape, &t->vcpu, nr_pages, guest_do_rmw);
+
+ vm_mem_add(vm, VM_MEM_SRC_SHMEM, gpa, slot, nr_pages,
+ KVM_MEM_GUEST_MEMFD, -1, 0, gmem_flags);
+
+ t->gmem_fd = kvm_slot_to_fd(vm, slot);
+ t->mem = addr_gpa2hva(vm, gpa);
+ virt_map(vm, GUEST_MEMFD_SHARING_TEST_GVA, gpa, nr_pages);
+}
+
+static void gmem_conversions_do_teardown(test_data_t *t)
+{
+ /* No need to close gmem_fd, it's owned by the VM structure. */
+ kvm_vm_free(t->vcpu->vm);
+}
+
+FIXTURE_TEARDOWN(gmem_conversions)
+{
+ gmem_conversions_do_teardown(self);
+}
+
+/*
+ * In these test definition macros, __nr_pages and nr_pages are used to set up
+ * the total number of pages in the guest_memfd under test. This will be
+ * available in the test definitions as nr_pages.
+ */
+
+#define __GMEM_CONVERSION_TEST(test, __nr_pages, flags) \
+static void __gmem_conversions_##test(test_data_t *t, int nr_pages); \
+ \
+TEST_F(gmem_conversions, test) \
+{ \
+ gmem_conversions_do_setup(self, __nr_pages, flags); \
+ __gmem_conversions_##test(self, __nr_pages); \
+} \
+static void __gmem_conversions_##test(test_data_t *t, int nr_pages) \
+
+#define GMEM_CONVERSION_TEST(test, __nr_pages, flags) \
+ __GMEM_CONVERSION_TEST(test, __nr_pages, (flags) | GUEST_MEMFD_FLAG_MMAP)
+
+#define __GMEM_CONVERSION_TEST_INIT_PRIVATE(test, __nr_pages) \
+ GMEM_CONVERSION_TEST(test, __nr_pages, 0)
+
+#define GMEM_CONVERSION_TEST_INIT_PRIVATE(test) \
+ __GMEM_CONVERSION_TEST_INIT_PRIVATE(test, 1)
+
+struct guest_check_data {
+ void *mem;
+ char expected_val;
+ char write_val;
+};
+static struct guest_check_data guest_data;
+
+static void guest_do_rmw(void)
+{
+ for (;;) {
+ char *mem = READ_ONCE(guest_data.mem);
+
+ GUEST_ASSERT_EQ(READ_ONCE(*mem), READ_ONCE(guest_data.expected_val));
+ WRITE_ONCE(*mem, READ_ONCE(guest_data.write_val));
+
+ GUEST_SYNC(0);
+ }
+}
+
+static void run_guest_do_rmw(struct kvm_vcpu *vcpu, u64 pgoff,
+ char expected_val, char write_val)
+{
+ struct ucall uc;
+ int r;
+
+ guest_data.mem = (void *)GUEST_MEMFD_SHARING_TEST_GVA + pgoff * page_size;
+ guest_data.expected_val = expected_val;
+ guest_data.write_val = write_val;
+ sync_global_to_guest(vcpu->vm, guest_data);
+
+ do {
+ r = __vcpu_run(vcpu);
+ } while (r == -1 && errno == EINTR);
+
+ TEST_ASSERT_EQ(r, 0);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+ }
+}
+
+static void host_do_rmw(char *mem, u64 pgoff, char expected_val,
+ char write_val)
+{
+ TEST_ASSERT_EQ(READ_ONCE(mem[pgoff * page_size]), expected_val);
+ WRITE_ONCE(mem[pgoff * page_size], write_val);
+}
+
+static void test_private(test_data_t *t, u64 pgoff, char starting_val,
+ char write_val)
+{
+ TEST_EXPECT_SIGBUS(WRITE_ONCE(t->mem[pgoff * page_size], write_val));
+ run_guest_do_rmw(t->vcpu, pgoff, starting_val, write_val);
+ TEST_EXPECT_SIGBUS(READ_ONCE(t->mem[pgoff * page_size]));
+}
+
+static void test_convert_to_private(test_data_t *t, u64 pgoff,
+ char starting_val, char write_val)
+{
+ gmem_set_private(t->gmem_fd, pgoff * page_size, page_size);
+ test_private(t, pgoff, starting_val, write_val);
+}
+
+static void test_shared(test_data_t *t, u64 pgoff, char starting_val,
+ char host_write_val, char write_val)
+{
+ host_do_rmw(t->mem, pgoff, starting_val, host_write_val);
+ run_guest_do_rmw(t->vcpu, pgoff, host_write_val, write_val);
+ TEST_ASSERT_EQ(READ_ONCE(t->mem[pgoff * page_size]), write_val);
+}
+
+static void test_convert_to_shared(test_data_t *t, u64 pgoff,
+ char starting_val, char host_write_val,
+ char write_val)
+{
+ gmem_set_shared(t->gmem_fd, pgoff * page_size, page_size);
+ test_shared(t, pgoff, starting_val, host_write_val, write_val);
+}
+
+GMEM_CONVERSION_TEST_INIT_PRIVATE(init_private)
+{
+ test_private(t, 0, 0, 'A');
+ test_convert_to_shared(t, 0, 'A', 'B', 'C');
+ test_convert_to_private(t, 0, 'C', 'E');
+}
+
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES) &
+ KVM_MEMORY_ATTRIBUTE_PRIVATE);
+
+ page_size = getpagesize();
+
+ return test_harness_run(argc, argv);
+}
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 32/46] KVM: selftests: Test conversion flow when INIT_SHARED
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Add a test case to verify that conversions between private and shared
memory work correctly when the memory is initially created as shared.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
.../testing/selftests/kvm/x86/guest_memfd_conversions_test.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
index 8e09e241723e5..5b070d3374eae 100644
--- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
@@ -95,6 +95,12 @@ static void __gmem_conversions_##test(test_data_t *t, int nr_pages) \
#define GMEM_CONVERSION_TEST_INIT_PRIVATE(test) \
__GMEM_CONVERSION_TEST_INIT_PRIVATE(test, 1)
+#define __GMEM_CONVERSION_TEST_INIT_SHARED(test, __nr_pages) \
+ GMEM_CONVERSION_TEST(test, __nr_pages, GUEST_MEMFD_FLAG_INIT_SHARED)
+
+#define GMEM_CONVERSION_TEST_INIT_SHARED(test) \
+ __GMEM_CONVERSION_TEST_INIT_SHARED(test, 1)
+
struct guest_check_data {
void *mem;
char expected_val;
@@ -186,6 +192,12 @@ GMEM_CONVERSION_TEST_INIT_PRIVATE(init_private)
test_convert_to_private(t, 0, 'C', 'E');
}
+GMEM_CONVERSION_TEST_INIT_SHARED(init_shared)
+{
+ test_shared(t, 0, 0, 'A', 'B');
+ test_convert_to_private(t, 0, 'B', 'C');
+ test_convert_to_shared(t, 0, 'C', 'D', 'E');
+}
int main(int argc, char *argv[])
{
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 30/46] KVM: selftests: Add helpers for calling ioctls on guest_memfd
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Sean Christopherson <seanjc@google.com>
Add helper functions to kvm_util.h to support calling ioctls, specifically
KVM_SET_MEMORY_ATTRIBUTES2, on a guest_memfd file descriptor.
Introduce gmem_ioctl() and __gmem_ioctl() macros, modeled after the
existing vm_ioctl() helpers, to provide a standard way to call ioctls
on a guest_memfd.
Add gmem_set_memory_attributes() and its derivatives (gmem_set_private(),
gmem_set_shared()) to set memory attributes on a guest_memfd region.
Also provide "__" variants that return the ioctl error code instead of
aborting the test. These helpers will be used by upcoming guest_memfd
tests.
To avoid code duplication, factor out the check for supported memory
attributes into a new macro, TEST_ASSERT_SUPPORTED_ATTRIBUTES, and use
it in both the existing vm_set_memory_attributes() and the new
gmem_set_memory_attributes() helpers.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
tools/testing/selftests/kvm/include/kvm_util.h | 94 +++++++++++++++++++++++---
1 file changed, 86 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 0cacf3698b259..323d06b5699ec 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -392,6 +392,16 @@ static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
__TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
})
+#define __gmem_ioctl(gmem_fd, cmd, arg) \
+ kvm_do_ioctl(gmem_fd, cmd, arg)
+
+#define gmem_ioctl(gmem_fd, cmd, arg) \
+({ \
+ int ret = __gmem_ioctl(gmem_fd, cmd, arg); \
+ \
+ TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \
+})
+
/*
* Looks up and returns the value corresponding to the capability
* (KVM_CAP_*) given by cap.
@@ -418,8 +428,16 @@ static inline void vm_enable_cap(struct kvm_vm *vm, u32 cap, u64 arg0)
vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
}
+/*
+ * KVM_SET_MEMORY_ATTRIBUTES{,2} overwrites _all_ attributes. These
+ * flows need significant enhancements to support multiple attributes.
+ */
+#define TEST_ASSERT_SUPPORTED_ATTRIBUTES(attributes) \
+ TEST_ASSERT(!(attributes) || (attributes) == KVM_MEMORY_ATTRIBUTE_PRIVATE, \
+ "Update me to support multiple attributes!")
+
static inline void vm_set_memory_attributes(struct kvm_vm *vm, gpa_t gpa,
- u64 size, u64 attributes)
+ size_t size, u64 attributes)
{
struct kvm_memory_attributes attr = {
.attributes = attributes,
@@ -428,17 +446,11 @@ static inline void vm_set_memory_attributes(struct kvm_vm *vm, gpa_t gpa,
.flags = 0,
};
- /*
- * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows
- * need significant enhancements to support multiple attributes.
- */
- TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE,
- "Update me to support multiple attributes!");
+ TEST_ASSERT_SUPPORTED_ATTRIBUTES(attributes);
vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
}
-
static inline void vm_mem_set_private(struct kvm_vm *vm, gpa_t gpa,
u64 size)
{
@@ -451,6 +463,72 @@ static inline void vm_mem_set_shared(struct kvm_vm *vm, gpa_t gpa,
vm_set_memory_attributes(vm, gpa, size, 0);
}
+static inline int __gmem_set_memory_attributes(int fd, u64 offset,
+ size_t size, u64 attributes,
+ u64 *error_offset)
+{
+ struct kvm_memory_attributes2 attr = {
+ .attributes = attributes,
+ .offset = offset,
+ .size = size,
+ .flags = 0,
+ .error_offset = 0,
+ };
+ int r;
+
+ r = __gmem_ioctl(fd, KVM_SET_MEMORY_ATTRIBUTES2, &attr);
+
+ /* Copy error_offset regardless of r so caller can check. */
+ if (error_offset)
+ *error_offset = attr.error_offset;
+
+ return r;
+}
+
+static inline int __gmem_set_private(int fd, u64 offset, size_t size,
+ u64 *error_offset)
+{
+ return __gmem_set_memory_attributes(fd, offset, size,
+ KVM_MEMORY_ATTRIBUTE_PRIVATE,
+ error_offset);
+}
+
+static inline int __gmem_set_shared(int fd, u64 offset, size_t size,
+ u64 *error_offset)
+{
+ return __gmem_set_memory_attributes(fd, offset, size, 0,
+ error_offset);
+}
+
+static inline void gmem_set_memory_attributes(int fd, u64 offset,
+ size_t size, u64 attributes)
+{
+ struct kvm_memory_attributes2 attr = {
+ .attributes = attributes,
+ .offset = offset,
+ .size = size,
+ .flags = 0,
+ };
+
+ TEST_ASSERT_SUPPORTED_ATTRIBUTES(attributes);
+
+ __TEST_REQUIRE(kvm_check_cap(KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES) > 0,
+ "No valid attributes for guest_memfd ioctl!");
+
+ gmem_ioctl(fd, KVM_SET_MEMORY_ATTRIBUTES2, &attr);
+}
+
+static inline void gmem_set_private(int fd, u64 offset, size_t size)
+{
+ gmem_set_memory_attributes(fd, offset, size,
+ KVM_MEMORY_ATTRIBUTE_PRIVATE);
+}
+
+static inline void gmem_set_shared(int fd, u64 offset, size_t size)
+{
+ gmem_set_memory_attributes(fd, offset, size, 0);
+}
+
void vm_guest_mem_fallocate(struct kvm_vm *vm, gpa_t gpa, u64 size,
bool punch_hole);
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 25/46] KVM: guest_memfd: Enable INIT_SHARED on guest_memfd for x86 Coco VMs
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Sean Christopherson <seanjc@google.com>
Now that guest_memfd supports tracking private vs. shared within gmem
itself, allow userspace to specify INIT_SHARED on a guest_memfd instance
for x86 Confidential Computing (CoCo) VMs, so long as in-place conversion
is enabled, i.e. when it's actually possible for a guest_memfd instance to
contain shared memory.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
arch/x86/kvm/x86.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2fde594e86d72..57a543dadb851 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -14116,14 +14116,15 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_KVM_GUEST_MEMFD
-/*
- * KVM doesn't yet support initializing guest_memfd memory as shared for VMs
- * with private memory (the private vs. shared tracking needs to be moved into
- * guest_memfd).
- */
bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm)
{
- return !kvm_arch_has_private_mem(kvm);
+ /*
+ * INIT_SHARED is supported if in-place conversion is enabled, or if
+ * the VM doesn't support private memory. If the VM has private memory
+ * and in-place conversion is disabled, then guest_memfd can _only_ be
+ * used for private memory.
+ */
+ return gmem_in_place_conversion || !kvm_arch_has_private_mem(kvm);
}
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 29/46] KVM: selftests: Add selftests global for guest memory attributes capability
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Sean Christopherson <seanjc@google.com>
Add a global variable, kvm_has_gmem_attributes, to make the result of
checking for KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES available to all tests.
kvm_has_gmem_attributes is true if guest_memfd tracks memory attributes, as
opposed to VM-level tracking.
This global variable is synced to the guest for testing convenience, to
avoid introducing subtle bugs when host/guest state is desynced.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
tools/testing/selftests/kvm/include/test_util.h | 2 ++
tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++++
2 files changed, 7 insertions(+)
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index a56271c237ae9..51287fac8138a 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -115,6 +115,8 @@ struct guest_random_state {
extern u32 guest_random_seed;
extern struct guest_random_state guest_rng;
+extern bool kvm_has_gmem_attributes;
+
struct guest_random_state new_guest_random_state(u32 seed);
u32 guest_random_u32(struct guest_random_state *state);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index d5bbc80b2bf1c..b73817f7bc803 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -24,6 +24,8 @@ u32 guest_random_seed;
struct guest_random_state guest_rng;
static u32 last_guest_seed;
+bool kvm_has_gmem_attributes;
+
static size_t vcpu_mmap_sz(void);
int __open_path_or_exit(const char *path, int flags, const char *enoent_help)
@@ -521,6 +523,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, u32 nr_runnable_vcpus,
}
guest_rng = new_guest_random_state(guest_random_seed);
sync_global_to_guest(vm, guest_rng);
+ sync_global_to_guest(vm, kvm_has_gmem_attributes);
kvm_arch_vm_post_create(vm, nr_runnable_vcpus);
@@ -2286,6 +2289,8 @@ void __attribute((constructor)) kvm_selftest_init(void)
guest_random_seed = last_guest_seed = random();
pr_info("Random seed: 0x%x\n", guest_random_seed);
+ kvm_has_gmem_attributes = kvm_has_cap(KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES);
+
kvm_selftest_arch_init();
}
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 26/46] KVM: selftests: Create gmem fd before "regular" fd when adding memslot
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Sean Christopherson <seanjc@google.com>
When adding a memslot associated with a guest_memfd instance, create/dup the
guest_memfd before creating the "normal" backing file. This will allow
dup'ing the gmem fd as the normal fd when guest_memfd supports mmap(),
i.e. to make guest_memfd the _only_ backing source for the memslot.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
tools/testing/selftests/kvm/lib/kvm_util.c | 45 +++++++++++++++---------------
1 file changed, 23 insertions(+), 22 deletions(-)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 195f3fdae1e39..2dd87c903ede6 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1053,6 +1053,29 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
if (alignment > 1)
region->mmap_size += alignment;
+ if (flags & KVM_MEM_GUEST_MEMFD) {
+ if (guest_memfd < 0) {
+ u32 guest_memfd_flags = 0;
+
+ TEST_ASSERT(!guest_memfd_offset,
+ "Offset must be zero when creating new guest_memfd");
+ guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
+ } else {
+ /*
+ * Install a unique fd for each memslot so that the fd
+ * can be closed when the region is deleted without
+ * needing to track if the fd is owned by the framework
+ * or by the caller.
+ */
+ guest_memfd = kvm_dup(guest_memfd);
+ }
+
+ region->region.guest_memfd = guest_memfd;
+ region->region.guest_memfd_offset = guest_memfd_offset;
+ } else {
+ region->region.guest_memfd = -1;
+ }
+
region->fd = -1;
if (backing_src_is_shared(src_type))
region->fd = kvm_memfd_alloc(region->mmap_size,
@@ -1082,28 +1105,6 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->backing_src_type = src_type;
- if (flags & KVM_MEM_GUEST_MEMFD) {
- if (guest_memfd < 0) {
- u32 guest_memfd_flags = 0;
- TEST_ASSERT(!guest_memfd_offset,
- "Offset must be zero when creating new guest_memfd");
- guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
- } else {
- /*
- * Install a unique fd for each memslot so that the fd
- * can be closed when the region is deleted without
- * needing to track if the fd is owned by the framework
- * or by the caller.
- */
- guest_memfd = kvm_dup(guest_memfd);
- }
-
- region->region.guest_memfd = guest_memfd;
- region->region.guest_memfd_offset = guest_memfd_offset;
- } else {
- region->region.guest_memfd = -1;
- }
-
region->unused_phy_pages = sparsebit_alloc();
if (vm_arch_has_protected_memory(vm))
region->protected_phy_pages = sparsebit_alloc();
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 27/46] KVM: selftests: Rename guest_memfd{,_offset} to gmem_{fd,offset}
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Sean Christopherson <seanjc@google.com>
Rename local variables and function parameters for the guest memory file
descriptor and its offset to use a "gmem_" prefix instead of
"guest_memfd_".
No functional change intended.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
tools/testing/selftests/kvm/include/kvm_util.h | 6 +++---
tools/testing/selftests/kvm/lib/kvm_util.c | 26 +++++++++++++-------------
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 04a910164a296..d4c104cb0418f 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -690,17 +690,17 @@ int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags,
gpa_t gpa, u64 size, void *hva);
void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
gpa_t gpa, u64 size, void *hva,
- u32 guest_memfd, u64 guest_memfd_offset);
+ u32 gmem_fd, u64 gmem_offset);
int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
gpa_t gpa, u64 size, void *hva,
- u32 guest_memfd, u64 guest_memfd_offset);
+ u32 gmem_fd, u64 gmem_offset);
void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
gpa_t gpa, u32 slot, u64 npages, u32 flags);
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
gpa_t gpa, u32 slot, u64 npages, u32 flags,
- int guest_memfd_fd, u64 guest_memfd_offset);
+ int gmem_fd, u64 gmem_offset);
#ifndef vm_arch_has_protected_memory
static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 2dd87c903ede6..9b482778f7379 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -946,7 +946,7 @@ void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags,
int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
gpa_t gpa, u64 size, void *hva,
- u32 guest_memfd, u64 guest_memfd_offset)
+ u32 gmem_fd, u64 gmem_offset)
{
struct kvm_userspace_memory_region2 region = {
.slot = slot,
@@ -954,8 +954,8 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
.guest_phys_addr = gpa,
.memory_size = size,
.userspace_addr = (uintptr_t)hva,
- .guest_memfd = guest_memfd,
- .guest_memfd_offset = guest_memfd_offset,
+ .guest_memfd = gmem_fd,
+ .guest_memfd_offset = gmem_offset,
};
TEST_REQUIRE_SET_USER_MEMORY_REGION2();
@@ -965,10 +965,10 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
gpa_t gpa, u64 size, void *hva,
- u32 guest_memfd, u64 guest_memfd_offset)
+ u32 gmem_fd, u64 gmem_offset)
{
int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva,
- guest_memfd, guest_memfd_offset);
+ gmem_fd, gmem_offset);
TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)",
errno, strerror(errno));
@@ -978,7 +978,7 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
gpa_t gpa, u32 slot, u64 npages, u32 flags,
- int guest_memfd, u64 guest_memfd_offset)
+ int gmem_fd, u64 gmem_offset)
{
int ret;
struct userspace_mem_region *region;
@@ -1054,12 +1054,12 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->mmap_size += alignment;
if (flags & KVM_MEM_GUEST_MEMFD) {
- if (guest_memfd < 0) {
- u32 guest_memfd_flags = 0;
+ if (gmem_fd < 0) {
+ u32 gmem_flags = 0;
- TEST_ASSERT(!guest_memfd_offset,
+ TEST_ASSERT(!gmem_offset,
"Offset must be zero when creating new guest_memfd");
- guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
+ gmem_fd = vm_create_guest_memfd(vm, mem_size, gmem_flags);
} else {
/*
* Install a unique fd for each memslot so that the fd
@@ -1067,11 +1067,11 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
* needing to track if the fd is owned by the framework
* or by the caller.
*/
- guest_memfd = kvm_dup(guest_memfd);
+ gmem_fd = kvm_dup(gmem_fd);
}
- region->region.guest_memfd = guest_memfd;
- region->region.guest_memfd_offset = guest_memfd_offset;
+ region->region.guest_memfd = gmem_fd;
+ region->region.guest_memfd_offset = gmem_offset;
} else {
region->region.guest_memfd = -1;
}
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 28/46] KVM: selftests: Add support for mmap() on guest_memfd in core library
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Sean Christopherson <seanjc@google.com>
Accept gmem_flags in vm_mem_add() to be able to create a guest_memfd within
vm_mem_add().
When vm_mem_add() is used to set up a guest_memfd for a memslot, set up the
provided (or created) gmem_fd as the fd for the user memory region. This
makes it available for mmap(), just like fds from other memory
sources. mmap() from guest_memfd using the provided gmem_flags and
gmem_offset.
Add a kvm_slot_to_fd() helper to provide convenient access to the file
descriptor of a memslot.
Update existing callers of vm_mem_add() to pass 0 for gmem_flags to
preserve existing behavior.
Signed-off-by: Sean Christopherson <seanjc@google.com>
[For guest_memfds, mmap() using gmem_offset instead of 0 all the time.]
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
tools/testing/selftests/kvm/include/kvm_util.h | 7 +++++-
tools/testing/selftests/kvm/lib/kvm_util.c | 27 ++++++++++++----------
.../kvm/x86/private_mem_conversions_test.c | 2 +-
3 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index d4c104cb0418f..0cacf3698b259 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -700,7 +700,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
gpa_t gpa, u32 slot, u64 npages, u32 flags);
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
gpa_t gpa, u32 slot, u64 npages, u32 flags,
- int gmem_fd, u64 gmem_offset);
+ int gmem_fd, u64 gmem_offset, u64 gmem_flags);
#ifndef vm_arch_has_protected_memory
static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
@@ -732,6 +732,11 @@ void *addr_gva2hva(struct kvm_vm *vm, gva_t gva);
gpa_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
void *addr_gpa2alias(struct kvm_vm *vm, gpa_t gpa);
+static inline int kvm_slot_to_fd(struct kvm_vm *vm, u32 slot)
+{
+ return memslot2region(vm, slot)->fd;
+}
+
#ifndef vcpu_arch_put_guest
#define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0)
#endif
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 9b482778f7379..d5bbc80b2bf1c 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -978,12 +978,13 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
gpa_t gpa, u32 slot, u64 npages, u32 flags,
- int gmem_fd, u64 gmem_offset)
+ int gmem_fd, u64 gmem_offset, u64 gmem_flags)
{
int ret;
struct userspace_mem_region *region;
size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
size_t mem_size = npages * vm->page_size;
+ off_t mmap_offset = 0;
size_t alignment = 1;
TEST_REQUIRE_SET_USER_MEMORY_REGION2();
@@ -1055,8 +1056,6 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
if (flags & KVM_MEM_GUEST_MEMFD) {
if (gmem_fd < 0) {
- u32 gmem_flags = 0;
-
TEST_ASSERT(!gmem_offset,
"Offset must be zero when creating new guest_memfd");
gmem_fd = vm_create_guest_memfd(vm, mem_size, gmem_flags);
@@ -1077,13 +1076,17 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
}
region->fd = -1;
- if (backing_src_is_shared(src_type))
+ if (flags & KVM_MEM_GUEST_MEMFD && gmem_flags & GUEST_MEMFD_FLAG_MMAP) {
+ region->fd = kvm_dup(gmem_fd);
+ mmap_offset = gmem_offset;
+ } else if (backing_src_is_shared(src_type)) {
region->fd = kvm_memfd_alloc(region->mmap_size,
src_type == VM_MEM_SRC_SHARED_HUGETLB);
+ }
- region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd);
+ region->mmap_start = __kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, mmap_offset);
TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
@@ -1129,10 +1132,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
/* If shared memory, create an alias. */
if (region->fd >= 0) {
- region->mmap_alias = kvm_mmap(region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd);
+ region->mmap_alias = __kvm_mmap(region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, mmap_offset);
/* Align host alias address */
region->host_alias = align_ptr_up(region->mmap_alias, alignment);
@@ -1143,7 +1146,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
gpa_t gpa, u32 slot, u64 npages, u32 flags)
{
- vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0);
+ vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0, 0);
}
/*
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
index 1d2f5d4fd45d7..861baff201e78 100644
--- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
@@ -399,7 +399,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_v
for (i = 0; i < nr_memslots; i++)
vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
BASE_DATA_SLOT + i, slot_size / vm->page_size,
- KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
+ KVM_MEM_GUEST_MEMFD, memfd, slot_size * i, 0);
for (i = 0; i < nr_vcpus; i++) {
gpa_t gpa = BASE_DATA_GPA + i * per_cpu_size;
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 21/46] KVM: guest_memfd: Zero page while getting pfn
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Move the folio initialization logic from kvm_gmem_get_pfn() into
__kvm_gmem_get_pfn() to also zero pages if the page is to be used in
kvm_gmem_populate().
With in-place conversion, the existing data in a guest_memfd page can be
populated into guest memory through platform-specific ioctls.
Without first zeroing the page obtained using __kvm_gmem_get_pfn(), it
might contain uninitialized host memory, which would leak to the guest if
the populate completes.
guest_memfd pages are zeroed at most once in the page's entire lifetime
with guest_memfd, and that is tracked using the uptodate flag.
Zeroing the page in __kvm_gmem_get_pfn() is chosen over zeroing in
kvm_gmem_get_folio() since other flows, such as a future write() syscall,
can get a page, write to the page and then set page uptodate without
zeroing.
This aligns with the concept of zeroing before first use - the other place
where zeroing happens is in kvm_gmem_fault_user_mapping().
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 90bc1a26512b6..86c9f5b0863cb 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -1137,6 +1137,11 @@ static struct folio *__kvm_gmem_get_pfn(struct file *file,
return ERR_PTR(-EHWPOISON);
}
+ if (!folio_test_uptodate(folio)) {
+ clear_highpage(folio_page(folio, 0));
+ folio_mark_uptodate(folio);
+ }
+
*pfn = folio_file_pfn(folio, index);
if (max_order)
*max_order = 0;
@@ -1166,11 +1171,6 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
goto out;
}
- if (!folio_test_uptodate(folio)) {
- clear_highpage(folio_page(folio, 0));
- folio_mark_uptodate(folio);
- }
-
if (kvm_gmem_is_private_mem(inode, index))
r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 24/46] KVM: guest_memfd: Make in-place conversion the default
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Make in-place conversion the default if the arch has private mem.
The default can be overridden at compile time by enabling
CONFIG_KVM_VM_MEMORY_ATTRIBUTES, or at KVM load time through a module
parameter.
In-place conversion also implies tracking a guest's private/shared state in
guest_memfd. To avoid inconsistencies between tracking memory attributes
per-VM and tracking them in guest_memfd, make the module_param
read-only (0444).
Document that using per-VM attributes for tracking private/shared state of
guest memory is deprecated in favor of tracking in guest_memfd.
Warn if the admin sets gmem_in_place_conversion as false when
CONFIG_KVM_VM_MEMORY_ATTRIBUTES is not enabled. Add warning in the code
path where guest memory is populated for a CoCo VM, since that's the
earliest point in a CoCo VM's lifecycle where memory attributes are
queried. Unlike other query sites, this site is exclusively used by CoCo
VMs.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/Kconfig | 7 ++++++-
virt/kvm/guest_memfd.c | 5 +++++
virt/kvm/kvm_main.c | 3 ++-
3 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c28393dc664eb..a3c189d765150 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -85,7 +85,12 @@ config KVM_VM_MEMORY_ATTRIBUTES
bool "Enable per-VM PRIVATE vs. SHARED attributes (for CoCo VMs)"
help
Enable support for tracking PRIVATE vs. SHARED memory using per-VM
- memory attributes.
+ memory attributes. Using per-VM attributes is deprecated in favor
+ of tracking PRIVATE state in guest_memfd. Select this if you need
+ to run CoCo VMs using a VMM that doesn't support guest_memfd memory
+ attributes.
+
+ If unsure, say N.
config KVM_SW_PROTECTED_VM
bool "Enable support for KVM software-protected VMs"
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 86c9f5b0863cb..5cb73543c03c8 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -1193,10 +1193,15 @@ static bool kvm_gmem_range_is_private(struct file *file, pgoff_t index,
{
struct maple_tree *mt = &GMEM_I(file_inode(file))->attributes;
+#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
if (!gmem_in_place_conversion)
return kvm_range_has_vm_memory_attributes(kvm, gfn, gfn + nr_pages,
KVM_MEMORY_ATTRIBUTE_PRIVATE,
KVM_MEMORY_ATTRIBUTE_PRIVATE);
+#else
+ if (WARN_ON_ONCE(!gmem_in_place_conversion))
+ return false;
+#endif
return kvm_gmem_range_has_attributes(mt, index, nr_pages,
KVM_MEMORY_ATTRIBUTE_PRIVATE);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dd1d18a1d2f68..46e92b5dc3804 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,7 +102,8 @@ static bool __ro_after_init allow_unsafe_mappings;
module_param(allow_unsafe_mappings, bool, 0444);
#ifdef kvm_arch_has_private_mem
-bool __ro_after_init gmem_in_place_conversion = false;
+bool __ro_after_init gmem_in_place_conversion = !IS_ENABLED(CONFIG_KVM_VM_MEMORY_ATTRIBUTES);
+module_param(gmem_in_place_conversion, bool, 0444);
EXPORT_SYMBOL_FOR_KVM_INTERNAL(gmem_in_place_conversion);
#endif
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 22/46] KVM: SEV: Make 'uaddr' parameter optional for KVM_SEV_SNP_LAUNCH_UPDATE
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Michael Roth <michael.roth@amd.com>
Make the source page for populating an SNP guest_memfd instance optional
if in-place conversion/population is enabled. If KVM can convert the page
in-place, then it's possible for guest memory to be initialized directly
from userspace by mmap()'ing the guest_memfd and writing to it while the
corresponding GPA ranges are in a 'shared' state, before converting them
to the 'private' state expected by KVM_SEV_SNP_LAUNCH_UPDATE.
Update the handling/documentation for KVM_SEV_SNP_LAUNCH_UPDATE to allow
for 'uaddr' to be set to NULL when in-place conversion is enabled, which
SNP_LAUNCH_UPDATE will then use to determine when it should/shouldn't
copy in data from a separate memory location. Continue to enforce
non-NULL when PRIVATE is tracked per-VM, not per-guest_memfd.
Signed-off-by: Michael Roth <michael.roth@amd.com>
[Added src_page check in error handling path when the firmware command fails]
[Dropped ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES]
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
[sean: drop explicit vm_memory_attributes references]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
Documentation/virt/kvm/x86/amd-memory-encryption.rst | 13 +++++++++----
arch/x86/kvm/svm/sev.c | 16 +++++++++++-----
virt/kvm/kvm_main.c | 1 +
3 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
index bd04a908a8dbd..29409297f1ef0 100644
--- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
@@ -503,7 +503,8 @@ secrets.
It is required that the GPA ranges initialized by this command have had the
KVM_MEMORY_ATTRIBUTE_PRIVATE attribute set in advance. See the documentation
-for KVM_SET_MEMORY_ATTRIBUTES for more details on this aspect.
+for KVM_SET_MEMORY_ATTRIBUTES/KVM_SET_MEMORY_ATTRIBUTES2 for more details on
+this aspect.
Upon success, this command is not guaranteed to have processed the entire
range requested. Instead, the ``gfn_start``, ``uaddr``, and ``len`` fields of
@@ -511,9 +512,13 @@ range requested. Instead, the ``gfn_start``, ``uaddr``, and ``len`` fields of
remaining range that has yet to be processed. The caller should continue
calling this command until those fields indicate the entire range has been
processed, e.g. ``len`` is 0, ``gfn_start`` is equal to the last GFN in the
-range plus 1, and ``uaddr`` is the last byte of the userspace-provided source
-buffer address plus 1. In the case where ``type`` is KVM_SEV_SNP_PAGE_TYPE_ZERO,
-``uaddr`` will be ignored completely.
+range plus 1, and ``uaddr`` (if specified) is the last byte of the
+userspace-provided source buffer address plus 1.
+
+In the case where ``type`` is KVM_SEV_SNP_PAGE_TYPE_ZERO, ``uaddr`` will be
+ignored completely. For all other page types, ``uaddr`` is optional if in-place
+conversion is enabled, i.e. when the destination can also be the source, and is
+required if in-place conversion is disabled.
Parameters (in): struct kvm_sev_snp_launch_update
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 74fb15551e83f..2b7569b6a8609 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2330,7 +2330,13 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
int level;
int ret;
- if (WARN_ON_ONCE(sev_populate_args->type != KVM_SEV_SNP_PAGE_TYPE_ZERO && !src_page))
+ /*
+ * A source page is required if in-place conversion isn't enabled, as
+ * the data needs to come from a separate physical page. Zero pages
+ * are exempt as they don't consume a source page.
+ */
+ if (!gmem_in_place_conversion &&
+ sev_populate_args->type != KVM_SEV_SNP_PAGE_TYPE_ZERO && !src_page)
return -EINVAL;
ret = snp_lookup_rmpentry((u64)pfn, &assigned, &level);
@@ -2377,7 +2383,7 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
*/
if (ret && !snp_page_reclaim(kvm, pfn) &&
sev_populate_args->type == KVM_SEV_SNP_PAGE_TYPE_CPUID &&
- sev_populate_args->fw_error == SEV_RET_INVALID_PARAM) {
+ sev_populate_args->fw_error == SEV_RET_INVALID_PARAM && src_page) {
void *src_vaddr = kmap_local_page(src_page);
void *dst_vaddr = kmap_local_pfn(pfn);
@@ -2410,8 +2416,8 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
return -EFAULT;
- pr_debug("%s: GFN start 0x%llx length 0x%llx type %d flags %d\n", __func__,
- params.gfn_start, params.len, params.type, params.flags);
+ pr_debug("%s: GFN start 0x%llx length 0x%llx type %d flags %d src %llx\n", __func__,
+ params.gfn_start, params.len, params.type, params.flags, params.uaddr);
if (!params.len || !PAGE_ALIGNED(params.len) || params.flags ||
(params.type != KVM_SEV_SNP_PAGE_TYPE_NORMAL &&
@@ -2468,7 +2474,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
params.gfn_start += count;
params.len -= count * PAGE_SIZE;
- if (params.type != KVM_SEV_SNP_PAGE_TYPE_ZERO)
+ if (src && params.type != KVM_SEV_SNP_PAGE_TYPE_ZERO)
params.uaddr += count * PAGE_SIZE;
if (copy_to_user(u64_to_user_ptr(argp->data), &params, sizeof(params)))
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 044486f128c37..dd1d18a1d2f68 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -103,6 +103,7 @@ module_param(allow_unsafe_mappings, bool, 0444);
#ifdef kvm_arch_has_private_mem
bool __ro_after_init gmem_in_place_conversion = false;
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(gmem_in_place_conversion);
#endif
#define MEMORY_ATTRIBUTES_MATCH(one, two) \
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 23/46] KVM: TDX: Make source page optional for KVM_TDX_INIT_MEM_REGION
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:32 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Update tdx_gmem_post_populate() to handle cases where a source page is
not explicitly provided. Instead of returning -EOPNOTSUPP when src_page
is NULL, default to using the page associated with the destination PFN.
This change allows for in-place memory conversion where the data is
already present in the target PFN, ensuring the TDX module has a valid
source page reference for the TDH.MEM.PAGE.ADD operation.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
Documentation/virt/kvm/x86/intel-tdx.rst | 4 ++++
arch/x86/kvm/vmx/tdx.c | 11 ++++++++---
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/Documentation/virt/kvm/x86/intel-tdx.rst b/Documentation/virt/kvm/x86/intel-tdx.rst
index 6a222e9d09541..74357fe87f9ec 100644
--- a/Documentation/virt/kvm/x86/intel-tdx.rst
+++ b/Documentation/virt/kvm/x86/intel-tdx.rst
@@ -158,6 +158,10 @@ KVM_TDX_INIT_MEM_REGION
Initialize @nr_pages TDX guest private memory starting from @gpa with userspace
provided data from @source_addr. @source_addr must be PAGE_SIZE-aligned.
+If guest_memfd in-place conversion is enabled, pass NULL for @source_addr to
+initialize the memory region using memory contents already populated in
+guest_memfd memory.
+
Note, before calling this sub command, memory attribute of the range
[gpa, gpa + nr_pages] needs to be private. Userspace can use
KVM_SET_MEMORY_ATTRIBUTES to set the attribute.
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index ffe9d0db58c59..56d10333c61a7 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -3198,8 +3198,12 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
if (KVM_BUG_ON(kvm_tdx->page_add_src, kvm))
return -EIO;
- if (!src_page)
- return -EOPNOTSUPP;
+ if (!src_page) {
+ if (!gmem_in_place_conversion)
+ return -EOPNOTSUPP;
+
+ src_page = pfn_to_page(pfn);
+ }
kvm_tdx->page_add_src = src_page;
ret = kvm_tdp_mmu_map_private_pfn(arg->vcpu, gfn, pfn);
@@ -3278,7 +3282,8 @@ static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *c
break;
}
- region.source_addr += PAGE_SIZE;
+ if (region.source_addr)
+ region.source_addr += PAGE_SIZE;
region.gpa += PAGE_SIZE;
region.nr_pages--;
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 20/46] KVM: guest_memfd: Determine invalidation filter from memory attributes
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Before conversion, the range filter doesn't really matter:
+ For non-CoCo VMs that use guest_memfd, they have no mirrored tdp, so
KVM_DIRECT_ROOTS would have been invalidated anyway.
+ CoCo VMs could not use INIT_SHARED, and there's no conversion support, so
always using KVM_FILTER_PRIVATE would have worked.
Now with conversion support, update kvm_gmem_get_invalidate_filter to
inspect the memory attributes maple tree for a given range.
Instead of determining the invalidation filter based on static inode
flags, iterate through the attributes maple tree for the specific range
being invalidated. This allows KVM to identify if the range contains
private pages, shared pages, or both, and set the filter bits
accordingly.
Update kvm_gmem_invalidate_start and kvm_gmem_release to pass the range
parameters to the filter helper to ensure invalidation accurately
targets the memory types present in the affected range.
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index d72ecbfcc3144..90bc1a26512b6 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -193,12 +193,24 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
return folio;
}
-static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode)
+static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(
+ struct inode *inode, pgoff_t start, pgoff_t end)
{
- if (GMEM_I(inode)->flags & GUEST_MEMFD_FLAG_INIT_SHARED)
- return KVM_FILTER_SHARED;
+ struct gmem_inode *gi = GMEM_I(inode);
+ enum kvm_gfn_range_filter filter = 0;
+ void *entry;
+
+ lockdep_assert(mt_lock_is_held(&gi->attributes));
+
+ mt_for_each(&gi->attributes, entry, start, end - 1) {
+ filter |= (xa_to_value(entry) & KVM_MEMORY_ATTRIBUTE_PRIVATE) ?
+ KVM_FILTER_PRIVATE : KVM_FILTER_SHARED;
+
+ if (filter == (KVM_FILTER_PRIVATE | KVM_FILTER_SHARED))
+ break;
+ }
- return KVM_FILTER_PRIVATE;
+ return filter;
}
static void __kvm_gmem_invalidate_start(struct gmem_file *f, pgoff_t start,
@@ -244,7 +256,7 @@ static void kvm_gmem_invalidate_start(struct inode *inode, pgoff_t start,
enum kvm_gfn_range_filter attr_filter;
struct gmem_file *f;
- attr_filter = kvm_gmem_get_invalidate_filter(inode);
+ attr_filter = kvm_gmem_get_invalidate_filter(inode, start, end);
kvm_gmem_for_each_file(f, inode)
__kvm_gmem_invalidate_start(f, start, end, attr_filter);
@@ -368,6 +380,7 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
{
pgoff_t end = i_size_read(inode) >> PAGE_SHIFT;
struct gmem_file *f = file->private_data;
+ enum kvm_gfn_range_filter filter;
struct kvm_memory_slot *slot;
struct kvm *kvm = f->kvm;
unsigned long index;
@@ -397,8 +410,8 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
* Zap all SPTEs pointed at by this file. Do not free the backing
* memory, as its lifetime is associated with the inode, not the file.
*/
- __kvm_gmem_invalidate_start(f, 0, end,
- kvm_gmem_get_invalidate_filter(inode));
+ filter = kvm_gmem_get_invalidate_filter(inode, 0, end);
+ __kvm_gmem_invalidate_start(f, 0, end, filter);
__kvm_gmem_invalidate_end(f, 0, end);
list_del(&f->entry);
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 16/46] KVM: guest_memfd: Return early if range already has requested attributes
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Extract a helper out of kvm_gmem_range_is_private() that checks that a
range has given attributes.
Optimize setting memory attributes by returning early if all pages in the
requested range already have the requested attributes.
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 31 +++++++++++++++++++++++--------
1 file changed, 23 insertions(+), 8 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 3c94442bc8131..cec8fa26ece17 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -86,6 +86,23 @@ static bool kvm_gmem_is_shared_mem(struct inode *inode, pgoff_t index)
return !kvm_gmem_is_private_mem(inode, index);
}
+static bool kvm_gmem_range_has_attributes(struct maple_tree *mt,
+ pgoff_t index, size_t nr_pages,
+ u64 attributes)
+{
+ pgoff_t end = index + nr_pages - 1;
+ void *entry;
+
+ lockdep_assert(mt_lock_is_held(mt));
+
+ mt_for_each(mt, entry, index, end) {
+ if (xa_to_value(entry) != attributes)
+ return false;
+ }
+
+ return true;
+}
+
static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
pgoff_t index, struct folio *folio)
{
@@ -653,12 +670,15 @@ static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
pgoff_t end = start + nr_pages;
struct maple_tree *mt;
struct ma_state mas;
- int r;
+ int r = 0;
mt = &gi->attributes;
filemap_invalidate_lock(mapping);
+ if (kvm_gmem_range_has_attributes(mt, start, nr_pages, attrs))
+ goto out;
+
mas_init(&mas, mt, start);
r = kvm_gmem_mas_preallocate(&mas, attrs, start, nr_pages);
if (r) {
@@ -1148,19 +1168,14 @@ static bool kvm_gmem_range_is_private(struct file *file, pgoff_t index,
size_t nr_pages, struct kvm *kvm, gfn_t gfn)
{
struct maple_tree *mt = &GMEM_I(file_inode(file))->attributes;
- pgoff_t end = index + nr_pages - 1;
- void *entry;
if (!gmem_in_place_conversion)
return kvm_range_has_vm_memory_attributes(kvm, gfn, gfn + nr_pages,
KVM_MEMORY_ATTRIBUTE_PRIVATE,
KVM_MEMORY_ATTRIBUTE_PRIVATE);
- mt_for_each(mt, entry, index, end) {
- if (xa_to_value(entry) != KVM_MEMORY_ATTRIBUTE_PRIVATE)
- return false;
- }
- return true;
+ return kvm_gmem_range_has_attributes(mt, index, nr_pages,
+ KVM_MEMORY_ATTRIBUTE_PRIVATE);
}
static long __kvm_gmem_populate(struct kvm *kvm, struct kvm_memory_slot *slot,
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 19/46] KVM: guest_memfd: Use actual size for invalidation in kvm_gmem_release()
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
__kvm_gmem_invalidate_start() and __kvm_gmem_invalidate_end() actually do
not specially handle -1ul. -1ul is used as a huge number, which legal
indices do not exceed, and hence the invalidation works as expected.
Since a later patch is going to make use of the exact range, calculate the
size of the guest_memfd inode and use it as the end range for invalidating
SPTEs.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index d163559da0235..d72ecbfcc3144 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -366,6 +366,7 @@ static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset,
static int kvm_gmem_release(struct inode *inode, struct file *file)
{
+ pgoff_t end = i_size_read(inode) >> PAGE_SHIFT;
struct gmem_file *f = file->private_data;
struct kvm_memory_slot *slot;
struct kvm *kvm = f->kvm;
@@ -396,9 +397,9 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
* Zap all SPTEs pointed at by this file. Do not free the backing
* memory, as its lifetime is associated with the inode, not the file.
*/
- __kvm_gmem_invalidate_start(f, 0, -1ul,
+ __kvm_gmem_invalidate_start(f, 0, end,
kvm_gmem_get_invalidate_filter(inode));
- __kvm_gmem_invalidate_end(f, 0, -1ul);
+ __kvm_gmem_invalidate_end(f, 0, end);
list_del(&f->entry);
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 17/46] KVM: guest_memfd: Advertise KVM_SET_MEMORY_ATTRIBUTES2 ioctl
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Introduce KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES to advertise the
availability of the KVM_SET_MEMORY_ATTRIBUTES2 ioctl.
KVM_SET_MEMORY_ATTRIBUTES2 is a guest_memfd-scoped version of the existing
KVM_SET_MEMORY_ATTRIBUTES VM ioctl. It allows userspace to manage memory
attributes, such as KVM_MEMORY_ATTRIBUTE_PRIVATE, directly on a guest_memfd
file descriptor.
This new version uses struct kvm_memory_attributes2, which adds an
error_offset field to the output. This allows KVM to return the specific
offset that triggered an error, which is especially useful for handling
EAGAIN results caused by transient page reference counts during attribute
conversions.
Update the KVM API documentation to define the new ioctl and its behavior,
and add the necessary UAPI definitions and capability checks.
Suggested-by: Sean Christopherson <seanjc@google.com>
Suggested-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
Documentation/virt/kvm/api.rst | 78 +++++++++++++++++++++++++++++++++++++++++-
include/uapi/linux/kvm.h | 2 ++
virt/kvm/kvm_main.c | 23 +++++++++----
3 files changed, 95 insertions(+), 8 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index a833d90845b95..73878f34f6d2e 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -117,7 +117,7 @@ description:
x86 includes both i386 and x86_64.
Type:
- system, vm, or vcpu.
+ system, vm, vcpu or guest_memfd.
Parameters:
what parameters are accepted by the ioctl.
@@ -6373,6 +6373,8 @@ S390:
Returns -EINVAL if the VM has the KVM_VM_S390_UCONTROL flag set.
Returns -EINVAL if called on a protected VM.
+.. _KVM_SET_MEMORY_ATTRIBUTES:
+
4.141 KVM_SET_MEMORY_ATTRIBUTES
-------------------------------
@@ -6566,6 +6568,80 @@ KVM_S390_KEYOP_SSKE
Sets the storage key for the guest address ``guest_addr`` to the key
specified in ``key``, returning the previous value in ``key``.
+4.145 KVM_SET_MEMORY_ATTRIBUTES2
+---------------------------------
+
+:Capability: KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES
+:Architectures: all
+:Type: guest_memfd ioctl
+:Parameters: struct kvm_memory_attributes2 (in/out)
+:Returns: 0 on success, <0 on error
+
+Errors:
+
+ ========== ===============================================================
+ EINVAL The specified `offset` or `size` were invalid (e.g. not
+ page aligned, causes an overflow, or size is zero).
+ EFAULT The parameter address was invalid.
+ EAGAIN Some page within requested range had unexpected refcounts. The
+ offset of the page will be returned in `error_offset`.
+ ENOMEM Ran out of memory trying to track private/shared state
+ ========== ===============================================================
+
+KVM_SET_MEMORY_ATTRIBUTES2 is an extension to
+KVM_SET_MEMORY_ATTRIBUTES that supports returning (writing) values to
+userspace. The original (pre-extension) fields are shared with
+KVM_SET_MEMORY_ATTRIBUTES identically.
+
+Attribute values are shared with KVM_SET_MEMORY_ATTRIBUTES.
+
+::
+
+ struct kvm_memory_attributes2 {
+ /* in */
+ union {
+ __u64 address;
+ __u64 offset;
+ };
+ __u64 size;
+ __u64 attributes;
+ __u64 flags;
+ /* out */
+ __u64 error_offset;
+ __u64 reserved[11];
+ };
+
+ #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
+
+Set attributes for a range of offsets within a guest_memfd to
+KVM_MEMORY_ATTRIBUTE_PRIVATE to limit the specified guest_memfd backed
+memory range for guest use. Even if KVM_CAP_GUEST_MEMFD_MMAP is
+supported, after a successful call to set
+KVM_MEMORY_ATTRIBUTE_PRIVATE, the requested range will not be mappable
+into host userspace and will only be mappable by the guest.
+
+To allow the range to be mappable into host userspace again, call
+KVM_SET_MEMORY_ATTRIBUTES2 on the guest_memfd again with
+KVM_MEMORY_ATTRIBUTE_PRIVATE unset.
+
+KVM does not directly manipulate the memory contents of pages during
+attribute updates. However, the process of setting these attributes,
+which includes operations such as unmapping pages from the host or
+stage-2 page tables, may result in side effects on memory contents
+that vary across different trusted firmware implementations.
+
+If this ioctl returns -EAGAIN, the offset of the page with unexpected
+refcounts will be returned in `error_offset`. This can occur if there
+are transient refcounts on the pages, taken by other parts of the
+kernel.
+
+Userspace is expected to figure out how to remove all known refcounts
+on the shared pages, such as refcounts taken by get_user_pages(), and
+try the ioctl again. A possible source of these long term refcounts is
+if the guest_memfd memory was pinned in IOMMU page tables.
+
+See also: :ref:`KVM_SET_MEMORY_ATTRIBUTES`.
+
.. _kvm_run:
5. The kvm_run structure
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 876c0429f9d4e..129d6f6303251 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -997,6 +997,7 @@ struct kvm_enable_cap {
#define KVM_CAP_S390_KEYOP 247
#define KVM_CAP_S390_VSIE_ESAMODE 248
#define KVM_CAP_S390_HPAGE_2G 249
+#define KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES 250
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1649,6 +1650,7 @@ struct kvm_memory_attributes {
__u64 flags;
};
+/* Available with KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES */
#define KVM_SET_MEMORY_ATTRIBUTES2 _IOWR(KVMIO, 0xd2, struct kvm_memory_attributes2)
struct kvm_memory_attributes2 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a08b518cdb175..044486f128c37 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2434,18 +2434,22 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
}
#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
+#ifdef kvm_arch_has_private_mem
+static u64 kvm_supports_private_mem(struct kvm *kvm)
+{
+ return !kvm || kvm_arch_has_private_mem(kvm);
+}
+#else
+#define kvm_supports_private_mem(kvm) false
+#endif
+
#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
static u64 kvm_supported_vm_mem_attributes(struct kvm *kvm)
{
-#ifdef kvm_arch_has_private_mem
- if (gmem_in_place_conversion)
+ if (gmem_in_place_conversion || !kvm_supports_private_mem(kvm))
return 0;
- if (!kvm || kvm_arch_has_private_mem(kvm))
- return KVM_MEMORY_ATTRIBUTE_PRIVATE;
-#endif
-
- return 0;
+ return KVM_MEMORY_ATTRIBUTE_PRIVATE;
}
/*
@@ -4969,6 +4973,11 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
return 1;
case KVM_CAP_GUEST_MEMFD_FLAGS:
return kvm_gmem_get_supported_flags(kvm);
+ case KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES:
+ if (!gmem_in_place_conversion || !kvm_supports_private_mem(kvm))
+ return 0;
+
+ return KVM_MEMORY_ATTRIBUTE_PRIVATE;
#endif
default:
break;
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 18/46] KVM: guest_memfd: Handle lru_add fbatch refcounts during conversion safety check
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
When checking if a guest_memfd folio is safe for conversion, its refcount
is examined. A folio may be present in a per-CPU lru_add fbatch, which
temporarily increases its refcount. This can lead to a false positive,
incorrectly indicating that the folio is in use and preventing the
conversion, even if it is otherwise safe. The conversion process might not
be on the same CPU that holds the folio in its fbatch, making a simple
per-CPU check insufficient.
To address this, drain all CPUs' lru_add fbatches if an unexpectedly high
refcount is encountered during the safety check. This is performed at most
once per conversion request. Drain only if the folio in question may be
lru cached.
guest_memfd folios are unevictable, so they can only reside in the lru_add
fbatch. If the folio's refcount is still unsafe after draining, then the
conversion is truly deemed unsafe.
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
mm/swap.c | 2 ++
virt/kvm/guest_memfd.c | 18 ++++++++++++++----
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/mm/swap.c b/mm/swap.c
index 5cc44f0de9877..3134d9d3d7c30 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -37,6 +37,7 @@
#include <linux/page_idle.h>
#include <linux/local_lock.h>
#include <linux/buffer_head.h>
+#include <linux/kvm_types.h>
#include "internal.h"
@@ -904,6 +905,7 @@ void lru_add_drain_all(void)
lru_add_drain();
}
#endif /* CONFIG_SMP */
+EXPORT_SYMBOL_FOR_KVM(lru_add_drain_all);
atomic_t lru_disable_count = ATOMIC_INIT(0);
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index cec8fa26ece17..d163559da0235 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -8,6 +8,7 @@
#include <linux/mempolicy.h>
#include <linux/pseudo_fs.h>
#include <linux/pagemap.h>
+#include <linux/swap.h>
#include "kvm_mm.h"
@@ -597,6 +598,7 @@ static bool kvm_gmem_is_safe_for_conversion(struct inode *inode, pgoff_t start,
const int filemap_get_folios_refcount = 1;
pgoff_t last = start + nr_pages - 1;
struct folio_batch fbatch;
+ bool lru_drained = false;
bool safe = true;
pgoff_t next;
int i;
@@ -606,12 +608,20 @@ static bool kvm_gmem_is_safe_for_conversion(struct inode *inode, pgoff_t start,
next = start;
while (safe && filemap_get_folios(mapping, &next, last, &fbatch)) {
- for (i = 0; i < folio_batch_count(&fbatch); ++i) {
+ for (i = 0; i < folio_batch_count(&fbatch);) {
struct folio *folio = fbatch.folios[i];
- if (folio_ref_count(folio) !=
- folio_nr_pages(folio) + filemap_get_folios_refcount) {
- safe = false;
+ safe = (folio_ref_count(folio) ==
+ folio_nr_pages(folio) +
+ filemap_get_folios_refcount);
+
+ if (safe) {
+ ++i;
+ } else if (folio_may_be_lru_cached(folio) &&
+ !lru_drained) {
+ lru_add_drain_all();
+ lru_drained = true;
+ } else {
*err_index = max(start, folio->index);
break;
}
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 15/46] KVM: guest_memfd: Call arch invalidate hooks on conversion
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
When memory in guest_memfd is converted from private to shared, the
platform-specific state associated with the guest-private pages must be
invalidated or cleaned up.
Iterate over the folios in the affected range and call the
kvm_arch_gmem_invalidate() hook for each PFN range. This allows
architectures to perform necessary teardown, such as updating hardware
metadata or encryption states, before the pages are transitioned to the
shared state.
Invoke this helper after indicating to KVM's mmu code that an invalidation
is in progress to stop in-flight page faults from succeeding.
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 41 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 433f79047b9d1..3c94442bc8131 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -607,6 +607,42 @@ static bool kvm_gmem_is_safe_for_conversion(struct inode *inode, pgoff_t start,
return safe;
}
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
+static void kvm_gmem_invalidate(struct inode *inode, pgoff_t start, pgoff_t end)
+{
+ struct folio_batch fbatch;
+ pgoff_t next = start;
+ int i;
+
+ folio_batch_init(&fbatch);
+ while (filemap_get_folios(inode->i_mapping, &next, end - 1, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); ++i) {
+ struct folio *folio = fbatch.folios[i];
+ pgoff_t start_index, end_index;
+ kvm_pfn_t start_pfn, end_pfn;
+
+ start_index = max(start, folio->index);
+ end_index = min(end, folio_next_index(folio));
+ /*
+ * end_index is either in folio or points to
+ * the first page of the next folio. Hence,
+ * all pages in range [start_index, end_index)
+ * are contiguous.
+ */
+ start_pfn = folio_file_pfn(folio, start_index);
+ end_pfn = start_pfn + end_index - start_index;
+
+ kvm_arch_gmem_invalidate(start_pfn, end_pfn);
+ }
+
+ folio_batch_release(&fbatch);
+ cond_resched();
+ }
+}
+#else
+static void kvm_gmem_invalidate(struct inode *inode, pgoff_t start, pgoff_t end) {}
+#endif
+
static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
size_t nr_pages, uint64_t attrs,
pgoff_t *err_index)
@@ -647,7 +683,12 @@ static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
*/
kvm_gmem_invalidate_start(inode, start, end);
+
+ if (!to_private)
+ kvm_gmem_invalidate(inode, start, end);
+
mas_store_prealloc(&mas, xa_mk_value(attrs));
+
kvm_gmem_invalidate_end(inode, start, end);
out:
filemap_invalidate_unlock(mapping);
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 10/46] KVM: guest_memfd: Wire up core private/shared attribute interfaces
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Sean Christopherson <seanjc@google.com>
With in-place conversion, guest_memfd is able to track the private/shared
status of memory. Use a global flag to toggle between tracking
private/shared status per-vm or within guest_memfd.
When queried for supported vm memory attributes, return 0 if attributes are
tracked in guest_memfd.
When querying for memory attributes over a range, look up memory attributes
based on the flag's state at query time.
For per-GFN memory attribute queries, choose an implementation (VM or
guest_memfd lookup) at KVM load time.
The flag is always false for now and will be made toggle-able after all
in-place conversion features are added in subsequent patches.
While the flag is false, if CONFIG_KVM_VM_MEMORY_ATTRIBUTES is also not
selected, the per-GFN memory attribute query defaults to returning
0 (false/not private).
Co-developed-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
include/linux/kvm_host.h | 4 ++++
virt/kvm/guest_memfd.c | 22 +++++++++++++++++++---
virt/kvm/kvm_main.c | 12 +++++++++++-
3 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 27687fb9d5201..acb552745b428 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2560,6 +2560,8 @@ static inline bool kvm_mem_range_is_private(struct kvm *kvm, gfn_t start,
#endif /* CONFIG_KVM_VM_MEMORY_ATTRIBUTES */
#ifdef kvm_arch_has_private_mem
+extern bool gmem_in_place_conversion;
+
typedef bool (kvm_mem_is_private_t)(struct kvm *kvm, gfn_t gfn);
DECLARE_STATIC_CALL(__kvm_mem_is_private, kvm_mem_is_private_t);
@@ -2568,6 +2570,8 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
return static_call(__kvm_mem_is_private)(kvm, gfn);
}
#else
+#define gmem_in_place_conversion false
+
static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
{
return false;
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index bca912db5be6e..e0e544ef47d69 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -926,6 +926,24 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn);
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE
+static bool kvm_gmem_range_is_private(struct file *file, pgoff_t index,
+ size_t nr_pages, struct kvm *kvm, gfn_t gfn)
+{
+ struct maple_tree *mt = &GMEM_I(file_inode(file))->attributes;
+ pgoff_t end = index + nr_pages - 1;
+ void *entry;
+
+ if (!gmem_in_place_conversion)
+ return kvm_range_has_vm_memory_attributes(kvm, gfn, gfn + nr_pages,
+ KVM_MEMORY_ATTRIBUTE_PRIVATE,
+ KVM_MEMORY_ATTRIBUTE_PRIVATE);
+
+ mt_for_each(mt, entry, index, end) {
+ if (xa_to_value(entry) != KVM_MEMORY_ATTRIBUTE_PRIVATE)
+ return false;
+ }
+ return true;
+}
static long __kvm_gmem_populate(struct kvm *kvm, struct kvm_memory_slot *slot,
struct file *file, gfn_t gfn, struct page *src_page,
@@ -946,9 +964,7 @@ static long __kvm_gmem_populate(struct kvm *kvm, struct kvm_memory_slot *slot,
folio_unlock(folio);
- if (!kvm_range_has_vm_memory_attributes(kvm, gfn, gfn + 1,
- KVM_MEMORY_ATTRIBUTE_PRIVATE,
- KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
+ if (!kvm_gmem_range_is_private(file, index, 1, kvm, gfn)) {
ret = -EINVAL;
goto out_put_folio;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b238e461b854..01761f6e25d25 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -101,6 +101,10 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_shrink);
static bool __ro_after_init allow_unsafe_mappings;
module_param(allow_unsafe_mappings, bool, 0444);
+#ifdef kvm_arch_has_private_mem
+bool __ro_after_init gmem_in_place_conversion = false;
+#endif
+
/*
* Ordering of locks:
*
@@ -2422,6 +2426,9 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
static u64 kvm_supported_vm_mem_attributes(struct kvm *kvm)
{
#ifdef kvm_arch_has_private_mem
+ if (gmem_in_place_conversion)
+ return 0;
+
if (!kvm || kvm_arch_has_private_mem(kvm))
return KVM_MEMORY_ATTRIBUTE_PRIVATE;
#endif
@@ -2633,8 +2640,11 @@ EXPORT_STATIC_CALL_GPL(__kvm_mem_is_private);
static void kvm_init_memory_attributes(void)
{
+ if (gmem_in_place_conversion)
+ static_call_update(__kvm_mem_is_private, kvm_gmem_is_private);
#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
- static_call_update(__kvm_mem_is_private, kvm_vm_mem_is_private);
+ else
+ static_call_update(__kvm_mem_is_private, kvm_vm_mem_is_private);
#endif
}
#else
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
* [PATCH v8 12/46] KVM: guest_memfd: Only prepare folios for private pages
From: Ackerley Tng via B4 Relay @ 2026-06-19 0:31 UTC (permalink / raw)
To: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, Baoquan He
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco, Ackerley Tng
In-Reply-To: <20260618-gmem-inplace-conversion-v8-0-9d2959357853@google.com>
From: Ackerley Tng <ackerleytng@google.com>
All-shared guest_memfd used to be only supported for non-CoCo VMs where
preparation doesn't apply. INIT_SHARED is about to be supported for CoCo
VMs in a later patch in this series.
In addition, KVM_SET_MEMORY_ATTRIBUTES2 is about to be supported in
guest_memfd in a later patch in this series.
This means that the kvm fault handler may now call kvm_gmem_get_pfn() on a
shared folio for a CoCo VM where preparation applies.
Add a check to make sure that preparation is only performed for private
folios.
Preparation will be undone on freeing (see kvm_gmem_free_folio()) and on
conversion to shared.
Suggested-by: Michael Roth <michael.roth@amd.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index e0e544ef47d69..65ce795c090d9 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -890,6 +890,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
int *max_order)
{
pgoff_t index = kvm_gmem_get_index(slot, gfn);
+ struct inode *inode;
struct folio *folio;
int r = 0;
@@ -897,7 +898,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
if (!file)
return -EFAULT;
- filemap_invalidate_lock_shared(file_inode(file)->i_mapping);
+ inode = file_inode(file);
+ filemap_invalidate_lock_shared(inode->i_mapping);
folio = __kvm_gmem_get_pfn(file, slot, index, pfn, max_order);
if (IS_ERR(folio)) {
@@ -910,7 +912,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
folio_mark_uptodate(folio);
}
- r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
+ if (kvm_gmem_is_private_mem(inode, index))
+ r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
folio_unlock(folio);
@@ -920,7 +923,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
folio_put(folio);
out:
- filemap_invalidate_unlock_shared(file_inode(file)->i_mapping);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
return r;
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn);
--
2.55.0.rc0.738.g0c8ab3ebcc-goog
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox