* [v3 PATCH 1/2] KVM: guest_memfd: add generic population via write
2025-03-03 13:08 [v3 PATCH 0/2] KVM: guest_memfd: use write for population Nikita Kalyazin
@ 2025-03-03 13:08 ` Nikita Kalyazin
2025-03-03 13:08 ` [v3 PATCH 2/2] KVM: selftests: update guest_memfd write tests Nikita Kalyazin
1 sibling, 0 replies; 3+ messages in thread
From: Nikita Kalyazin @ 2025-03-03 13:08 UTC (permalink / raw)
To: pbonzini, shuah
Cc: kvm, linux-kselftest, linux-kernel, michael.day, david,
quic_eberman, jthoughton, brijesh.singh, michael.roth, graf,
jgowans, roypat, derekmn, nsaenz, xmarcalx, kalyazin
The write syscall populates guest_memfd with user-supplied data in a
generic way, i.e. no vendor-specific preparation is performed. This is
intended to be used in non-CoCo setups where guest memory is not
hardware-encrypted.
The following behaviour is implemented:
- only page-aligned count and offset are allowed
- if the memory is already allocated, the call will successfully
populate it
- if the memory is not allocated, the call will both allocate and
populate
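- if the memory is already populated, the call will not repopulate it:
the write stops at the first already-populated page and returns the
number of bytes written so far, or fails with ENOSPC if nothing was
written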
Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
---
virt/kvm/guest_memfd.c | 94 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 91 insertions(+), 3 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 30b47ff0e6d2..f93fe5835173 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -417,12 +417,97 @@ static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-#else
-#define kvm_gmem_mmap NULL
+
+/*
+ * Populate guest_memfd pages with user-supplied data via write()/pwrite().
+ *
+ * @file:   guest_memfd file being written.
+ * @buf:    user buffer holding the data; must not be NULL.
+ * @count:  number of bytes to write; must be a multiple of PAGE_SIZE.
+ * @offset: file position; must be page-aligned.  Advanced by PAGE_SIZE for
+ *          every page that was successfully populated.
+ *
+ * The range is processed one page at a time.  Each page is obtained through
+ * kvm_gmem_get_folio(), filled from @buf and then marked prepared.  A page
+ * that is already up to date is never overwritten; processing stops there.
+ *
+ * Return: the number of bytes written if at least one page was populated
+ * (a short count when processing stopped early), 0 if @count is 0, otherwise
+ * a negative errno:
+ *   -EINVAL  unaligned @offset/@count, range beyond i_size, or NULL @buf
+ *   -EINTR   a signal is pending
+ *   -EFAULT  hwpoisoned or large folio, or the user buffer could not be read
+ *   -ENOSPC  the first page of the range is already populated
+ *   or the error reported by kvm_gmem_get_folio().
+ *
+ * NOTE(review): "kmem" in the function name looks like a typo for
+ * kvm_gmem_write(); renaming also requires updating kvm_gmem_fops.
+ */
+static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *offset)
+{
+	struct inode *inode = file_inode(file);
+	pgoff_t start, end, index;
+	ssize_t written = 0;
+	ssize_t ret = 0;
+
+	if (!PAGE_ALIGNED(*offset) || !PAGE_ALIGNED(count))
+		return -EINVAL;
+
+	if (*offset + count > i_size_read(inode))
+		return -EINVAL;
+
+	if (!buf)
+		return -EINVAL;
+
+	start = *offset >> PAGE_SHIFT;
+	end = (*offset + count) >> PAGE_SHIFT;
+
+	filemap_invalidate_lock_shared(file->f_mapping);
+
+	for (index = start; index < end; ) {
+		unsigned long uncopied;
+		struct folio *folio;
+		pgoff_t next;
+		void *vaddr;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		folio = kvm_gmem_get_folio(inode, index);
+		if (IS_ERR(folio)) {
+			/* Propagate the real cause (e.g. -ENOMEM). */
+			ret = PTR_ERR(folio);
+			break;
+		}
+
+		if (folio_test_hwpoison(folio))
+			ret = -EFAULT;
+		/* No support for huge pages. */
+		else if (WARN_ON_ONCE(folio_test_large(folio)))
+			ret = -EFAULT;
+		/* Already populated: refuse to overwrite guest data. */
+		else if (folio_test_uptodate(folio))
+			ret = -ENOSPC;
+
+		/*
+		 * The folio lock is dropped before copying from userspace.
+		 *
+		 * NOTE(review): this leaves a window in which two concurrent
+		 * writers can both see the folio as not up to date and both
+		 * copy into it - confirm whether that is acceptable.
+		 */
+		folio_unlock(folio);
+		if (ret) {
+			folio_put(folio);
+			break;
+		}
+
+		/*
+		 * Large folios were rejected above, so every iteration
+		 * consumes exactly one page and 'written' doubles as the
+		 * offset into the user buffer.
+		 */
+		vaddr = kmap_local_folio(folio, 0);
+		uncopied = copy_from_user(vaddr, buf + written, PAGE_SIZE);
+		kunmap_local(vaddr);
+		if (uncopied) {
+			folio_put(folio);
+			ret = -EFAULT;
+			break;
+		}
+
+		kvm_gmem_mark_prepared(folio);
+
+		/* Read the next index while the reference is still held. */
+		next = folio_next_index(folio);
+		folio_put(folio);
+
+		index = next;
+		written += PAGE_SIZE;
+		*offset += PAGE_SIZE;
+	}
+
+	filemap_invalidate_unlock_shared(file->f_mapping);
+
+	/* Partial progress wins over the error that stopped the loop. */
+	return written ? written : ret;
+}
#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
static struct file_operations kvm_gmem_fops = {
- .mmap = kvm_gmem_mmap,
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+ .mmap = kvm_gmem_mmap,
+ .llseek = default_llseek,
+ .write = kvm_kmem_gmem_write,
+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
.open = generic_file_open,
.release = kvm_gmem_release,
.fallocate = kvm_gmem_fallocate,
@@ -538,6 +623,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
}
file->f_flags |= O_LARGEFILE;
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+ file->f_mode |= FMODE_LSEEK | FMODE_PWRITE;
+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
inode = file->f_inode;
WARN_ON(file->f_mapping != inode->i_mapping);
--
2.47.1
^ permalink raw reply related [flat|nested] 3+ messages in thread* [v3 PATCH 2/2] KVM: selftests: update guest_memfd write tests
2025-03-03 13:08 [v3 PATCH 0/2] KVM: guest_memfd: use write for population Nikita Kalyazin
2025-03-03 13:08 ` [v3 PATCH 1/2] KVM: guest_memfd: add generic population via write Nikita Kalyazin
@ 2025-03-03 13:08 ` Nikita Kalyazin
1 sibling, 0 replies; 3+ messages in thread
From: Nikita Kalyazin @ 2025-03-03 13:08 UTC (permalink / raw)
To: pbonzini, shuah
Cc: kvm, linux-kselftest, linux-kernel, michael.day, david,
quic_eberman, jthoughton, brijesh.singh, michael.roth, graf,
jgowans, roypat, derekmn, nsaenz, xmarcalx, kalyazin
Update the guest_memfd selftests to reflect that the write syscall is
now implemented for guest_memfd.
Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
---
.../testing/selftests/kvm/guest_memfd_test.c | 85 +++++++++++++++++--
1 file changed, 79 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index 38c501e49e0e..b07221aa54c9 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -20,18 +20,90 @@
#include "kvm_util.h"
#include "test_util.h"
-static void test_file_read_write(int fd)
+static void test_file_read(int fd)
{
char buf[64];
TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
"read on a guest_mem fd should fail");
- TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
- "write on a guest_mem fd should fail");
TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
"pread on a guest_mem fd should fail");
- TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
- "pwrite on a guest_mem fd should fail");
+}
+
+/*
+ * Exercise write()/pwrite() population of a guest_memfd.
+ *
+ * @fd:         guest_memfd file descriptor.
+ * @total_size: size of the guest_memfd in bytes; the test writes up to this
+ *              many bytes and punches the whole range before returning.
+ *
+ * Covers argument validation, refusal to repopulate a page, repopulation
+ * after a hole punch, population of preallocated memory, and the short
+ * write returned when an already populated page is hit mid-range.
+ */
+static void test_file_write(int fd, size_t total_size)
+{
+	size_t page_size = getpagesize();
+	void *buf = NULL;
+	ssize_t written;
+	int ret;
+
+	ret = posix_memalign(&buf, page_size, total_size);
+	TEST_ASSERT_EQ(ret, 0);
+
+	/* Give the source buffer defined contents before handing it to pwrite(). */
+	memset(buf, 0xaa, total_size);
+
+	/* Check arguments correctness checks work as expected */
+
+	written = pwrite(fd, buf, page_size - 1, 0);
+	TEST_ASSERT(written == -1, "write unaligned count on a guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, EINVAL);
+
+	written = pwrite(fd, buf, page_size, 1);
+	TEST_ASSERT(written == -1, "write unaligned offset on a guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, EINVAL);
+
+	written = pwrite(fd, buf, page_size, total_size);
+	TEST_ASSERT(written == -1, "writing past the file size on a guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, EINVAL);
+
+	written = pwrite(fd, NULL, page_size, 0);
+	TEST_ASSERT(written == -1, "supplying a NULL buffer when writing a guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, EINVAL);
+
+	/* Check double population is not allowed */
+
+	written = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(written == (ssize_t)page_size, "page-aligned write on a guest_mem fd should succeed");
+
+	written = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(written == -1, "write on already populated guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, ENOSPC);
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	/* Check population is allowed again after punching a hole */
+
+	written = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(written == (ssize_t)page_size, "page-aligned write on a punched guest_mem fd should succeed");
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	/* Check population of already allocated memory is allowed */
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");
+
+	written = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(written == (ssize_t)page_size, "write on a preallocated guest_mem fd should succeed");
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	/* Check population works until an already populated page is encountered */
+
+	written = pwrite(fd, buf, total_size, 0);
+	TEST_ASSERT(written == (ssize_t)total_size, "page-aligned write on a guest_mem fd should succeed");
+
+	/* Only the first page is freed; the rest of the range stays populated. */
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	written = pwrite(fd, buf, total_size, 0);
+	TEST_ASSERT(written == (ssize_t)page_size, "write on a guest_mem fd should not overwrite data");
+
+	/* Leave the file empty for the tests that run afterwards. */
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, total_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	free(buf);
 }
static void test_mmap_allowed(int fd, size_t total_size)
@@ -233,7 +305,8 @@ void test_vm_type(unsigned long type, bool is_shared)
fd = vm_create_guest_memfd(vm, total_size, 0);
- test_file_read_write(fd);
+ test_file_read(fd);
+ test_file_write(fd, total_size);
if (is_shared)
test_mmap_allowed(fd, total_size);
--
2.47.1
^ permalink raw reply related [flat|nested] 3+ messages in thread