public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Fuad Tabba <tabba@google.com>
To: kvm@vger.kernel.org
Cc: julien.thierry.kdev@gmail.com, andre.przywara@arm.com,
	alexandru.elisei@arm.com, will@kernel.org, tabba@google.com
Subject: [PATCH kvmtool v1 08/17] Use memfd for all guest ram allocations
Date: Tue, 15 Nov 2022 11:15:40 +0000	[thread overview]
Message-ID: <20221115111549.2784927-9-tabba@google.com> (raw)
In-Reply-To: <20221115111549.2784927-1-tabba@google.com>

Allocate all guest ram backed by memfd/ftruncate instead of
anonymous mmap. This will make it easier to use kvm with fd-based
kvm guest memory proposals [*]. It also would make it easier to
use ipc memory sharing should that be needed in the future.

Signed-off-by: Fuad Tabba <tabba@google.com>

[*] https://lore.kernel.org/all/20221025151344.3784230-1-chao.p.peng@linux.intel.com/
---
 include/kvm/kvm.h  |  1 +
 include/kvm/util.h |  3 +++
 kvm.c              |  4 ++++
 util/util.c        | 33 ++++++++++++++++++++-------------
 4 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h
index 3872dc6..d0d519b 100644
--- a/include/kvm/kvm.h
+++ b/include/kvm/kvm.h
@@ -87,6 +87,7 @@ struct kvm {
 	struct kvm_config	cfg;
 	int			sys_fd;		/* For system ioctls(), i.e. /dev/kvm */
 	int			vm_fd;		/* For VM ioctls() */
+	int			ram_fd;		/* For guest memory. */
 	timer_t			timerid;	/* Posix timer for interrupts */
 
 	int			nrcpus;		/* Number of cpus to run */
diff --git a/include/kvm/util.h b/include/kvm/util.h
index 61a205b..369603b 100644
--- a/include/kvm/util.h
+++ b/include/kvm/util.h
@@ -140,6 +140,9 @@ static inline int pow2_size(unsigned long x)
 }
 
 struct kvm;
+int memfd_alloc(u64 size, bool hugetlb, u64 blk_size);
+void *mmap_anon_or_hugetlbfs_align(struct kvm *kvm, const char *htlbfs_path,
+				   u64 size, u64 align);
 void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
 
 #endif /* KVM__UTIL_H */
diff --git a/kvm.c b/kvm.c
index 78bc0d8..ed29d68 100644
--- a/kvm.c
+++ b/kvm.c
@@ -160,6 +160,7 @@ struct kvm *kvm__new(void)
 	mutex_init(&kvm->mem_banks_lock);
 	kvm->sys_fd = -1;
 	kvm->vm_fd = -1;
+	kvm->ram_fd = -1;
 
 #ifdef KVM_BRLOCK_DEBUG
 	kvm->brlock_sem = (pthread_rwlock_t) PTHREAD_RWLOCK_INITIALIZER;
@@ -174,6 +175,9 @@ int kvm__exit(struct kvm *kvm)
 
 	kvm__arch_delete_ram(kvm);
 
+	if (kvm->ram_fd >= 0)
+		close(kvm->ram_fd);
+
 	list_for_each_entry_safe(bank, tmp, &kvm->mem_banks, list) {
 		list_del(&bank->list);
 		free(bank);
diff --git a/util/util.c b/util/util.c
index d3483d8..278bcc2 100644
--- a/util/util.c
+++ b/util/util.c
@@ -102,36 +102,38 @@ static u64 get_hugepage_blk_size(const char *htlbfs_path)
 	return sfs.f_bsize;
 }
 
-static void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size, u64 blk_size)
+int memfd_alloc(u64 size, bool hugetlb, u64 blk_size)
 {
 	const char *name = "kvmtool";
 	unsigned int flags = 0;
 	int fd;
-	void *addr;
-	int htsize = __builtin_ctzl(blk_size);
 
-	if ((1ULL << htsize) != blk_size)
-		die("Hugepage size must be a power of 2.\n");
+	if (hugetlb) {
+		int htsize = __builtin_ctzl(blk_size);
 
-	flags |= MFD_HUGETLB;
-	flags |= htsize << MFD_HUGE_SHIFT;
+		if ((1ULL << htsize) != blk_size)
+			die("Hugepage size must be a power of 2.\n");
+
+		flags |= MFD_HUGETLB;
+		flags |= htsize << MFD_HUGE_SHIFT;
+	}
 
 	fd = memfd_create(name, flags);
 	if (fd < 0)
-		die("Can't memfd_create for hugetlbfs map\n");
+		die("Can't memfd_create for memory map\n");
+
 	if (ftruncate(fd, size) < 0)
 		die("Can't ftruncate for mem mapping size %lld\n",
 			(unsigned long long)size);
-	addr = mmap(NULL, size, PROT_RW, MAP_PRIVATE, fd, 0);
-	close(fd);
 
-	return addr;
+	return fd;
 }
 
 /* This function wraps the decision between hugetlbfs map (if requested) or normal mmap */
 void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size)
 {
 	u64 blk_size = 0;
+	int fd;
 
 	/*
 	 * We don't /need/ to map guest RAM from hugetlbfs, but we do so
@@ -146,9 +148,14 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size)
 		}
 
 		kvm->ram_pagesize = blk_size;
-		return mmap_hugetlbfs(kvm, htlbfs_path, size, blk_size);
 	} else {
 		kvm->ram_pagesize = getpagesize();
-		return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
 	}
+
+	fd = memfd_alloc(size, htlbfs_path, blk_size);
+	if (fd < 0)
+		return MAP_FAILED;
+
+	kvm->ram_fd = fd;
+	return mmap(NULL, size, PROT_RW, MAP_PRIVATE, kvm->ram_fd, 0);
 }
-- 
2.38.1.431.g37b22c650d-goog


  parent reply	other threads:[~2022-11-15 11:16 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-15 11:15 [PATCH kvmtool v1 00/17] Use memfd for guest vm memory allocation Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 01/17] Initialize the return value in kvm__for_each_mem_bank() Fuad Tabba
2022-11-15 11:59   ` Andre Przywara
2022-11-23 16:08   ` Alexandru Elisei
2022-11-23 17:43     ` Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 02/17] Make mmap_hugetlbfs() static Fuad Tabba
2022-11-15 17:58   ` Andre Przywara
2022-11-15 11:15 ` [PATCH kvmtool v1 03/17] Rename parameter in mmap_anon_or_hugetlbfs() Fuad Tabba
2022-11-23 16:40   ` Alexandru Elisei
2022-11-23 17:44     ` Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 04/17] Add hostmem va to debug print Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 05/17] Factor out getting the hugetlb block size Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 06/17] Use memfd for hugetlbfs when allocating guest ram Fuad Tabba
2022-11-24 10:19   ` Alexandru Elisei
2022-11-24 10:45     ` Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 07/17] Make blk_size a parameter and pass it to mmap_hugetlbfs() Fuad Tabba
2022-11-15 11:15 ` Fuad Tabba [this message]
2022-11-24 11:01   ` [PATCH kvmtool v1 08/17] Use memfd for all guest ram allocations Alexandru Elisei
2022-11-24 15:19     ` Fuad Tabba
2022-11-24 17:14       ` Alexandru Elisei
2022-11-25 10:43         ` Alexandru Elisei
2022-11-25 10:58           ` Fuad Tabba
2022-11-25 10:44         ` Fuad Tabba
2022-11-25 11:31           ` Alexandru Elisei
2022-11-28  8:49             ` Fuad Tabba
2022-11-29 18:09               ` Alexandru Elisei
2022-11-30 17:54                 ` Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 09/17] Allocate pvtime memory with memfd Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 10/17] Allocate vesa " Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 11/17] Add a function that allocates aligned memory if specified Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 12/17] Use new function to align memory Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 13/17] Remove struct fields and code used for alignment Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 14/17] Replace kvm_arch_delete_ram with kvm_delete_ram Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 15/17] Remove no-longer unused macro Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 16/17] Factor out set_user_memory_region code Fuad Tabba
2022-11-15 11:15 ` [PATCH kvmtool v1 17/17] Pass the memory file descriptor and offset when registering ram Fuad Tabba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221115111549.2784927-9-tabba@google.com \
    --to=tabba@google.com \
    --cc=alexandru.elisei@arm.com \
    --cc=andre.przywara@arm.com \
    --cc=julien.thierry.kdev@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox