[PATCH RFC 05/12] system/memory: Re-use memory-backend-guest-memfd inode for private memory

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Michael Roth <michael.roth@amd.com>
To: <qemu-devel@nongnu.org>
Cc: <kvm@vger.kernel.org>, <pbonzini@redhat.com>,
	<berrange@redhat.com>, <armbru@redhat.com>,
	<pankaj.gupta@amd.com>, <isaku.yamahata@intel.com>,
	<xiaoyao.li@intel.com>, <chao.p.peng@linux.intel.com>,
	<david@kernel.org>, <ashish.kalra@amd.com>,
	<ackerleytng@google.com>
Subject: [PATCH RFC 05/12] system/memory: Re-use memory-backend-guest-memfd inode for private memory
Date: Wed, 27 May 2026 19:03:30 -0500	[thread overview]
Message-ID: <20260528000416.8161-6-michael.roth@amd.com> (raw)
In-Reply-To: <20260528000416.8161-1-michael.roth@amd.com>

When convert-in-place=true, the shared memory allocated/provided by the
guest-memfd memory backend should also be used internally for private
memory. Do this by dup()'ing the guest_memfd FD so separate cleanup
paths for shared vs. private FDs can be managed in the same way they are
currently for convert-in-place=false (where shared memory comes from
some other backend like memory-backend-memfd).

Since it currently only makes sense to allow a
memory-backend-guest-memfd FD to be used for private memory, introduce a
new RAM_GUEST_MEMFD_SHARED flag that can be used to limit dup()'ing to
specific backend types like memory-backend-guest-memfd.

Signed-off-by: Michael Roth <michael.roth@amd.com>
---
 backends/hostmem-guest-memfd.c |  1 +
 include/system/memory.h        |  3 +++
 system/physmem.c               | 46 +++++++++++++++++++++++++++++++---
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/backends/hostmem-guest-memfd.c b/backends/hostmem-guest-memfd.c
index deb796a6bd..8ab8242892 100644
--- a/backends/hostmem-guest-memfd.c
+++ b/backends/hostmem-guest-memfd.c
@@ -56,6 +56,7 @@ have_fd:
     ram_flags = backend->share ? RAM_SHARED : RAM_PRIVATE;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
+    ram_flags |= RAM_GUEST_MEMFD_SHARED;
     return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
                                           backend->size, ram_flags, fd, 0, errp);
 }
diff --git a/include/system/memory.h b/include/system/memory.h
index 24c68720aa..0a371b686a 100644
--- a/include/system/memory.h
+++ b/include/system/memory.h
@@ -282,6 +282,9 @@ typedef struct IOMMUTLBEvent {
  */
 #define RAM_PRIVATE (1 << 13)
 
+/* RAM is backed by a KVM guest_memfd that is also usable for shared memory */
+#define RAM_GUEST_MEMFD_SHARED   (1 << 14)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
diff --git a/system/physmem.c b/system/physmem.c
index 04c7c38721..ebec7ae7a4 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -59,6 +59,7 @@
 #include "system/hostmem.h"
 #include "system/hw_accel.h"
 #include "system/xen-mapcache.h"
+#include "system/confidential-guest-support.h"
 #include "trace.h"
 
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
@@ -2187,11 +2188,14 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
     if (new_block->flags & RAM_GUEST_MEMFD) {
         int ret;
 
+        assert(current_machine->cgs);
+
         if (!kvm_enabled()) {
             error_setg(errp, "cannot set up private guest memory for %s: KVM required",
                        object_get_typename(OBJECT(current_machine->cgs)));
             goto out_free;
         }
+
         assert(new_block->guest_memfd < 0);
 
         ret = ram_block_coordinated_discard_require(true);
@@ -2202,8 +2206,38 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
             goto out_free;
         }
 
-        new_block->guest_memfd = kvm_create_guest_memfd_private(new_block->max_length,
-                                                                errp);
+        /*
+         * If both shared/private memory are handled by guest_memfd, make sure to
+         * re-use the guest_memfd inode that should have already been created for
+         * handling shared memory.
+         */
+        if (current_machine->cgs->convert_in_place) {
+            if (!(new_block->flags & RAM_GUEST_MEMFD_SHARED)) {
+                error_setg(errp, "configured memory backend is not compatible with in-place conversion");
+                qemu_mutex_unlock_ramlist();
+                goto out_free;
+            }
+            assert(new_block->fd >= 0);
+
+            /*
+             * Current logic calculates guest_memfd_offset on the assumption that
+             * offset 0 corresponds to the first GPA that is backed by the RAM
+             * block/backend. For cases where the guest_memfd is only used for
+             * private memory and created internally as-needed this is always the
+             * case, but when re-using a guest_memfd that's also usable for shared
+             * memory (e.g. via memory-backend-guest-memfd) it's possible that
+             * guest_memfd might be mmap()'d starting at some non-zero offset. For
+             * now, this isn't a reachable condition, but assert this in case this
+             * ever changes and the logic needs to be updated to account for this.
+             */
+            assert(new_block->fd_offset == 0);
+
+            new_block->guest_memfd = qemu_dup(new_block->fd);
+        } else {
+            new_block->guest_memfd =
+                kvm_create_guest_memfd_private(new_block->max_length, errp);
+        }
+
         if (new_block->guest_memfd < 0) {
             qemu_mutex_unlock_ramlist();
             goto out_free;
@@ -2315,7 +2349,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
     assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
                           RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
                           RAM_READONLY_FD | RAM_GUEST_MEMFD |
-                          RAM_RESIZEABLE)) == 0);
+                          RAM_RESIZEABLE | RAM_GUEST_MEMFD_SHARED)) == 0);
     assert(max_size >= size);
 
     if (xen_enabled()) {
@@ -2828,6 +2862,12 @@ int ram_block_rebind(Error **errp)
 {
     RAMBlock *block;
 
+    if (current_machine->cgs && current_machine->cgs->convert_in_place) {
+        error_setg(errp,
+                   "reset support is not yet enabled for in-place conversion");
+        return -1;
+    }
+
     qemu_mutex_lock_ramlist();
 
     RAMBLOCK_FOREACH(block) {
-- 
2.43.0

next prev parent reply	other threads:[~2026-05-28  0:09 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-28  0:03 [PATCH RFC 00/12] guest_memfd: support in-place memory conversion Michael Roth
2026-05-28  0:03 ` [PATCH RFC 01/12] accel/kvm: Decouple guest_memfd checks from memory attribute checks Michael Roth
2026-05-28  0:03 ` [PATCH RFC 02/12] hostmem: Introduce dedicated memory backend for guest_memfd Michael Roth
2026-06-02  8:22   ` Markus Armbruster
2026-06-03  6:19     ` Michael Roth
2026-06-08  8:20       ` Markus Armbruster
2026-06-08 20:42         ` Michael Roth
2026-05-28  0:03 ` [PATCH RFC 03/12] linux-headers: Update headers for v7 of in-place conversion kernel support Michael Roth
2026-05-28  0:03 ` [PATCH RFC 04/12] accel/kvm: Add CGS option to control in-place conversion support Michael Roth
2026-06-02  8:23   ` Markus Armbruster
2026-06-03  6:39     ` Michael Roth
2026-06-08  8:15       ` Markus Armbruster
2026-06-08 20:21         ` Michael Roth
2026-05-28  0:03 ` Michael Roth [this message]
2026-05-28  0:03 ` [PATCH RFC 06/12] system/memory: Default to guest_memfd for RAM for in-place conversion Michael Roth
2026-05-28  0:03 ` [PATCH RFC 07/12] accel/kvm: Move post-conversion updates to a separate helper Michael Roth
2026-05-28  0:03 ` [PATCH RFC 08/12] accel/kvm: Re-order attribute notifications for in-place conversion Michael Roth
2026-05-28  0:03 ` [PATCH RFC 09/12] accel/kvm: Support shared/private conversions via guest_memfd ioctls Michael Roth
2026-06-04 13:19   ` Gupta, Pankaj
2026-06-04 23:36     ` Michael Roth
2026-06-04 23:36       ` Michael Roth via qemu development
2026-05-28  0:03 ` [PATCH RFC 10/12] accel/kvm: Don't default to private attributes for in-place conversion Michael Roth
2026-05-28  0:03 ` [PATCH RFC 11/12] i386/sev: Update SNP_LAUNCH_UPDATE " Michael Roth
2026-05-28  0:03 ` [PATCH RFC 12/12] i386/sev: Allow in-place conversion for SEV-SNP guests Michael Roth
2026-05-28  5:44 ` [PATCH RFC 00/12] guest_memfd: support in-place memory conversion Xiaoyao Li
2026-06-02 22:20   ` Michael Roth

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:deb796a6b dfblob:8ab824289 dfblob:24c68720a dfblob:0a371b686
dfblob:04c7c3872 dfblob:ebec7ae7a )
 OR (
bs:"[PATCH RFC 05/12] system/memory: Re-use memory-backend-guest-memfd inode for private memory" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260528000416.8161-6-michael.roth@amd.com \
    --to=michael.roth@amd.com \
    --cc=ackerleytng@google.com \
    --cc=armbru@redhat.com \
    --cc=ashish.kalra@amd.com \
    --cc=berrange@redhat.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=david@kernel.org \
    --cc=isaku.yamahata@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=pankaj.gupta@amd.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=xiaoyao.li@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.