qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Xiaoyao Li <xiaoyao.li@intel.com>
To: "Paolo Bonzini" <pbonzini@redhat.com>,
	"David Hildenbrand" <david@redhat.com>,
	"Igor Mammedov" <imammedo@redhat.com>,
	"Michael S . Tsirkin" <mst@redhat.com>,
	"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
	"Richard Henderson" <richard.henderson@linaro.org>,
	"Peter Xu" <peterx@redhat.com>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>,
	"Cornelia Huck" <cohuck@redhat.com>,
	"Daniel P . Berrangé" <berrange@redhat.com>,
	"Eric Blake" <eblake@redhat.com>,
	"Markus Armbruster" <armbru@redhat.com>,
	"Marcelo Tosatti" <mtosatti@redhat.com>
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org, xiaoyao.li@intel.com,
	Michael Roth <michael.roth@amd.com>,
	Sean Christopherson <seanjc@google.com>,
	Claudio Fontana <cfontana@suse.de>,
	Gerd Hoffmann <kraxel@redhat.com>,
	Isaku Yamahata <isaku.yamahata@gmail.com>,
	Chenyi Qiang <chenyi.qiang@intel.com>
Subject: [PATCH v4 05/66] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot
Date: Wed, 24 Jan 2024 22:22:27 -0500	[thread overview]
Message-ID: <20240125032328.2522472-6-xiaoyao.li@intel.com> (raw)
In-Reply-To: <20240125032328.2522472-1-xiaoyao.li@intel.com>

From: Chao Peng <chao.p.peng@linux.intel.com>

Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM.

With KVM_SET_USER_MEMORY_REGION2, QEMU can set up a memory region that is
backed both by hva-based shared memory and by guest memfd-based private
memory.

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v4:
- update KVM_MEM_PRIVATE to KVM_MEM_GUEST_MEMFD; (Isaku)
---
 accel/kvm/kvm-all.c      | 56 ++++++++++++++++++++++++++++++++++------
 accel/kvm/trace-events   |  2 +-
 include/sysemu/kvm_int.h |  2 ++
 3 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 56b41a4ea8dc..50c10becf772 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -283,35 +283,69 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
 static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
 {
     KVMState *s = kvm_state;
-    struct kvm_userspace_memory_region mem;
+    struct kvm_userspace_memory_region2 mem;
+    static int cap_user_memory2 = -1;
     int ret;
 
+    if (cap_user_memory2 == -1) {
+        cap_user_memory2 = kvm_check_extension(s, KVM_CAP_USER_MEMORY2);
+    }
+
+    if (!cap_user_memory2 && slot->guest_memfd >= 0) {
+        error_report("%s, KVM doesn't support KVM_CAP_USER_MEMORY2,"
+                     " which is required by guest memfd!", __func__);
+        exit(1);
+    }
+
     mem.slot = slot->slot | (kml->as_id << 16);
     mem.guest_phys_addr = slot->start_addr;
     mem.userspace_addr = (unsigned long)slot->ram;
     mem.flags = slot->flags;
+    mem.guest_memfd = slot->guest_memfd;
+    mem.guest_memfd_offset = slot->guest_memfd_offset;
 
     if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) {
         /* Set the slot size to 0 before setting the slot to the desired
          * value. This is needed based on KVM commit 75d61fbc. */
         mem.memory_size = 0;
-        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+
+        if (cap_user_memory2) {
+            ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+        } else {
+            ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+        }
         if (ret < 0) {
             goto err;
         }
     }
     mem.memory_size = slot->memory_size;
-    ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+    if (cap_user_memory2) {
+        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+    } else {
+        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+    }
     slot->old_flags = mem.flags;
 err:
     trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags,
                               mem.guest_phys_addr, mem.memory_size,
-                              mem.userspace_addr, ret);
+                              mem.userspace_addr, mem.guest_memfd,
+                              mem.guest_memfd_offset, ret);
     if (ret < 0) {
-        error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
-                     " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
-                     __func__, mem.slot, slot->start_addr,
-                     (uint64_t)mem.memory_size, strerror(errno));
+        if (cap_user_memory2) {
+                error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d,"
+                        " start=0x%" PRIx64 ", size=0x%" PRIx64 ","
+                        " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 ","
+                        " guest_memfd_offset=0x%" PRIx64 ": %s",
+                        __func__, mem.slot, slot->start_addr,
+                        (uint64_t)mem.memory_size, mem.flags,
+                        mem.guest_memfd, (uint64_t)mem.guest_memfd_offset,
+                        strerror(errno));
+        } else {
+                error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
+                            " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
+                            __func__, mem.slot, slot->start_addr,
+                            (uint64_t)mem.memory_size, strerror(errno));
+        }
     }
     return ret;
 }
@@ -466,6 +500,9 @@ static int kvm_mem_flags(MemoryRegion *mr)
     if (readonly && kvm_readonly_mem_allowed) {
         flags |= KVM_MEM_READONLY;
     }
+    if (memory_region_has_guest_memfd(mr)) {
+        flags |= KVM_MEM_GUEST_MEMFD;
+    }
     return flags;
 }
 
@@ -1353,6 +1390,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
         mem->ram_start_offset = ram_start_offset;
         mem->ram = ram;
         mem->flags = kvm_mem_flags(mr);
+        mem->guest_memfd = mr->ram_block->guest_memfd;
+        mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host;
+
         kvm_slot_init_dirty_bitmap(mem);
         err = kvm_set_user_memory_region(kml, mem, true);
         if (err) {
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index 9f599abc172c..e8c52cb9e7a1 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
 kvm_irqchip_release_virq(int virq) "virq %d"
 kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d"
 kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
-kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
+kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d"
 kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
 kvm_resample_fd_notify(int gsi) "gsi %d"
 kvm_dirty_ring_full(int id) "vcpu %d"
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 882e37e12c5b..cad763e240c1 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -30,6 +30,8 @@ typedef struct KVMSlot
     int as_id;
     /* Cache of the offset in ram address space */
     ram_addr_t ram_start_offset;
+    int guest_memfd;
+    hwaddr guest_memfd_offset;
 } KVMSlot;
 
 typedef struct KVMMemoryUpdate {
-- 
2.34.1



  parent reply	other threads:[~2024-01-25  3:32 UTC|newest]

Thread overview: 94+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-25  3:22 [PATCH v4 00/66] QEMU Guest memfd + QEMU TDX support Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 01/66] linux-headers: Update to Linux v6.8-rc1 Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 02/66] RAMBlock: Add support of KVM private guest memfd Xiaoyao Li
2024-01-26 13:57   ` David Hildenbrand
2024-01-29  2:23     ` Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 03/66] HostMem: Add mechanism to opt in kvm guest memfd via MachineState Xiaoyao Li
2024-01-26 13:58   ` David Hildenbrand
2024-01-25  3:22 ` [PATCH v4 04/66] trace/kvm: Split address space and slot id in trace_kvm_set_user_memory() Xiaoyao Li
2024-01-25  3:22 ` Xiaoyao Li [this message]
2024-01-25  3:22 ` [PATCH v4 06/66] kvm: Introduce support for memory_attributes Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 07/66] physmem: Introduce ram_block_discard_guest_memfd_range() Xiaoyao Li
2024-01-25 18:45   ` David Hildenbrand
2024-01-25  3:22 ` [PATCH v4 08/66] kvm: handle KVM_EXIT_MEMORY_FAULT Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 09/66] trace/kvm: Add trace for page convertion between shared and private Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 10/66] *** HACK *** linux-headers: Update headers to pull in TDX API changes Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 11/66] i386: Introduce tdx-guest object Xiaoyao Li
2024-02-19 12:34   ` Markus Armbruster
2024-02-19 12:44     ` Daniel P. Berrangé
2024-01-25  3:22 ` [PATCH v4 12/66] target/i386: Implement mc->kvm_type() to get VM type Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 13/66] target/i386: Introduce kvm_confidential_guest_init() Xiaoyao Li
2024-01-29 18:02   ` Daniel P. Berrangé
2024-01-25  3:22 ` [PATCH v4 14/66] i386/tdx: Implement tdx_kvm_init() to initialize TDX VM context Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 15/66] i386/tdx: Get tdx_capabilities via KVM_TDX_CAPABILITIES Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 16/66] i386/tdx: Introduce is_tdx_vm() helper and cache tdx_guest object Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 17/66] i386/tdx: Adjust the supported CPUID based on TDX restrictions Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 18/66] i386/tdx: Make Intel-PT unsupported for TD guest Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 19/66] i386/tdx: Update tdx_cpuid_lookup[].tdx_fixed0/1 by tdx_caps.cpuid_config[] Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 20/66] i386/tdx: Integrate tdx_caps->xfam_fixed0/1 into tdx_cpuid_lookup Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 21/66] i386/tdx: Integrate tdx_caps->attrs_fixed0/1 to tdx_cpuid_lookup Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 22/66] i386/kvm: Move architectural CPUID leaf generation to separate helper Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 23/66] kvm: Introduce kvm_arch_pre_create_vcpu() Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 24/66] i386/tdx: Initialize TDX before creating TD vcpus Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 25/66] i386/tdx: Add property sept-ve-disable for tdx-guest object Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 26/66] i386/tdx: Make sept_ve_disable set by default Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 27/66] i386/tdx: Wire CPU features up with attributes of TD guest Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 28/66] i386/tdx: Validate TD attributes Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 29/66] i386/tdx: Support user configurable mrconfigid/mrowner/mrownerconfig Xiaoyao Li
2024-02-19 12:48   ` Markus Armbruster
2024-02-20 15:10     ` Xiaoyao Li
2024-02-20 16:14       ` Markus Armbruster
2024-01-25  3:22 ` [PATCH v4 30/66] i386/tdx: Implement user specified tsc frequency Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 31/66] i386/tdx: Set kvm_readonly_mem_enabled to false for TDX VM Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 32/66] kvm/memory: Introduce the infrastructure to set the default shared/private value Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 33/66] i386/tdx: Make memory type private by default Xiaoyao Li
2024-01-26 14:58   ` David Hildenbrand
2024-01-29  2:18     ` Xiaoyao Li
2024-02-20 15:08       ` David Hildenbrand
2024-01-25  3:22 ` [PATCH v4 34/66] kvm/tdx: Don't complain when converting vMMIO region to shared Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 35/66] kvm/tdx: Ignore memory conversion to shared of unassigned region Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 36/66] i386/tdvf: Introduce function to parse TDVF metadata Xiaoyao Li
2024-01-25  3:22 ` [PATCH v4 37/66] i386/tdx: Parse TDVF metadata for TDX VM Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 38/66] i386/tdx: Skip BIOS shadowing setup Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 39/66] i386/tdx: Don't initialize pc.rom for TDX VMs Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 40/66] i386/tdx: Track mem_ptr for each firmware entry of TDVF Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 41/66] i386/tdx: Track RAM entries for TDX VM Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 42/66] headers: Add definitions from UEFI spec for volumes, resources, etc Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 43/66] i386/tdx: Setup the TD HOB list Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 44/66] i386/tdx: Add TDVF memory via KVM_TDX_INIT_MEM_REGION Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 45/66] memory: Introduce memory_region_init_ram_guest_memfd() Xiaoyao Li
2024-01-26 14:55   ` David Hildenbrand
2024-01-25  3:23 ` [PATCH v4 46/66] i386/tdx: register TDVF as private memory Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 47/66] i386/tdx: Call KVM_TDX_INIT_VCPU to initialize TDX vcpu Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 48/66] i386/tdx: Finalize TDX VM Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 49/66] i386/tdx: handle TDG.VP.VMCALL<SetupEventNotifyInterrupt> Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 50/66] i386/tdx: handle TDG.VP.VMCALL<GetQuote> Xiaoyao Li
     [not found]   ` <87zfvwehyz.fsf@pond.sub.org>
2024-02-19 12:55     ` Daniel P. Berrangé
2024-02-19 14:41       ` Markus Armbruster
2024-02-20 14:16         ` Xiaoyao Li
2024-02-22 16:30   ` Daniel P. Berrangé
2024-02-23  1:06     ` Xiaoyao Li
2024-02-23  1:48       ` Qiu, Feng
2024-01-25  3:23 ` [PATCH v4 51/66] i386/tdx: handle TDG.VP.VMCALL<MapGPA> hypercall Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 52/66] i386/tdx: Handle TDG.VP.VMCALL<REPORT_FATAL_ERROR> Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 53/66] i386/tdx: Wire TDX_REPORT_FATAL_ERROR with GuestPanic facility Xiaoyao Li
2024-02-19 12:53   ` Markus Armbruster
2024-02-27  9:51     ` Xiaoyao Li
2024-02-27 11:51       ` Markus Armbruster
2024-02-27 12:09         ` Xiaoyao Li
2024-02-27 13:09           ` Markus Armbruster
2024-02-27 14:51             ` Xiaoyao Li
2024-02-27 15:42               ` Markus Armbruster
2024-01-25  3:23 ` [PATCH v4 54/66] pci-host/q35: Move PAM initialization above SMRAM initialization Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 55/66] q35: Introduce smm_ranges property for q35-pci-host Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 56/66] i386/tdx: Disable SMM for TDX VMs Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 57/66] i386/tdx: Disable PIC " Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 58/66] i386/tdx: Don't allow system reset " Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 59/66] i386/tdx: LMCE is not supported for TDX Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 60/66] hw/i386: add eoi_intercept_unsupported member to X86MachineState Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 61/66] hw/i386: add option to forcibly report edge trigger in acpi tables Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 62/66] i386/tdx: Don't synchronize guest tsc for TDs Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 63/66] i386/tdx: Only configure MSR_IA32_UCODE_REV in kvm_init_msrs() " Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 64/66] i386/tdx: Skip kvm_put_apicbase() " Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 65/66] i386/tdx: Don't get/put guest state for TDX VMs Xiaoyao Li
2024-01-25  3:23 ` [PATCH v4 66/66] docs: Add TDX documentation Xiaoyao Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240125032328.2522472-6-xiaoyao.li@intel.com \
    --to=xiaoyao.li@intel.com \
    --cc=armbru@redhat.com \
    --cc=berrange@redhat.com \
    --cc=cfontana@suse.de \
    --cc=chenyi.qiang@intel.com \
    --cc=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=eblake@redhat.com \
    --cc=imammedo@redhat.com \
    --cc=isaku.yamahata@gmail.com \
    --cc=kraxel@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=michael.roth@amd.com \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=seanjc@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).