From: David Hildenbrand <david@redhat.com>
To: qemu-devel@nongnu.org
Cc: David Hildenbrand <david@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
	Tiwei Bie <tiwei.bie@intel.com>
Subject: [PATCH v1 1/2] vhost: Defer filtering memory sections until building the vhost memory structure
Date: Thu, 16 Feb 2023 12:47:51 +0100	[thread overview]
Message-ID: <20230216114752.198627-2-david@redhat.com> (raw)
In-Reply-To: <20230216114752.198627-1-david@redhat.com>

Having multiple devices, some filtering memslots and some not, messes up
the "used_memslots" accounting. If a device that filters out fewer memory
sections is added after a device that filters out more, we are in trouble,
because our memslot checks stop working reliably. For example, hotplugging
a device that filters out fewer memslots might pass the checks based on
max vs. used memslots, but can still run out of memslots when getting
notified about all memory sections.

Further, in the context of memory devices it will soon be helpful to know
that a RAM memory region section will consume a memslot and be accounted
for in the used vs. free memslots, so that we can properly implement
reservation of memslots for memory devices. Whether a device filters such
a section out and would theoretically still have a free memslot is then
hidden internally, making overall vhost memslot accounting easier.

Let's filter the memslots only when creating the vhost memory array,
accounting all RAM && !ROM memory regions as "used_memslots", even though
vhost-user isn't interested in anonymous RAM regions, because it needs an
fd.
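
In other words, after this change a section counts towards used_memslots
purely based on the region type; an illustrative helper (not part of the
patch) capturing the accounting rule:

    /* Illustrative only; this helper is not added by the patch. */
    static bool counts_towards_used_memslots(MemoryRegionSection *section)
    {
        MemoryRegion *mr = section->mr;

        /*
         * Every RAM && !ROM region is accounted for, even anonymous RAM
         * that vhost-user will filter out later, when building the vhost
         * memory array, because it has no fd to share with the backend.
         */
        return memory_region_is_ram(mr) && !memory_region_is_rom(mr);
    }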

When a device actually filters out regions (which should rarely happen in
practice), we might detect a layout change even though only filtered
regions changed. We won't bother optimizing that for now.

Note: we cannot simply filter out the regions and count them as
"filtered" on top of the used count, because filtered regions could get
merged and result in a smaller effective number of memslots (e.g., two
filtered sections that are adjacent in guest physical address space would
end up as a single memslot). Further, we won't touch the HMP/QMP virtio
introspection output.

Fixes: 988a27754bbb ("vhost: allow backends to filter memory sections")
Cc: Tiwei Bie <tiwei.bie@intel.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 hw/virtio/vhost.c | 79 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 55 insertions(+), 24 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index eb8c4c378c..b7fb960fa9 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -219,8 +219,13 @@ static void vhost_log_sync_range(struct vhost_dev *dev,
     int i;
     /* FIXME: this is N^2 in number of sections */
     for (i = 0; i < dev->n_mem_sections; ++i) {
-        MemoryRegionSection *section = &dev->mem_sections[i];
-        vhost_sync_dirty_bitmap(dev, section, first, last);
+        MemoryRegionSection *mrs = &dev->mem_sections[i];
+
+        if (dev->vhost_ops->vhost_backend_mem_section_filter &&
+            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, mrs)) {
+            continue;
+        }
+        vhost_sync_dirty_bitmap(dev, mrs, first, last);
     }
 }
 
@@ -503,12 +508,6 @@ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
             return false;
         }
 
-        if (dev->vhost_ops->vhost_backend_mem_section_filter &&
-            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {
-            trace_vhost_reject_section(mr->name, 2);
-            return false;
-        }
-
         trace_vhost_section(mr->name);
         return true;
     } else {
@@ -525,6 +524,43 @@ static void vhost_begin(MemoryListener *listener)
     dev->n_tmp_sections = 0;
 }
 
+static void vhost_realloc_vhost_memory(struct vhost_dev *dev,
+                                       unsigned int nregions)
+{
+    const size_t size = offsetof(struct vhost_memory, regions) +
+                        nregions * sizeof dev->mem->regions[0];
+
+    dev->mem = g_realloc(dev->mem, size);
+    dev->mem->nregions = nregions;
+}
+
+static void vhost_rebuild_vhost_memory(struct vhost_dev *dev)
+{
+    unsigned int nregions = 0;
+    int i;
+
+    vhost_realloc_vhost_memory(dev, dev->n_mem_sections);
+    for (i = 0; i < dev->n_mem_sections; i++) {
+        struct MemoryRegionSection *mrs = dev->mem_sections + i;
+        struct vhost_memory_region *cur_vmr;
+
+        if (dev->vhost_ops->vhost_backend_mem_section_filter &&
+            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, mrs)) {
+            continue;
+        }
+        cur_vmr = dev->mem->regions + nregions;
+        nregions++;
+
+        cur_vmr->guest_phys_addr = mrs->offset_within_address_space;
+        cur_vmr->memory_size     = int128_get64(mrs->size);
+        cur_vmr->userspace_addr  =
+            (uintptr_t)memory_region_get_ram_ptr(mrs->mr) +
+            mrs->offset_within_region;
+        cur_vmr->flags_padding   = 0;
+    }
+    vhost_realloc_vhost_memory(dev, nregions);
+}
+
 static void vhost_commit(MemoryListener *listener)
 {
     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
@@ -532,7 +568,6 @@ static void vhost_commit(MemoryListener *listener)
     MemoryRegionSection *old_sections;
     int n_old_sections;
     uint64_t log_size;
-    size_t regions_size;
     int r;
     int i;
     bool changed = false;
@@ -564,23 +599,19 @@ static void vhost_commit(MemoryListener *listener)
         goto out;
     }
 
-    /* Rebuild the regions list from the new sections list */
-    regions_size = offsetof(struct vhost_memory, regions) +
-                       dev->n_mem_sections * sizeof dev->mem->regions[0];
-    dev->mem = g_realloc(dev->mem, regions_size);
-    dev->mem->nregions = dev->n_mem_sections;
+    /*
+     * Globally track the used memslots *without* device specific
+     * filtering. This way, we always know how many memslots are required
+     * when devices with differing filtering requirements get mixed, and
+     * all RAM memory regions of memory devices will consume memslots.
+     */
     used_memslots = dev->mem->nregions;
-    for (i = 0; i < dev->n_mem_sections; i++) {
-        struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
-        struct MemoryRegionSection *mrs = dev->mem_sections + i;
 
-        cur_vmr->guest_phys_addr = mrs->offset_within_address_space;
-        cur_vmr->memory_size     = int128_get64(mrs->size);
-        cur_vmr->userspace_addr  =
-            (uintptr_t)memory_region_get_ram_ptr(mrs->mr) +
-            mrs->offset_within_region;
-        cur_vmr->flags_padding   = 0;
-    }
+    /*
+     * Rebuild the regions list from the new sections list, filtering out all
+     * sections that this device is not interested in.
+     */
+    vhost_rebuild_vhost_memory(dev);
 
     if (!dev->started) {
         goto out;
-- 
2.39.1


