From: Si-Wei Liu <si-wei.liu@oracle.com>
To: qemu-devel@nongnu.org
Cc: mst@redhat.com, jasowang@redhat.com, joao.m.martins@oracle.com,
si-wei.liu@oracle.com
Subject: [PATCH v2 2/2] vhost: Perform memory section dirty scans once per iteration
Date: Wed, 14 Feb 2024 03:50:19 -0800 [thread overview]
Message-ID: <1707911419-11758-2-git-send-email-si-wei.liu@oracle.com> (raw)
In-Reply-To: <1707911419-11758-1-git-send-email-si-wei.liu@oracle.com>
On setups with one or more virtio-net devices with vhost on,
dirty tracking iteration increases in cost the bigger the number
of queues that are set up, e.g. on idle guests during migration the
following is observed with virtio-net with vhost=on:
48 queues -> 78.11% [.] vhost_dev_sync_region.isra.13
8 queues -> 40.50% [.] vhost_dev_sync_region.isra.13
1 queue -> 6.89% [.] vhost_dev_sync_region.isra.13
2 devices, 1 queue -> 18.60% [.] vhost_dev_sync_region.isra.14
With high memory update rates the symptom is lack of convergence as soon
as there is a vhost device with a sufficiently high number of queues,
or a sufficiently high number of vhost devices.
On every migration iteration (every 100 msecs) the *shared log* is
redundantly queried once per queue configured with vhost that exists
in the guest. For the virtqueue data, this is necessary,
but not for the memory sections, which are the same across devices. So
essentially we end up scanning the dirty log too often.
To fix that, select a vhost device responsible for scanning the
log with regards to memory sections dirty tracking. It is selected
when we enable the logger (during migration) and cleared when we
disable the logger. If the vhost logger device goes away for some
reason, the logger will be re-selected from the rest of vhost
devices.
Co-developed-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
---
hw/virtio/vhost.c | 75 +++++++++++++++++++++++++++++++++++++++++++----
include/hw/virtio/vhost.h | 1 +
2 files changed, 70 insertions(+), 6 deletions(-)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index ef6d9b5..997d560 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -45,6 +45,9 @@
static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
+static struct vhost_dev *vhost_mem_logger[VHOST_BACKEND_TYPE_MAX];
+static QLIST_HEAD(, vhost_dev) vhost_mlog_devices =
+ QLIST_HEAD_INITIALIZER(vhost_mlog_devices);
/* Memslots used by backends that support private memslots (without an fd). */
static unsigned int used_memslots;
@@ -149,6 +152,53 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev)
}
}
+static bool vhost_log_dev_enabled(struct vhost_dev *dev)
+{
+ assert(dev->vhost_ops);
+ assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
+ assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);
+
+ return dev == vhost_mem_logger[dev->vhost_ops->backend_type];
+}
+
+static void vhost_mlog_set_dev(struct vhost_dev *hdev, bool enable)
+{
+ struct vhost_dev *logdev = NULL;
+ VhostBackendType backend_type;
+ bool reelect = false;
+
+ assert(hdev->vhost_ops);
+ assert(hdev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
+ assert(hdev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);
+
+ backend_type = hdev->vhost_ops->backend_type;
+
+ if (enable && !QLIST_IS_INSERTED(hdev, logdev_entry)) {
+ reelect = !vhost_mem_logger[backend_type];
+ QLIST_INSERT_HEAD(&vhost_mlog_devices, hdev, logdev_entry);
+ } else if (!enable && QLIST_IS_INSERTED(hdev, logdev_entry)) {
+ reelect = vhost_mem_logger[backend_type] == hdev;
+ QLIST_REMOVE(hdev, logdev_entry);
+ }
+
+ if (!reelect)
+ return;
+
+ QLIST_FOREACH(hdev, &vhost_mlog_devices, logdev_entry) {
+ if (!hdev->vhost_ops ||
+ hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_NONE ||
+ hdev->vhost_ops->backend_type >= VHOST_BACKEND_TYPE_MAX)
+ continue;
+
+ if (hdev->vhost_ops->backend_type == backend_type) {
+ logdev = hdev;
+ break;
+ }
+ }
+
+ vhost_mem_logger[backend_type] = logdev;
+}
+
static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
MemoryRegionSection *section,
hwaddr first,
@@ -166,12 +216,14 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
start_addr = MAX(first, start_addr);
end_addr = MIN(last, end_addr);
- for (i = 0; i < dev->mem->nregions; ++i) {
- struct vhost_memory_region *reg = dev->mem->regions + i;
- vhost_dev_sync_region(dev, section, start_addr, end_addr,
- reg->guest_phys_addr,
- range_get_last(reg->guest_phys_addr,
- reg->memory_size));
+ if (vhost_log_dev_enabled(dev)) {
+ for (i = 0; i < dev->mem->nregions; ++i) {
+ struct vhost_memory_region *reg = dev->mem->regions + i;
+ vhost_dev_sync_region(dev, section, start_addr, end_addr,
+ reg->guest_phys_addr,
+ range_get_last(reg->guest_phys_addr,
+ reg->memory_size));
+ }
}
for (i = 0; i < dev->nvqs; ++i) {
struct vhost_virtqueue *vq = dev->vqs + i;
@@ -382,6 +434,7 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync)
g_free(log);
}
+ vhost_mlog_set_dev(dev, false);
dev->log = NULL;
dev->log_size = 0;
}
@@ -997,6 +1050,15 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
goto err_vq;
}
}
+
+ /*
+ * At log start we select our vhost_device logger that will scan the
+ * memory sections and skip for the others. This is possible because
+ * the log is shared amongst all vhost devices for a given type of
+ * backend.
+ */
+ vhost_mlog_set_dev(dev, enable_log);
+
return 0;
err_vq:
for (; i >= 0; --i) {
@@ -2072,6 +2134,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
goto fail_log;
}
+ vhost_mlog_set_dev(hdev, true);
}
if (vrings) {
r = vhost_dev_set_vring_enable(hdev, true);
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 0247778..d75faf4 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -129,6 +129,7 @@ struct vhost_dev {
void *opaque;
struct vhost_log *log;
QLIST_ENTRY(vhost_dev) entry;
+ QLIST_ENTRY(vhost_dev) logdev_entry;
QLIST_HEAD(, vhost_iommu) iommu_list;
IOMMUNotifier n;
const VhostDevConfigOps *config_ops;
--
1.8.3.1
next prev parent reply other threads:[~2024-02-14 13:02 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-14 11:50 [PATCH v2 1/2] vhost: dirty log should be per backend type Si-Wei Liu
2024-02-14 11:50 ` Si-Wei Liu [this message]
2024-03-06 18:42 ` [PATCH v2 2/2] vhost: Perform memory section dirty scans once per iteration Eugenio Perez Martin
2024-03-12 18:35 ` Michael S. Tsirkin
2024-02-14 18:42 ` [PATCH v2 1/2] vhost: dirty log should be per backend type Si-Wei Liu
2024-03-12 15:07 ` Michael S. Tsirkin
2024-03-13 19:13 ` Si-Wei Liu
2024-03-06 17:27 ` Eugenio Perez Martin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1707911419-11758-2-git-send-email-si-wei.liu@oracle.com \
--to=si-wei.liu@oracle.com \
--cc=jasowang@redhat.com \
--cc=joao.m.martins@oracle.com \
--cc=mst@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).