From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Stefano Stabellini" <sstabellini@kernel.org>,
"Ilya Maximets" <i.maximets@ovn.org>,
"Philippe Mathieu-Daudé" <philmd@linaro.org>,
"Kevin Wolf" <kwolf@redhat.com>,
xen-devel@lists.xenproject.org,
"Anthony Perard" <anthony.perard@citrix.com>,
"Paolo Bonzini" <pbonzini@redhat.com>,
"Stefan Hajnoczi" <stefanha@redhat.com>,
qemu-block@nongnu.org, "Julia Suvorova" <jusual@redhat.com>,
"Aarushi Mehta" <mehta.aaru20@gmail.com>,
"Paul Durrant" <paul@xen.org>,
"Michael S. Tsirkin" <mst@redhat.com>,
"Fam Zheng" <fam@euphon.net>,
"Stefano Garzarella" <sgarzare@redhat.com>,
"Hanna Reitz" <hreitz@redhat.com>,
"Eric Blake" <eblake@redhat.com>
Subject: [PATCH v3 3/4] virtio: use defer_call() in virtio_irqfd_notify()
Date: Wed, 13 Sep 2023 16:00:44 -0400 [thread overview]
Message-ID: <20230913200045.1024233-4-stefanha@redhat.com> (raw)
In-Reply-To: <20230913200045.1024233-1-stefanha@redhat.com>
virtio-blk and virtio-scsi invoke virtio_irqfd_notify() to send Used
Buffer Notifications from an IOThread. This involves an eventfd
write(2) syscall. Calling this repeatedly when completing multiple I/O
requests in a row is wasteful.
Use the defer_call() API to batch together virtio_irqfd_notify() calls
made during thread pool (aio=threads), Linux AIO (aio=native), and
io_uring (aio=io_uring) completion processing.
Behavior is unchanged for emulated devices that do not use
defer_call_begin()/defer_call_end() since defer_call() immediately
invokes the callback when called outside a
defer_call_begin()/defer_call_end() region.
fio rw=randread bs=4k iodepth=64 numjobs=8 IOPS increases by ~9% with a
single IOThread and 8 vCPUs. iodepth=1 decreases by ~1% but this could
be noise. Detailed performance data and configuration specifics are
available here:
https://gitlab.com/stefanha/virt-playbooks/-/tree/blk_io_plug-irqfd
This duplicates the BH that virtio-blk uses for batching. The next
commit will remove it.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/io_uring.c | 6 ++++++
block/linux-aio.c | 4 ++++
hw/virtio/virtio.c | 13 ++++++++++++-
util/thread-pool.c | 5 +++++
hw/virtio/trace-events | 1 +
5 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/block/io_uring.c b/block/io_uring.c
index 3a1e1f45b3..7cdd00e9f1 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -125,6 +125,9 @@ static void luring_process_completions(LuringState *s)
{
struct io_uring_cqe *cqes;
int total_bytes;
+
+ defer_call_begin();
+
/*
* Request completion callbacks can run the nested event loop.
* Schedule ourselves so the nested event loop will "see" remaining
@@ -217,7 +220,10 @@ end:
aio_co_wake(luringcb->co);
}
}
+
qemu_bh_cancel(s->completion_bh);
+
+ defer_call_end();
}
static int ioq_submit(LuringState *s)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index a2670b3e46..ec05d946f3 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -205,6 +205,8 @@ static void qemu_laio_process_completions(LinuxAioState *s)
{
struct io_event *events;
+ defer_call_begin();
+
/* Reschedule so nested event loops see currently pending completions */
qemu_bh_schedule(s->completion_bh);
@@ -231,6 +233,8 @@ static void qemu_laio_process_completions(LinuxAioState *s)
* own `for` loop. If we are the last all counters dropped to zero. */
s->event_max = 0;
s->event_idx = 0;
+
+ defer_call_end();
}
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 969c25f4cf..d9aeed7012 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -15,6 +15,7 @@
#include "qapi/error.h"
#include "qapi/qapi-commands-virtio.h"
#include "trace.h"
+#include "qemu/defer-call.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
@@ -2426,6 +2427,16 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
}
}
+/* Batch irqs while inside a defer_call_begin()/defer_call_end() section */
+static void virtio_notify_irqfd_deferred_fn(void *opaque)
+{
+ EventNotifier *notifier = opaque;
+ VirtQueue *vq = container_of(notifier, VirtQueue, guest_notifier);
+
+ trace_virtio_notify_irqfd_deferred_fn(vq->vdev, vq);
+ event_notifier_set(notifier);
+}
+
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
WITH_RCU_READ_LOCK_GUARD() {
@@ -2452,7 +2463,7 @@ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
* to an atomic operation.
*/
virtio_set_isr(vq->vdev, 0x1);
- event_notifier_set(&vq->guest_notifier);
+ defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier);
}
static void virtio_irq(VirtQueue *vq)
diff --git a/util/thread-pool.c b/util/thread-pool.c
index e3d8292d14..d84961779a 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -15,6 +15,7 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
#include "qemu/osdep.h"
+#include "qemu/defer-call.h"
#include "qemu/queue.h"
#include "qemu/thread.h"
#include "qemu/coroutine.h"
@@ -175,6 +176,8 @@ static void thread_pool_completion_bh(void *opaque)
ThreadPool *pool = opaque;
ThreadPoolElement *elem, *next;
+ defer_call_begin(); /* cb() may use defer_call() to coalesce work */
+
restart:
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
if (elem->state != THREAD_DONE) {
@@ -208,6 +211,8 @@ restart:
qemu_aio_unref(elem);
}
}
+
+ defer_call_end();
}
static void thread_pool_cancel(BlockAIOCB *acb)
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 7109cf1a3b..29f4f543ad 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -73,6 +73,7 @@ virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "
virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u"
virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p"
+virtio_notify_irqfd_deferred_fn(void *vdev, void *vq) "vdev %p vq %p"
virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p"
virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u"
--
2.41.0
next prev parent reply other threads:[~2023-09-13 20:01 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-13 20:00 [PATCH v3 0/4] virtio-blk: use blk_io_plug_call() instead of notification BH Stefan Hajnoczi
2023-09-13 20:00 ` [PATCH v3 1/4] block: rename blk_io_plug_call() API to defer_call() Stefan Hajnoczi
2023-09-13 20:00 ` [PATCH v3 2/4] util/defer-call: move defer_call() to util/ Stefan Hajnoczi
2023-09-13 20:00 ` Stefan Hajnoczi [this message]
2023-09-13 20:00 ` [PATCH v3 4/4] virtio-blk: remove batch notification BH Stefan Hajnoczi
2023-09-13 20:27 ` [PATCH v3 0/4] virtio-blk: use blk_io_plug_call() instead of " Michael S. Tsirkin
2023-10-31 16:09 ` Kevin Wolf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230913200045.1024233-4-stefanha@redhat.com \
--to=stefanha@redhat.com \
--cc=anthony.perard@citrix.com \
--cc=eblake@redhat.com \
--cc=fam@euphon.net \
--cc=hreitz@redhat.com \
--cc=i.maximets@ovn.org \
--cc=jusual@redhat.com \
--cc=kwolf@redhat.com \
--cc=mehta.aaru20@gmail.com \
--cc=mst@redhat.com \
--cc=paul@xen.org \
--cc=pbonzini@redhat.com \
--cc=philmd@linaro.org \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=sgarzare@redhat.com \
--cc=sstabellini@kernel.org \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.