From: Eric Blake <eblake@redhat.com>
To: qemu-devel@nongnu.org
Cc: qemu-block@nongnu.org, "Richard W.M. Jones" <rjones@redhat.com>,
	Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>,
	Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>
Subject: [PATCH v2 4/4] nbd: Enable multi-conn using round-robin
Date: Mon, 28 Apr 2025 13:46:47 -0500
Message-ID: <20250428185246.492388-10-eblake@redhat.com>
In-Reply-To: <20250428185246.492388-6-eblake@redhat.com>

From: "Richard W.M. Jones" <rjones@redhat.com>

Enable NBD multi-conn by spreading operations across multiple
connections.

(XXX) This uses a naive round-robin approach that could be improved.
For example, we could look at how many requests are in flight on each
connection and assign new operations to the one with the fewest, or we
could estimate the load on each connection based on the size of its
outstanding requests.  This implementation does none of that.
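
For illustration only (not part of this patch), a least-loaded selector
might look roughly like the sketch below.  ConnSketch, StateSketch and
the in_flight counter are hypothetical stand-ins; the real NBDConnState
and BDRVNBDState do not necessarily expose such a field.

    /* Sketch: pick the connection with the fewest requests in flight. */
    #include <stddef.h>

    #define MAX_MULTI_CONN 16

    typedef struct {
        unsigned in_flight;     /* hypothetical outstanding-request count */
    } ConnSketch;

    typedef struct {
        unsigned multi_conn;    /* number of active connections */
        ConnSketch *conns[MAX_MULTI_CONN];
    } StateSketch;

    static ConnSketch *choose_least_loaded(StateSketch *s)
    {
        size_t i, best = 0;

        if (s->multi_conn <= 1) {
            return s->conns[0];
        }
        for (i = 1; i < s->multi_conn; i++) {
            if (s->conns[i]->in_flight < s->conns[best]->in_flight) {
                best = i;
            }
        }
        return s->conns[best];
    }

A real implementation would also need that counter maintained atomically
as requests start and complete, which is part of why this patch sticks
to the simpler stateless round robin.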

Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
Message-ID: <20230309113946.1528247-5-rjones@redhat.com>
---
 block/nbd.c | 67 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 49 insertions(+), 18 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 19da1a7a1fe..bf5bc57569c 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1264,6 +1264,26 @@ nbd_co_request(NBDConnState *cs, NBDRequest *request,
     return ret ? ret : request_ret;
 }

+/*
+ * If multi-conn, choose a connection for this operation.
+ */
+static NBDConnState *choose_connection(BDRVNBDState *s)
+{
+    static size_t next;
+    size_t i;
+
+    if (s->multi_conn <= 1) {
+        return s->conns[0];
+    }
+
+    /* XXX Stupid simple round robin. */
+    i = qatomic_fetch_inc(&next);
+    i %= s->multi_conn;
+
+    assert(s->conns[i] != NULL);
+    return s->conns[i];
+}
+
 static int coroutine_fn GRAPH_RDLOCK
 nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
                      QEMUIOVector *qiov, BdrvRequestFlags flags)
@@ -1276,7 +1296,7 @@ nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
         .from = offset,
         .len = bytes,
     };
-    NBDConnState * const cs = s->conns[0];
+    NBDConnState * const cs = choose_connection(s);

     assert(bytes <= NBD_MAX_BUFFER_SIZE);

@@ -1333,7 +1353,7 @@ nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
         .from = offset,
         .len = bytes,
     };
-    NBDConnState * const cs = s->conns[0];
+    NBDConnState * const cs = choose_connection(s);

     assert(!(cs->info.flags & NBD_FLAG_READ_ONLY));
     if (flags & BDRV_REQ_FUA) {
@@ -1359,7 +1379,7 @@ nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
         .from = offset,
         .len = bytes,
     };
-    NBDConnState * const cs = s->conns[0];
+    NBDConnState * const cs = choose_connection(s);

     /* rely on max_pwrite_zeroes */
     assert(bytes <= UINT32_MAX || cs->info.mode >= NBD_MODE_EXTENDED);
@@ -1391,7 +1411,13 @@ static int coroutine_fn GRAPH_RDLOCK nbd_client_co_flush(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = { .type = NBD_CMD_FLUSH };
-    NBDConnState * const cs = s->conns[0];
+
+    /*
+     * Multi-conn (if used) guarantees that flushing on any connection
+     * flushes caches on all connections, so we can perform this
+     * operation on any.
+     */
+    NBDConnState * const cs = choose_connection(s);

     if (!(cs->info.flags & NBD_FLAG_SEND_FLUSH)) {
         return 0;
@@ -1412,7 +1438,7 @@ nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
         .from = offset,
         .len = bytes,
     };
-    NBDConnState * const cs = s->conns[0];
+    NBDConnState * const cs = choose_connection(s);

     /* rely on max_pdiscard */
     assert(bytes <= UINT32_MAX || cs->info.mode >= NBD_MODE_EXTENDED);
@@ -1433,7 +1459,7 @@ static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status(
     NBDExtent64 extent = { 0 };
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     Error *local_err = NULL;
-    NBDConnState * const cs = s->conns[0];
+    NBDConnState * const cs = choose_connection(s);

     NBDRequest request = {
         .type = NBD_CMD_BLOCK_STATUS,
@@ -2058,7 +2084,7 @@ fail:
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-    NBDConnState * const cs = s->conns[0];
+    NBDConnState * const cs = choose_connection(s);
     uint32_t min = cs->info.min_block;
     uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, cs->info.max_block);

@@ -2124,7 +2150,7 @@ static int coroutine_fn nbd_co_truncate(BlockDriverState *bs, int64_t offset,
                                         BdrvRequestFlags flags, Error **errp)
 {
     BDRVNBDState *s = bs->opaque;
-    NBDConnState * const cs = s->conns[0];
+    NBDConnState * const cs = choose_connection(s);

     if (offset != cs->info.size && exact) {
         error_setg(errp, "Cannot resize NBD nodes");
@@ -2207,24 +2233,29 @@ static const char *const nbd_strong_runtime_opts[] = {
 static void nbd_cancel_in_flight(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-    NBDConnState * const cs = s->conns[0];
+    size_t i;
+    NBDConnState *cs;

-    reconnect_delay_timer_del(cs);
+    for (i = 0; i < MAX_MULTI_CONN; ++i) {
+        cs = s->conns[i];

-    qemu_mutex_lock(&cs->requests_lock);
-    if (cs->state == NBD_CLIENT_CONNECTING_WAIT) {
-        cs->state = NBD_CLIENT_CONNECTING_NOWAIT;
+        reconnect_delay_timer_del(cs);
+
+        qemu_mutex_lock(&cs->requests_lock);
+        if (cs->state == NBD_CLIENT_CONNECTING_WAIT) {
+            cs->state = NBD_CLIENT_CONNECTING_NOWAIT;
+        }
+        qemu_mutex_unlock(&cs->requests_lock);
+
+        nbd_co_establish_connection_cancel(cs->conn);
     }
-    qemu_mutex_unlock(&cs->requests_lock);
-
-    nbd_co_establish_connection_cancel(cs->conn);
 }

 static void nbd_attach_aio_context(BlockDriverState *bs,
                                    AioContext *new_context)
 {
     BDRVNBDState *s = bs->opaque;
-    NBDConnState * const cs = s->conns[0];
+    NBDConnState * const cs = choose_connection(s);

     /* The open_timer is used only during nbd_open() */
     assert(!cs->open_timer);
@@ -2244,7 +2275,7 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
 static void nbd_detach_aio_context(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
-    NBDConnState * const cs = s->conns[0];
+    NBDConnState * const cs = choose_connection(s);

     assert(!cs->open_timer);
     assert(!cs->reconnect_delay_timer);
-- 
2.49.0


