qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Steve Sistare <steven.sistare@oracle.com>
To: qemu-devel@nongnu.org
Cc: Jason Wang <jasowang@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Stefano Garzarella <sgarzare@redhat.com>,
	Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>,
	Hamza Khan <hamza.khan@nutanix.com>,
	Steve Sistare <steven.sistare@oracle.com>
Subject: [RFC V2 7/8] tap: cpr support
Date: Thu, 17 Jul 2025 11:39:27 -0700	[thread overview]
Message-ID: <1752777568-236368-8-git-send-email-steven.sistare@oracle.com> (raw)
In-Reply-To: <1752777568-236368-1-git-send-email-steven.sistare@oracle.com>

Provide the cpr=on option to preserve TAP and vhost descriptors during
cpr-transfer, so the management layer does not need to create a new
device for the target.

Save all tap fd's in canonical order, leveraging the index argument of
cpr_save_fd.  For the i'th queue, the tap device fd is saved at index 2*i,
and the vhostfd (if any) at index 2*i+1.

tap and vhost fd's are passed by name to the monitor when a NIC is hot
plugged, but the name is not known to qemu after cpr.  Allow the manager
to pass -1 for the fd "name" in the new qemu args to indicate that QEMU
should search for a saved value.  Example:

  -netdev tap,id=hostnet2,fds=-1:-1,vhostfds=-1:-1,cpr=on

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 qapi/net.json           |  5 +++-
 include/migration/cpr.h |  2 +-
 hw/vfio/device.c        |  2 +-
 migration/cpr.c         | 11 ++++----
 net/tap.c               | 70 ++++++++++++++++++++++++++++++++++++++-----------
 5 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/qapi/net.json b/qapi/net.json
index 97ea183..5c7422b 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -238,6 +238,8 @@
 # @poll-us: maximum number of microseconds that could be spent on busy
 #     polling for tap (since 2.7)
 #
+# @cpr: preserve fds and vhostfds during cpr-transfer.
+#
 # Since: 1.2
 ##
 { 'struct': 'NetdevTapOptions',
@@ -256,7 +258,8 @@
     '*vhostfds':   'str',
     '*vhostforce': 'bool',
     '*queues':     'uint32',
-    '*poll-us':    'uint32'} }
+    '*poll-us':    'uint32',
+    '*cpr':        'bool'} }
 
 ##
 # @NetdevSocketOptions:
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index 0fa57dd..baff57f 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -45,7 +45,7 @@ void cpr_state_close(void);
 struct QIOChannel *cpr_state_ioc(void);
 
 bool cpr_incoming_needed(void *opaque);
-int cpr_get_fd_param(const char *name, const char *fdname, int index,
+int cpr_get_fd_param(const char *name, const char *fdname, int index, bool cpr,
                      Error **errp);
 
 QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 96cf214..9eb6699 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -351,7 +351,7 @@ void vfio_device_free_name(VFIODevice *vbasedev)
 
 void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
 {
-    vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
+    vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, true, errp);
 }
 
 static VFIODeviceIOOps vfio_device_io_ops_ioctl;
diff --git a/migration/cpr.c b/migration/cpr.c
index e97be9d..6d01b8c 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -282,6 +282,7 @@ bool cpr_incoming_needed(void *opaque)
  * @name: CPR name for the descriptor
  * @fdname: An integer-valued string, or a name passed to a getfd command
  * @index: CPR index of the descriptor
+ * @cpr: use cpr
  * @errp: returned error message
  *
  * If CPR is not being performed, then use @fdname to find the fd.
@@ -291,22 +292,22 @@ bool cpr_incoming_needed(void *opaque)
  * On success returns the fd value, else returns -1.
  */
 int cpr_get_fd_param(const char *name, const char *fdname, int index,
-                     Error **errp)
+                     bool cpr, Error **errp)
 {
     ERRP_GUARD();
     int fd;
 
-    if (cpr_is_incoming()) {
+    if (cpr && cpr_is_incoming()) {
         fd = cpr_find_fd(name, index);
         if (fd < 0) {
             error_setg(errp, "cannot find saved value for fd %s", fdname);
         }
     } else {
         fd = monitor_fd_param(monitor_cur(), fdname, errp);
-        if (fd >= 0) {
-            cpr_save_fd(name, index, fd);
-        } else {
+        if (fd < 0) {
             error_prepend(errp, "Could not parse object fd %s:", fdname);
+        } else if (cpr) {
+            cpr_save_fd(name, index, fd);
         }
     }
     return fd;
diff --git a/net/tap.c b/net/tap.c
index 1b239fd..6a12751 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -35,6 +35,7 @@
 #include "net/eth.h"
 #include "net/net.h"
 #include "clients.h"
+#include "migration/cpr.h"
 #include "monitor/monitor.h"
 #include "system/system.h"
 #include "qapi/error.h"
@@ -59,6 +60,7 @@ typedef struct TAPState {
     bool has_ufo;
     bool has_uso;
     bool enabled;
+    bool cpr;
     VHostNetState *vhost_net;
     unsigned host_vnet_hdr_len;
     Notifier exit;
@@ -290,6 +292,9 @@ static void tap_cleanup(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
+    if (s->cpr) {
+        cpr_delete_fd_all(nc->name);
+    }
     if (s->vhost_net) {
         vhost_net_cleanup(s->vhost_net);
         g_free(s->vhost_net);
@@ -642,18 +647,24 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
     return fd;
 }
 
+/* CPR fd's for each queue are saved at these indices */
+#define TAP_FD_INDEX(queue)         (2 * (queue) + 0)
+#define TAP_VHOSTFD_INDEX(queue)    (2 * (queue) + 1)
+
 #define MAX_TAP_QUEUES 1024
 
 static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
                              const char *model, const char *name,
                              const char *ifname, const char *script,
                              const char *downscript, const char *vhostfdname,
-                             int vnet_hdr, int fd, Error **errp)
+                             int vnet_hdr, int fd, int index, Error **errp)
 {
     Error *err = NULL;
     TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
+    bool cpr = tap->has_cpr ? tap->cpr : false;
     int vhostfd;
 
+    s->cpr = cpr;
     tap_set_sndbuf(s->fd, tap, &err);
     if (err) {
         error_propagate(errp, err);
@@ -688,7 +699,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
         }
 
         if (vhostfdname) {
-            vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
+            vhostfd = cpr_get_fd_param(name, vhostfdname, index, cpr, &err);
             if (vhostfd == -1) {
                 error_propagate(errp, err);
                 goto failed;
@@ -699,7 +710,13 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
                 goto failed;
             }
         } else {
-            vhostfd = open("/dev/vhost-net", O_RDWR);
+            vhostfd = cpr ? cpr_find_fd(name, index) : -1;
+            if (vhostfd < 0) {
+                vhostfd = open("/dev/vhost-net", O_RDWR);
+                if (cpr && vhostfd >= 0) {
+                    cpr_save_fd(name, index, vhostfd);
+                }
+            }
             if (vhostfd < 0) {
                 error_setg_errno(errp, errno,
                                  "tap: open vhost char device failed");
@@ -727,6 +744,9 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
     return;
 
 failed:
+    if (cpr) {
+        cpr_delete_fd_all(name);
+    }
     qemu_del_net_client(&s->nc);
 }
 
@@ -759,7 +779,8 @@ static int get_fds(char *str, char *fds[], int max)
 int net_init_tap(const Netdev *netdev, const char *name,
                  NetClientState *peer, Error **errp)
 {
-    const NetdevTapOptions *tap;
+    const NetdevTapOptions *tap = &netdev->u.tap;
+    bool cpr = tap->has_cpr ? tap->cpr : false;
     int fd, vnet_hdr = 0, i = 0, queues;
     /* for the no-fd, no-helper case */
     const char *script;
@@ -795,7 +816,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
             goto out;
         }
 
-        fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
+        fd = cpr_get_fd_param(name, tap->fd, TAP_FD_INDEX(0), cpr, errp);
         if (fd == -1) {
             ret = -1;
             goto out;
@@ -818,13 +839,14 @@ int net_init_tap(const Netdev *netdev, const char *name,
 
         net_init_tap_one(tap, peer, "tap", name, NULL,
                          script, downscript,
-                         vhostfdname, vnet_hdr, fd, &err);
+                         vhostfdname, vnet_hdr, fd, TAP_VHOSTFD_INDEX(0), &err);
         if (err) {
             error_propagate(errp, err);
             close(fd);
             ret = -1;
             goto out;
         }
+
     } else if (tap->fds) {
         char **fds;
         char **vhost_fds;
@@ -855,7 +877,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
         }
 
         for (i = 0; i < nfds; i++) {
-            fd = monitor_fd_param(monitor_cur(), fds[i], errp);
+            fd = cpr_get_fd_param(name, fds[i], TAP_FD_INDEX(i), cpr, errp);
             if (fd == -1) {
                 ret = -1;
                 goto free_fail;
@@ -884,7 +906,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
             net_init_tap_one(tap, peer, "tap", name, ifname,
                              script, downscript,
                              tap->vhostfds ? vhost_fds[i] : NULL,
-                             vnet_hdr, fd, &err);
+                             vnet_hdr, fd, TAP_VHOSTFD_INDEX(i), &err);
             if (err) {
                 error_propagate(errp, err);
                 ret = -1;
@@ -912,9 +934,15 @@ free_fail:
             goto out;
         }
 
-        fd = net_bridge_run_helper(tap->helper,
-                                   tap->br ?: DEFAULT_BRIDGE_INTERFACE,
-                                   errp);
+        fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(0)) : -1;
+        if (fd < 0) {
+            fd = net_bridge_run_helper(tap->helper,
+                                    tap->br ?: DEFAULT_BRIDGE_INTERFACE,
+                                    errp);
+            if (cpr && fd >= 0) {
+                cpr_save_fd(name, TAP_FD_INDEX(0), fd);
+            }
+        }
         if (fd == -1) {
             ret = -1;
             goto out;
@@ -934,13 +962,14 @@ free_fail:
 
         net_init_tap_one(tap, peer, "bridge", name, ifname,
                          script, downscript, vhostfdname,
-                         vnet_hdr, fd, &err);
+                         vnet_hdr, fd, TAP_VHOSTFD_INDEX(0), &err);
         if (err) {
             error_propagate(errp, err);
             close(fd);
             ret = -1;
             goto out;
         }
+
     } else {
         g_autofree char *default_script = NULL;
         g_autofree char *default_downscript = NULL;
@@ -965,8 +994,14 @@ free_fail:
         }
 
         for (i = 0; i < queues; i++) {
-            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
-                              ifname, sizeof ifname, queues > 1, errp);
+            fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(i)) : -1;
+            if (fd < 0) {
+                fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
+                                ifname, sizeof ifname, queues > 1, errp);
+                if (cpr && fd >= 0) {
+                    cpr_save_fd(name, TAP_FD_INDEX(i), fd);
+                }
+            }
             if (fd == -1) {
                 ret = -1;
                 goto out;
@@ -984,7 +1019,9 @@ free_fail:
             net_init_tap_one(tap, peer, "tap", name, ifname,
                              i >= 1 ? "no" : script,
                              i >= 1 ? "no" : downscript,
-                             vhostfdname, vnet_hdr, fd, &err);
+                             vhostfdname, vnet_hdr,
+                             fd, TAP_VHOSTFD_INDEX(i),
+                             &err);
             if (err) {
                 error_propagate(errp, err);
                 close(fd);
@@ -995,6 +1032,9 @@ free_fail:
     }
 
 out:
+    if (ret && cpr) {
+        cpr_delete_fd_all(name);
+    }
     return ret;
 }
 
-- 
1.8.3.1



  parent reply	other threads:[~2025-07-17 20:45 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-17 18:39 [RFC V2 0/8] Live update: tap and vhost Steve Sistare
2025-07-17 18:39 ` [RFC V2 1/8] migration: stop vm earlier for cpr Steve Sistare
2025-07-17 18:39 ` [RFC V2 2/8] migration: cpr setup notifier Steve Sistare
2025-07-17 18:39 ` [RFC V2 3/8] vhost: reset vhost devices for cpr Steve Sistare
2025-08-27 11:29   ` Vladimir Sementsov-Ogievskiy
2025-08-27 18:38     ` Steven Sistare
2025-07-17 18:39 ` [RFC V2 4/8] cpr: delete all fds Steve Sistare
2025-07-17 18:39 ` [RFC V2 5/8] Revert "vhost-backend: remove vhost_kernel_reset_device()" Steve Sistare
2025-08-22 18:26   ` Steven Sistare
2025-07-17 18:39 ` [RFC V2 6/8] tap: common return label Steve Sistare
2025-07-17 18:39 ` Steve Sistare [this message]
2025-07-17 18:39 ` [RFC V2 8/8] tap: postload fix for cpr Steve Sistare
2025-07-18  8:48 ` [RFC V2 0/8] Live update: tap and vhost Lei Yang
2025-07-18 17:31   ` Steven Sistare
2025-07-24  5:46   ` Lei Yang
2025-08-05 13:54 ` Fabiano Rosas
2025-08-05 19:53   ` Steven Sistare
2025-08-06 15:51     ` Peter Xu
2025-08-11 18:24     ` Steven Sistare
2025-08-23 21:53 ` Vladimir Sementsov-Ogievskiy
2025-08-28 15:48   ` Steven Sistare
2025-08-29 19:37     ` Steven Sistare
2025-09-01 11:44       ` Vladimir Sementsov-Ogievskiy
2025-09-02 15:33         ` Steven Sistare
2025-09-02 17:09           ` Vladimir Sementsov-Ogievskiy
2025-09-05 16:16             ` Peter Xu
2025-09-08  9:55               ` Vladimir Sementsov-Ogievskiy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1752777568-236368-8-git-send-email-steven.sistare@oracle.com \
    --to=steven.sistare@oracle.com \
    --cc=farosas@suse.de \
    --cc=hamza.khan@nutanix.com \
    --cc=jasowang@redhat.com \
    --cc=mst@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=sgarzare@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).