From: Steve Sistare <steven.sistare@oracle.com>
To: qemu-devel@nongnu.org
Cc: Jason Wang <jasowang@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
Stefano Garzarella <sgarzare@redhat.com>,
Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>,
Hamza Khan <hamza.khan@nutanix.com>,
Steve Sistare <steven.sistare@oracle.com>
Subject: [RFC V2 7/8] tap: cpr support
Date: Thu, 17 Jul 2025 11:39:27 -0700 [thread overview]
Message-ID: <1752777568-236368-8-git-send-email-steven.sistare@oracle.com> (raw)
In-Reply-To: <1752777568-236368-1-git-send-email-steven.sistare@oracle.com>
Provide the cpr=on option to preserve TAP and vhost descriptors during
cpr-transfer, so the management layer does not need to create a new
device for the target.
Save all tap fd's in canonical order, leveraging the index argument of
cpr_save_fd. For the i'th queue, the tap device fd is saved at index 2*i,
and the vhostfd (if any) at index 2*i+1.
Tap and vhost fd's are passed by name to the monitor when a NIC is
hot-plugged, but that name is not known to the new QEMU after CPR. Allow
the manager to pass -1 as the fd "name" in the new QEMU arguments to
indicate that QEMU should look up the saved value instead. Example:
-netdev tap,id=hostnet2,fds=-1:-1,vhostfds=-1:-1,cpr=on
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
qapi/net.json | 5 +++-
include/migration/cpr.h | 2 +-
hw/vfio/device.c | 2 +-
migration/cpr.c | 11 ++++----
net/tap.c | 70 ++++++++++++++++++++++++++++++++++++++-----------
5 files changed, 67 insertions(+), 23 deletions(-)
diff --git a/qapi/net.json b/qapi/net.json
index 97ea183..5c7422b 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -238,6 +238,8 @@
# @poll-us: maximum number of microseconds that could be spent on busy
# polling for tap (since 2.7)
#
+# @cpr: preserve fds and vhostfds during cpr-transfer.
+#
# Since: 1.2
##
{ 'struct': 'NetdevTapOptions',
@@ -256,7 +258,8 @@
'*vhostfds': 'str',
'*vhostforce': 'bool',
'*queues': 'uint32',
- '*poll-us': 'uint32'} }
+ '*poll-us': 'uint32',
+ '*cpr': 'bool'} }
##
# @NetdevSocketOptions:
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index 0fa57dd..baff57f 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -45,7 +45,7 @@ void cpr_state_close(void);
struct QIOChannel *cpr_state_ioc(void);
bool cpr_incoming_needed(void *opaque);
-int cpr_get_fd_param(const char *name, const char *fdname, int index,
+int cpr_get_fd_param(const char *name, const char *fdname, int index, bool cpr,
Error **errp);
QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 96cf214..9eb6699 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -351,7 +351,7 @@ void vfio_device_free_name(VFIODevice *vbasedev)
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
{
- vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
+ vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, true, errp);
}
static VFIODeviceIOOps vfio_device_io_ops_ioctl;
diff --git a/migration/cpr.c b/migration/cpr.c
index e97be9d..6d01b8c 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -282,6 +282,7 @@ bool cpr_incoming_needed(void *opaque)
* @name: CPR name for the descriptor
* @fdname: An integer-valued string, or a name passed to a getfd command
* @index: CPR index of the descriptor
+ * @cpr: if true, look up or save the fd in CPR state; else only use @fdname
* @errp: returned error message
*
* If CPR is not being performed, then use @fdname to find the fd.
@@ -291,22 +292,22 @@ bool cpr_incoming_needed(void *opaque)
* On success returns the fd value, else returns -1.
*/
int cpr_get_fd_param(const char *name, const char *fdname, int index,
- Error **errp)
+ bool cpr, Error **errp)
{
ERRP_GUARD();
int fd;
- if (cpr_is_incoming()) {
+ if (cpr && cpr_is_incoming()) {
fd = cpr_find_fd(name, index);
if (fd < 0) {
error_setg(errp, "cannot find saved value for fd %s", fdname);
}
} else {
fd = monitor_fd_param(monitor_cur(), fdname, errp);
- if (fd >= 0) {
- cpr_save_fd(name, index, fd);
- } else {
+ if (fd < 0) {
error_prepend(errp, "Could not parse object fd %s:", fdname);
+ } else if (cpr) {
+ cpr_save_fd(name, index, fd);
}
}
return fd;
diff --git a/net/tap.c b/net/tap.c
index 1b239fd..6a12751 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -35,6 +35,7 @@
#include "net/eth.h"
#include "net/net.h"
#include "clients.h"
+#include "migration/cpr.h"
#include "monitor/monitor.h"
#include "system/system.h"
#include "qapi/error.h"
@@ -59,6 +60,7 @@ typedef struct TAPState {
bool has_ufo;
bool has_uso;
bool enabled;
+ bool cpr;
VHostNetState *vhost_net;
unsigned host_vnet_hdr_len;
Notifier exit;
@@ -290,6 +292,9 @@ static void tap_cleanup(NetClientState *nc)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ if (s->cpr) {
+ cpr_delete_fd_all(nc->name);
+ }
if (s->vhost_net) {
vhost_net_cleanup(s->vhost_net);
g_free(s->vhost_net);
@@ -642,18 +647,24 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
return fd;
}
+/* CPR fd's for each queue are saved at these indices */
+#define TAP_FD_INDEX(queue) (2 * (queue) + 0)
+#define TAP_VHOSTFD_INDEX(queue) (2 * (queue) + 1)
+
#define MAX_TAP_QUEUES 1024
static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
const char *model, const char *name,
const char *ifname, const char *script,
const char *downscript, const char *vhostfdname,
- int vnet_hdr, int fd, Error **errp)
+ int vnet_hdr, int fd, int index, Error **errp)
{
Error *err = NULL;
TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
+ bool cpr = tap->has_cpr ? tap->cpr : false;
int vhostfd;
+ s->cpr = cpr;
tap_set_sndbuf(s->fd, tap, &err);
if (err) {
error_propagate(errp, err);
@@ -688,7 +699,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
}
if (vhostfdname) {
- vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
+ vhostfd = cpr_get_fd_param(name, vhostfdname, index, cpr, &err);
if (vhostfd == -1) {
error_propagate(errp, err);
goto failed;
@@ -699,7 +710,13 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
goto failed;
}
} else {
- vhostfd = open("/dev/vhost-net", O_RDWR);
+ vhostfd = cpr ? cpr_find_fd(name, index) : -1;
+ if (vhostfd < 0) {
+ vhostfd = open("/dev/vhost-net", O_RDWR);
+ if (cpr && vhostfd >= 0) {
+ cpr_save_fd(name, index, vhostfd);
+ }
+ }
if (vhostfd < 0) {
error_setg_errno(errp, errno,
"tap: open vhost char device failed");
@@ -727,6 +744,9 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
return;
failed:
+ if (cpr) {
+ cpr_delete_fd_all(name);
+ }
qemu_del_net_client(&s->nc);
}
@@ -759,7 +779,8 @@ static int get_fds(char *str, char *fds[], int max)
int net_init_tap(const Netdev *netdev, const char *name,
NetClientState *peer, Error **errp)
{
- const NetdevTapOptions *tap;
+ const NetdevTapOptions *tap = &netdev->u.tap;
+ bool cpr = tap->has_cpr ? tap->cpr : false;
int fd, vnet_hdr = 0, i = 0, queues;
/* for the no-fd, no-helper case */
const char *script;
@@ -795,7 +816,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
goto out;
}
- fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
+ fd = cpr_get_fd_param(name, tap->fd, TAP_FD_INDEX(0), cpr, errp);
if (fd == -1) {
ret = -1;
goto out;
@@ -818,13 +839,14 @@ int net_init_tap(const Netdev *netdev, const char *name,
net_init_tap_one(tap, peer, "tap", name, NULL,
script, downscript,
- vhostfdname, vnet_hdr, fd, &err);
+ vhostfdname, vnet_hdr, fd, TAP_VHOSTFD_INDEX(0), &err);
if (err) {
error_propagate(errp, err);
close(fd);
ret = -1;
goto out;
}
+
} else if (tap->fds) {
char **fds;
char **vhost_fds;
@@ -855,7 +877,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
}
for (i = 0; i < nfds; i++) {
- fd = monitor_fd_param(monitor_cur(), fds[i], errp);
+ fd = cpr_get_fd_param(name, fds[i], TAP_FD_INDEX(i), cpr, errp);
if (fd == -1) {
ret = -1;
goto free_fail;
@@ -884,7 +906,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
net_init_tap_one(tap, peer, "tap", name, ifname,
script, downscript,
tap->vhostfds ? vhost_fds[i] : NULL,
- vnet_hdr, fd, &err);
+ vnet_hdr, fd, TAP_VHOSTFD_INDEX(i), &err);
if (err) {
error_propagate(errp, err);
ret = -1;
@@ -912,9 +934,15 @@ free_fail:
goto out;
}
- fd = net_bridge_run_helper(tap->helper,
- tap->br ?: DEFAULT_BRIDGE_INTERFACE,
- errp);
+ fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(0)) : -1;
+ if (fd < 0) {
+ fd = net_bridge_run_helper(tap->helper,
+ tap->br ?: DEFAULT_BRIDGE_INTERFACE,
+ errp);
+ if (cpr && fd >= 0) {
+ cpr_save_fd(name, TAP_FD_INDEX(0), fd);
+ }
+ }
if (fd == -1) {
ret = -1;
goto out;
@@ -934,13 +962,14 @@ free_fail:
net_init_tap_one(tap, peer, "bridge", name, ifname,
script, downscript, vhostfdname,
- vnet_hdr, fd, &err);
+ vnet_hdr, fd, TAP_VHOSTFD_INDEX(0), &err);
if (err) {
error_propagate(errp, err);
close(fd);
ret = -1;
goto out;
}
+
} else {
g_autofree char *default_script = NULL;
g_autofree char *default_downscript = NULL;
@@ -965,8 +994,14 @@ free_fail:
}
for (i = 0; i < queues; i++) {
- fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
- ifname, sizeof ifname, queues > 1, errp);
+ fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(i)) : -1;
+ if (fd < 0) {
+ fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
+ ifname, sizeof ifname, queues > 1, errp);
+ if (cpr && fd >= 0) {
+ cpr_save_fd(name, TAP_FD_INDEX(i), fd);
+ }
+ }
if (fd == -1) {
ret = -1;
goto out;
@@ -984,7 +1019,9 @@ free_fail:
net_init_tap_one(tap, peer, "tap", name, ifname,
i >= 1 ? "no" : script,
i >= 1 ? "no" : downscript,
- vhostfdname, vnet_hdr, fd, &err);
+ vhostfdname, vnet_hdr,
+ fd, TAP_VHOSTFD_INDEX(i),
+ &err);
if (err) {
error_propagate(errp, err);
close(fd);
@@ -995,6 +1032,9 @@ free_fail:
}
out:
+ if (ret && cpr) {
+ cpr_delete_fd_all(name);
+ }
return ret;
}
--
1.8.3.1
next prev parent reply other threads:[~2025-07-17 20:45 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-17 18:39 [RFC V2 0/8] Live update: tap and vhost Steve Sistare
2025-07-17 18:39 ` [RFC V2 1/8] migration: stop vm earlier for cpr Steve Sistare
2025-07-17 18:39 ` [RFC V2 2/8] migration: cpr setup notifier Steve Sistare
2025-07-17 18:39 ` [RFC V2 3/8] vhost: reset vhost devices for cpr Steve Sistare
2025-08-27 11:29 ` Vladimir Sementsov-Ogievskiy
2025-08-27 18:38 ` Steven Sistare
2025-07-17 18:39 ` [RFC V2 4/8] cpr: delete all fds Steve Sistare
2025-07-17 18:39 ` [RFC V2 5/8] Revert "vhost-backend: remove vhost_kernel_reset_device()" Steve Sistare
2025-08-22 18:26 ` Steven Sistare
2025-07-17 18:39 ` [RFC V2 6/8] tap: common return label Steve Sistare
2025-07-17 18:39 ` Steve Sistare [this message]
2025-07-17 18:39 ` [RFC V2 8/8] tap: postload fix for cpr Steve Sistare
2025-07-18 8:48 ` [RFC V2 0/8] Live update: tap and vhost Lei Yang
2025-07-18 17:31 ` Steven Sistare
2025-07-24 5:46 ` Lei Yang
2025-08-05 13:54 ` Fabiano Rosas
2025-08-05 19:53 ` Steven Sistare
2025-08-06 15:51 ` Peter Xu
2025-08-11 18:24 ` Steven Sistare
2025-08-23 21:53 ` Vladimir Sementsov-Ogievskiy
2025-08-28 15:48 ` Steven Sistare
2025-08-29 19:37 ` Steven Sistare
2025-09-01 11:44 ` Vladimir Sementsov-Ogievskiy
2025-09-02 15:33 ` Steven Sistare
2025-09-02 17:09 ` Vladimir Sementsov-Ogievskiy
2025-09-05 16:16 ` Peter Xu
2025-09-08 9:55 ` Vladimir Sementsov-Ogievskiy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1752777568-236368-8-git-send-email-steven.sistare@oracle.com \
--to=steven.sistare@oracle.com \
--cc=farosas@suse.de \
--cc=hamza.khan@nutanix.com \
--cc=jasowang@redhat.com \
--cc=mst@redhat.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=sgarzare@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).