From: "Daniel P. Berrangé" <berrange@redhat.com>
To: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Cc: mst@redhat.com, jasowang@redhat.com, peterx@redhat.com,
farosas@suse.de, sw@weilnetz.de, eblake@redhat.com,
armbru@redhat.com, thuth@redhat.com, philmd@linaro.org,
qemu-devel@nongnu.org, michael.roth@amd.com,
steven.sistare@oracle.com, leiyang@redhat.com,
davydov-max@yandex-team.ru, yc-core@yandex-team.ru,
raphael.s.norwitz@gmail.com
Subject: Re: [PATCH v8 17/19] virtio-net: support backend-transfer migration for virtio-net/tap
Date: Thu, 16 Oct 2025 09:23:36 +0100 [thread overview]
Message-ID: <aPCriMKg_UolIrHK@redhat.com> (raw)
In-Reply-To: <20251015132136.1083972-18-vsementsov@yandex-team.ru>
On Wed, Oct 15, 2025 at 04:21:33PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Add virtio-net option backend-transfer, which is true by default,
> but false for older machine types, which don't support the feature.
>
> For backend-transfer migration, both global migration parameter
> backend-transfer and virtio-net backend-transfer option should be
> set to true.
>
> With the parameters enabled (both on source and target, of course), and
> with a unix socket used as the migration channel, we do "migrate" the
> virtio-net backend - the TAP device, with all its fds.
>
> This way the management tool should not care about creating a new TAP, and
> should not handle switching to it. Migration downtime becomes shorter.
>
> How it works:
>
> 1. For incoming migration, we postpone TAP initialization up to
> pre-incoming point.
>
> 2. At pre-incoming point we see that "virtio-net-tap" is set for
> backend-transfer, so we postpone TAP initialization up to
> post-load
>
> 3. During virtio-load, we get TAP state (and fds) as part of
> virtio-net state
>
> 4. In post-load we finalize TAP initialization
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
> hw/core/machine.c | 1 +
> hw/net/virtio-net.c | 75 +++++++++++++++++++++++++++++++++-
> include/hw/virtio/virtio-net.h | 1 +
> include/net/tap.h | 2 +
> net/tap.c | 45 +++++++++++++++++++-
> 5 files changed, 122 insertions(+), 2 deletions(-)
>
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 681adbb7ac..a3d77f5604 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -40,6 +40,7 @@
>
> GlobalProperty hw_compat_10_1[] = {
> { TYPE_ACPI_GED, "x-has-hest-addr", "false" },
> + { TYPE_VIRTIO_NET, "backend-transfer", "false" },
> };
> const size_t hw_compat_10_1_len = G_N_ELEMENTS(hw_compat_10_1);
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 661413c72f..5f9711dee7 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -38,6 +38,7 @@
> #include "qapi/qapi-events-migration.h"
> #include "hw/virtio/virtio-access.h"
> #include "migration/misc.h"
> +#include "migration/options.h"
> #include "standard-headers/linux/ethtool.h"
> #include "system/system.h"
> #include "system/replay.h"
> @@ -3358,6 +3359,9 @@ struct VirtIONetMigTmp {
> uint16_t curr_queue_pairs_1;
> uint8_t has_ufo;
> uint32_t has_vnet_hdr;
> +
> + NetClientState *ncs;
> + uint32_t max_queue_pairs;
> };
>
> /* The 2nd and subsequent tx_waiting flags are loaded later than
> @@ -3627,6 +3631,71 @@ static const VMStateDescription vhost_user_net_backend_state = {
> }
> };
>
> +static bool virtio_net_is_tap_mig(void *opaque, int version_id)
> +{
> + VirtIONet *n = opaque;
> + NetClientState *nc;
> +
> + nc = qemu_get_queue(n->nic);
> +
> + return migrate_backend_transfer() && n->backend_transfer && nc->peer &&
> + nc->peer->info->type == NET_CLIENT_DRIVER_TAP;
> +}
> +
> +static int virtio_net_nic_pre_save(void *opaque)
> +{
> + struct VirtIONetMigTmp *tmp = opaque;
> +
> + tmp->ncs = tmp->parent->nic->ncs;
> + tmp->max_queue_pairs = tmp->parent->max_queue_pairs;
> +
> + return 0;
> +}
> +
> +static int virtio_net_nic_pre_load(void *opaque)
> +{
> + /* Reuse the pointer setup from save */
> + virtio_net_nic_pre_save(opaque);
> +
> + return 0;
> +}
> +
> +static int virtio_net_nic_post_load(void *opaque, int version_id)
> +{
> + struct VirtIONetMigTmp *tmp = opaque;
> + Error *local_err = NULL;
> +
> + if (!virtio_net_update_host_features(tmp->parent, &local_err)) {
> + error_report_err(local_err);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static const VMStateDescription vmstate_virtio_net_nic_nc = {
> + .name = "virtio-net-nic-nc",
> + .fields = (const VMStateField[]) {
> + VMSTATE_STRUCT_POINTER(peer, NetClientState, vmstate_tap,
> + NetClientState),
> + VMSTATE_END_OF_LIST()
> + },
> +};
> +
> +static const VMStateDescription vmstate_virtio_net_nic = {
> + .name = "virtio-net-nic",
> + .pre_load = virtio_net_nic_pre_load,
> + .pre_save = virtio_net_nic_pre_save,
> + .post_load = virtio_net_nic_post_load,
> + .fields = (const VMStateField[]) {
> + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(ncs, struct VirtIONetMigTmp,
> + max_queue_pairs,
> + vmstate_virtio_net_nic_nc,
> + struct NetClientState),
> + VMSTATE_END_OF_LIST()
> + },
> +};
> +
> static const VMStateDescription vmstate_virtio_net_device = {
> .name = "virtio-net-device",
> .version_id = VIRTIO_NET_VM_VERSION,
> @@ -3658,6 +3727,9 @@ static const VMStateDescription vmstate_virtio_net_device = {
> * but based on the uint.
> */
> VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
> + VMSTATE_WITH_TMP_TEST(VirtIONet, virtio_net_is_tap_mig,
> + struct VirtIONetMigTmp,
> + vmstate_virtio_net_nic),
> VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
> vmstate_virtio_net_has_vnet),
> VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
> @@ -4239,7 +4311,7 @@ static bool vhost_user_blk_pre_incoming(void *opaque, Error **errp)
> VirtIONet *n = opaque;
> int i;
>
> - if (peer_wait_incoming(n)) {
> + if (!virtio_net_is_tap_mig(opaque, 0) && peer_wait_incoming(n)) {
> for (i = 0; i < n->max_queue_pairs; i++) {
> if (!peer_postponed_init(n, i, errp)) {
> return false;
> @@ -4389,6 +4461,7 @@ static const Property virtio_net_properties[] = {
> host_features_ex,
> VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM,
> false),
> + DEFINE_PROP_BOOL("backend-transfer", VirtIONet, backend_transfer, true),
> };
>
> static void virtio_net_class_init(ObjectClass *klass, const void *data)
I really don't like this approach, because it requires the frontend
device to know about every different backend implementation that is able
to do state transfer. This really violates the separation between the
frontend and backend. The choice of specific backend should generally
be opaque to the frontend.
This really ought to be redesigned to work in terms of a formal API
exposed by the backend, not poking at TAP-backend-specific details,
e.g. an API that operates on NetClientState, for which each backend
can provide an optional implementation.
> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> index 5b8ab7bda7..bf07f8a4cb 100644
> --- a/include/hw/virtio/virtio-net.h
> +++ b/include/hw/virtio/virtio-net.h
> @@ -231,6 +231,7 @@ struct VirtIONet {
> struct EBPFRSSContext ebpf_rss;
> uint32_t nr_ebpf_rss_fds;
> char **ebpf_rss_fds;
> + bool backend_transfer;
> };
>
> size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
> diff --git a/include/net/tap.h b/include/net/tap.h
> index 5a926ba513..506f7ab719 100644
> --- a/include/net/tap.h
> +++ b/include/net/tap.h
> @@ -36,4 +36,6 @@ int tap_get_fd(NetClientState *nc);
> bool tap_wait_incoming(NetClientState *nc);
> bool tap_postponed_init(NetClientState *nc, Error **errp);
>
> +extern const VMStateDescription vmstate_tap;
> +
> #endif /* QEMU_NET_TAP_H */
> diff --git a/net/tap.c b/net/tap.c
> index 8afbf3b407..b9c12dd64c 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -819,7 +819,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
>
> static bool net_tap_setup(TAPState *s, int fd, int vnet_hdr, Error **errp)
> {
> - if (!net_tap_set_fd(s, fd, vnet_hdr, errp)) {
> + if (fd != -1 && !net_tap_set_fd(s, fd, vnet_hdr, errp)) {
> return false;
> }
>
> @@ -1225,6 +1225,49 @@ int tap_disable(NetClientState *nc)
> }
> }
>
> +static int tap_pre_load(void *opaque)
> +{
> + TAPState *s = opaque;
> +
> + if (s->fd != -1) {
> + error_report(
> + "TAP is already initialized and cannot receive incoming fd");
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int tap_post_load(void *opaque, int version_id)
> +{
> + TAPState *s = opaque;
> + Error *local_err = NULL;
> +
> + if (!net_tap_setup(s, -1, -1, &local_err)) {
> + error_report_err(local_err);
> + qemu_del_net_client(&s->nc);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +const VMStateDescription vmstate_tap = {
> + .name = "net-tap",
> + .pre_load = tap_pre_load,
> + .post_load = tap_post_load,
> + .fields = (const VMStateField[]) {
> + VMSTATE_FD(fd, TAPState),
> + VMSTATE_BOOL(using_vnet_hdr, TAPState),
> + VMSTATE_BOOL(has_ufo, TAPState),
> + VMSTATE_BOOL(has_uso, TAPState),
> + VMSTATE_BOOL(has_tunnel, TAPState),
> + VMSTATE_BOOL(enabled, TAPState),
> + VMSTATE_UINT32(host_vnet_hdr_len, TAPState),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> bool tap_wait_incoming(NetClientState *nc)
> {
> TAPState *s = DO_UPCAST(TAPState, nc, nc);
IMHO implementing state transfer in the backends ought to be a separate
commit from adding support for using that in the frontend.
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
next prev parent reply other threads:[~2025-10-16 8:24 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-15 13:21 [PATCH v8 00/19] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 01/19] net/tap: net_init_tap_one(): drop extra error propagation Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 02/19] net/tap: net_init_tap_one(): move parameter checking earlier Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 03/19] net/tap: rework net_tap_init() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 04/19] net/tap: pass NULL to net_init_tap_one() in cases when scripts are NULL Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 05/19] net/tap: rework scripts handling Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 06/19] net/tap: setup exit notifier only when needed Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 07/19] net/tap: split net_tap_fd_init() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 08/19] net/tap: tap_set_sndbuf(): add return value Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 09/19] net/tap: rework tap_set_sndbuf() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 10/19] net/tap: rework sndbuf handling Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 11/19] net/tap: introduce net_tap_setup() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 12/19] net/tap: move vhost fd initialization to net_tap_new() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 13/19] net/tap: finalize net_tap_set_fd() logic Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 14/19] migration: introduce .pre_incoming() vmsd handler Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 15/19] net/tap: postpone tap setup to pre-incoming Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 16/19] qapi: introduce backend-transfer migration parameter Vladimir Sementsov-Ogievskiy
2025-10-15 18:19 ` Peter Xu
2025-10-15 19:02 ` Vladimir Sementsov-Ogievskiy
2025-10-15 20:07 ` Peter Xu
2025-10-15 21:02 ` Vladimir Sementsov-Ogievskiy
2025-10-16 8:32 ` Daniel P. Berrangé
2025-10-16 9:23 ` Vladimir Sementsov-Ogievskiy
2025-10-16 10:38 ` Vladimir Sementsov-Ogievskiy
2025-10-16 10:55 ` Daniel P. Berrangé
2025-10-16 18:40 ` Peter Xu
2025-10-16 18:51 ` Daniel P. Berrangé
2025-10-16 19:19 ` Daniel P. Berrangé
2025-10-16 19:39 ` Peter Xu
2025-10-16 20:00 ` Daniel P. Berrangé
2025-10-16 19:29 ` Peter Xu
2025-10-16 19:57 ` Daniel P. Berrangé
2025-10-16 20:28 ` Peter Xu
2025-10-17 6:51 ` Vladimir Sementsov-Ogievskiy
2025-10-17 15:55 ` Peter Xu
2025-10-17 8:10 ` Daniel P. Berrangé
2025-10-17 8:26 ` Vladimir Sementsov-Ogievskiy
2025-10-17 8:50 ` Daniel P. Berrangé
2025-10-17 9:18 ` Vladimir Sementsov-Ogievskiy
2025-10-17 8:39 ` Vladimir Sementsov-Ogievskiy
2025-10-17 16:08 ` Peter Xu
2025-10-16 20:26 ` Vladimir Sementsov-Ogievskiy
2025-10-16 20:30 ` Vladimir Sementsov-Ogievskiy
2025-10-16 10:56 ` Markus Armbruster
2025-10-16 12:07 ` Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 17/19] virtio-net: support backend-transfer migration for virtio-net/tap Vladimir Sementsov-Ogievskiy
2025-10-16 8:23 ` Daniel P. Berrangé [this message]
2025-10-16 9:15 ` Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 18/19] tests/functional: add skipWithoutSudo() decorator Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 19/19] tests/functional: add test_x86_64_tap_migration Vladimir Sementsov-Ogievskiy
2025-10-18 15:38 ` [PATCH v8 00/19] virtio-net: live-TAP local migration Lei Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aPCriMKg_UolIrHK@redhat.com \
--to=berrange@redhat.com \
--cc=armbru@redhat.com \
--cc=davydov-max@yandex-team.ru \
--cc=eblake@redhat.com \
--cc=farosas@suse.de \
--cc=jasowang@redhat.com \
--cc=leiyang@redhat.com \
--cc=michael.roth@amd.com \
--cc=mst@redhat.com \
--cc=peterx@redhat.com \
--cc=philmd@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=raphael.s.norwitz@gmail.com \
--cc=steven.sistare@oracle.com \
--cc=sw@weilnetz.de \
--cc=thuth@redhat.com \
--cc=vsementsov@yandex-team.ru \
--cc=yc-core@yandex-team.ru \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).