From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
To: "Daniel P. Berrangé" <berrange@redhat.com>
Cc: mst@redhat.com, jasowang@redhat.com, peterx@redhat.com,
farosas@suse.de, sw@weilnetz.de, eblake@redhat.com,
armbru@redhat.com, thuth@redhat.com, philmd@linaro.org,
qemu-devel@nongnu.org, michael.roth@amd.com,
steven.sistare@oracle.com, leiyang@redhat.com,
davydov-max@yandex-team.ru, yc-core@yandex-team.ru,
raphael.s.norwitz@gmail.com
Subject: Re: [PATCH v8 17/19] virtio-net: support backend-transfer migration for virtio-net/tap
Date: Thu, 16 Oct 2025 12:15:32 +0300 [thread overview]
Message-ID: <d99947db-afeb-462f-b2b5-a53e451e762d@yandex-team.ru> (raw)
In-Reply-To: <aPCriMKg_UolIrHK@redhat.com>
On 16.10.25 11:23, Daniel P. Berrangé wrote:
> On Wed, Oct 15, 2025 at 04:21:33PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> Add virtio-net option backend-transfer, which is true by default,
>> but false for older machine types, which doesn't support the feature.
>>
>> For backend-transfer migration, both global migration parameter
>> backend-transfer and virtio-net backend-transfer option should be
>> set to true.
>>
>> With the parameters enabled (both on source and target) of-course, and
>> with unix-socket used as migration-channel, we do "migrate" the
>> virtio-net backend - TAP device, with all its fds.
>>
>> This way management tool should not care about creating new TAP, and
>> should not handle switching to it. Migration downtime become shorter.
>>
>> How it works:
>>
>> 1. For incoming migration, we postpone TAP initialization up to
>> pre-incoming point.
>>
>> 2. At pre-incoming point we see that "virtio-net-tap" is set for
>> backend-transfer, so we postpone TAP initialization up to
>> post-load
>>
>> 3. During virtio-load, we get TAP state (and fds) as part of
>> virtio-net state
>>
>> 4. In post-load we finalize TAP initialization
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>> hw/core/machine.c | 1 +
>> hw/net/virtio-net.c | 75 +++++++++++++++++++++++++++++++++-
>> include/hw/virtio/virtio-net.h | 1 +
>> include/net/tap.h | 2 +
>> net/tap.c | 45 +++++++++++++++++++-
>> 5 files changed, 122 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/core/machine.c b/hw/core/machine.c
>> index 681adbb7ac..a3d77f5604 100644
>> --- a/hw/core/machine.c
>> +++ b/hw/core/machine.c
>> @@ -40,6 +40,7 @@
>>
>> GlobalProperty hw_compat_10_1[] = {
>> { TYPE_ACPI_GED, "x-has-hest-addr", "false" },
>> + { TYPE_VIRTIO_NET, "backend-transfer", "false" },
>> };
>> const size_t hw_compat_10_1_len = G_N_ELEMENTS(hw_compat_10_1);
>>
>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> index 661413c72f..5f9711dee7 100644
>> --- a/hw/net/virtio-net.c
>> +++ b/hw/net/virtio-net.c
>> @@ -38,6 +38,7 @@
>> #include "qapi/qapi-events-migration.h"
>> #include "hw/virtio/virtio-access.h"
>> #include "migration/misc.h"
>> +#include "migration/options.h"
>> #include "standard-headers/linux/ethtool.h"
>> #include "system/system.h"
>> #include "system/replay.h"
>> @@ -3358,6 +3359,9 @@ struct VirtIONetMigTmp {
>> uint16_t curr_queue_pairs_1;
>> uint8_t has_ufo;
>> uint32_t has_vnet_hdr;
>> +
>> + NetClientState *ncs;
>> + uint32_t max_queue_pairs;
>> };
>>
>> /* The 2nd and subsequent tx_waiting flags are loaded later than
>> @@ -3627,6 +3631,71 @@ static const VMStateDescription vhost_user_net_backend_state = {
>> }
>> };
>>
>> +static bool virtio_net_is_tap_mig(void *opaque, int version_id)
>> +{
>> + VirtIONet *n = opaque;
>> + NetClientState *nc;
>> +
>> + nc = qemu_get_queue(n->nic);
>> +
>> + return migrate_backend_transfer() && n->backend_transfer && nc->peer &&
>> + nc->peer->info->type == NET_CLIENT_DRIVER_TAP;
>> +}
>> +
>> +static int virtio_net_nic_pre_save(void *opaque)
>> +{
>> + struct VirtIONetMigTmp *tmp = opaque;
>> +
>> + tmp->ncs = tmp->parent->nic->ncs;
>> + tmp->max_queue_pairs = tmp->parent->max_queue_pairs;
>> +
>> + return 0;
>> +}
>> +
>> +static int virtio_net_nic_pre_load(void *opaque)
>> +{
>> + /* Reuse the pointer setup from save */
>> + virtio_net_nic_pre_save(opaque);
>> +
>> + return 0;
>> +}
>> +
>> +static int virtio_net_nic_post_load(void *opaque, int version_id)
>> +{
>> + struct VirtIONetMigTmp *tmp = opaque;
>> + Error *local_err = NULL;
>> +
>> + if (!virtio_net_update_host_features(tmp->parent, &local_err)) {
>> + error_report_err(local_err);
>> + return -EINVAL;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static const VMStateDescription vmstate_virtio_net_nic_nc = {
>> + .name = "virtio-net-nic-nc",
>> + .fields = (const VMStateField[]) {
>> + VMSTATE_STRUCT_POINTER(peer, NetClientState, vmstate_tap,
>> + NetClientState),
>> + VMSTATE_END_OF_LIST()
>> + },
>> +};
>> +
>> +static const VMStateDescription vmstate_virtio_net_nic = {
>> + .name = "virtio-net-nic",
>> + .pre_load = virtio_net_nic_pre_load,
>> + .pre_save = virtio_net_nic_pre_save,
>> + .post_load = virtio_net_nic_post_load,
>> + .fields = (const VMStateField[]) {
>> + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(ncs, struct VirtIONetMigTmp,
>> + max_queue_pairs,
>> + vmstate_virtio_net_nic_nc,
>> + struct NetClientState),
>> + VMSTATE_END_OF_LIST()
>> + },
>> +};
>> +
>> static const VMStateDescription vmstate_virtio_net_device = {
>> .name = "virtio-net-device",
>> .version_id = VIRTIO_NET_VM_VERSION,
>> @@ -3658,6 +3727,9 @@ static const VMStateDescription vmstate_virtio_net_device = {
>> * but based on the uint.
>> */
>> VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
>> + VMSTATE_WITH_TMP_TEST(VirtIONet, virtio_net_is_tap_mig,
>> + struct VirtIONetMigTmp,
>> + vmstate_virtio_net_nic),
>> VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
>> vmstate_virtio_net_has_vnet),
>> VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
>> @@ -4239,7 +4311,7 @@ static bool vhost_user_blk_pre_incoming(void *opaque, Error **errp)
>> VirtIONet *n = opaque;
>> int i;
>>
>> - if (peer_wait_incoming(n)) {
>> + if (!virtio_net_is_tap_mig(opaque, 0) && peer_wait_incoming(n)) {
>> for (i = 0; i < n->max_queue_pairs; i++) {
>> if (!peer_postponed_init(n, i, errp)) {
>> return false;
>> @@ -4389,6 +4461,7 @@ static const Property virtio_net_properties[] = {
>> host_features_ex,
>> VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM,
>> false),
>> + DEFINE_PROP_BOOL("backend-transfer", VirtIONet, backend_transfer, true),
>> };
>>
>> static void virtio_net_class_init(ObjectClass *klass, const void *data)
>
> I really don't like this approach, because it is requiring the frontend
> device to know about every different backend implementation that is able
> to do state transfer. This really violates the separation from the
> frontend and backend. The choice of specific backend should generally
> be opaque to the frontend.
>
> This really ought to be redesigned to work in terms of an formal API
> exposed by the backend, not poking at TAP backend specific details.
> eg an API that operates on NetClientState, for which each backend
> can provide an optional implementation.
Agree, I'll try.
>
>
>> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
>> index 5b8ab7bda7..bf07f8a4cb 100644
>> --- a/include/hw/virtio/virtio-net.h
>> +++ b/include/hw/virtio/virtio-net.h
>> @@ -231,6 +231,7 @@ struct VirtIONet {
>> struct EBPFRSSContext ebpf_rss;
>> uint32_t nr_ebpf_rss_fds;
>> char **ebpf_rss_fds;
>> + bool backend_transfer;
>> };
>>
>> size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
>> diff --git a/include/net/tap.h b/include/net/tap.h
>> index 5a926ba513..506f7ab719 100644
>> --- a/include/net/tap.h
>> +++ b/include/net/tap.h
>> @@ -36,4 +36,6 @@ int tap_get_fd(NetClientState *nc);
>> bool tap_wait_incoming(NetClientState *nc);
>> bool tap_postponed_init(NetClientState *nc, Error **errp);
>>
>> +extern const VMStateDescription vmstate_tap;
>> +
>> #endif /* QEMU_NET_TAP_H */
>> diff --git a/net/tap.c b/net/tap.c
>> index 8afbf3b407..b9c12dd64c 100644
>> --- a/net/tap.c
>> +++ b/net/tap.c
>> @@ -819,7 +819,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
>>
>> static bool net_tap_setup(TAPState *s, int fd, int vnet_hdr, Error **errp)
>> {
>> - if (!net_tap_set_fd(s, fd, vnet_hdr, errp)) {
>> + if (fd != -1 && !net_tap_set_fd(s, fd, vnet_hdr, errp)) {
>> return false;
>> }
>>
>> @@ -1225,6 +1225,49 @@ int tap_disable(NetClientState *nc)
>> }
>> }
>>
>> +static int tap_pre_load(void *opaque)
>> +{
>> + TAPState *s = opaque;
>> +
>> + if (s->fd != -1) {
>> + error_report(
>> + "TAP is already initialized and cannot receive incoming fd");
>> + return -EINVAL;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int tap_post_load(void *opaque, int version_id)
>> +{
>> + TAPState *s = opaque;
>> + Error *local_err = NULL;
>> +
>> + if (!net_tap_setup(s, -1, -1, &local_err)) {
>> + error_report_err(local_err);
>> + qemu_del_net_client(&s->nc);
>> + return -EINVAL;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +const VMStateDescription vmstate_tap = {
>> + .name = "net-tap",
>> + .pre_load = tap_pre_load,
>> + .post_load = tap_post_load,
>> + .fields = (const VMStateField[]) {
>> + VMSTATE_FD(fd, TAPState),
>> + VMSTATE_BOOL(using_vnet_hdr, TAPState),
>> + VMSTATE_BOOL(has_ufo, TAPState),
>> + VMSTATE_BOOL(has_uso, TAPState),
>> + VMSTATE_BOOL(has_tunnel, TAPState),
>> + VMSTATE_BOOL(enabled, TAPState),
>> + VMSTATE_UINT32(host_vnet_hdr_len, TAPState),
>> + VMSTATE_END_OF_LIST()
>> + }
>> +};
>> +
>> bool tap_wait_incoming(NetClientState *nc)
>> {
>> TAPState *s = DO_UPCAST(TAPState, nc, nc);
>
> IMHO implementing state transfer in the backends ought to be separate
> commit from adding support for using that in the frontend.
>
Will do.
Thanks for reviewing!
--
Best regards,
Vladimir
next prev parent reply other threads:[~2025-10-16 9:16 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-15 13:21 [PATCH v8 00/19] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 01/19] net/tap: net_init_tap_one(): drop extra error propagation Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 02/19] net/tap: net_init_tap_one(): move parameter checking earlier Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 03/19] net/tap: rework net_tap_init() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 04/19] net/tap: pass NULL to net_init_tap_one() in cases when scripts are NULL Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 05/19] net/tap: rework scripts handling Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 06/19] net/tap: setup exit notifier only when needed Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 07/19] net/tap: split net_tap_fd_init() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 08/19] net/tap: tap_set_sndbuf(): add return value Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 09/19] net/tap: rework tap_set_sndbuf() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 10/19] net/tap: rework sndbuf handling Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 11/19] net/tap: introduce net_tap_setup() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 12/19] net/tap: move vhost fd initialization to net_tap_new() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 13/19] net/tap: finalize net_tap_set_fd() logic Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 14/19] migration: introduce .pre_incoming() vmsd handler Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 15/19] net/tap: postpone tap setup to pre-incoming Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 16/19] qapi: introduce backend-transfer migration parameter Vladimir Sementsov-Ogievskiy
2025-10-15 18:19 ` Peter Xu
2025-10-15 19:02 ` Vladimir Sementsov-Ogievskiy
2025-10-15 20:07 ` Peter Xu
2025-10-15 21:02 ` Vladimir Sementsov-Ogievskiy
2025-10-16 8:32 ` Daniel P. Berrangé
2025-10-16 9:23 ` Vladimir Sementsov-Ogievskiy
2025-10-16 10:38 ` Vladimir Sementsov-Ogievskiy
2025-10-16 10:55 ` Daniel P. Berrangé
2025-10-16 18:40 ` Peter Xu
2025-10-16 18:51 ` Daniel P. Berrangé
2025-10-16 19:19 ` Daniel P. Berrangé
2025-10-16 19:39 ` Peter Xu
2025-10-16 20:00 ` Daniel P. Berrangé
2025-10-16 19:29 ` Peter Xu
2025-10-16 19:57 ` Daniel P. Berrangé
2025-10-16 20:28 ` Peter Xu
2025-10-17 6:51 ` Vladimir Sementsov-Ogievskiy
2025-10-17 15:55 ` Peter Xu
2025-10-17 8:10 ` Daniel P. Berrangé
2025-10-17 8:26 ` Vladimir Sementsov-Ogievskiy
2025-10-17 8:50 ` Daniel P. Berrangé
2025-10-17 9:18 ` Vladimir Sementsov-Ogievskiy
2025-10-17 8:39 ` Vladimir Sementsov-Ogievskiy
2025-10-17 16:08 ` Peter Xu
2025-10-16 20:26 ` Vladimir Sementsov-Ogievskiy
2025-10-16 20:30 ` Vladimir Sementsov-Ogievskiy
2025-10-16 10:56 ` Markus Armbruster
2025-10-16 12:07 ` Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 17/19] virtio-net: support backend-transfer migration for virtio-net/tap Vladimir Sementsov-Ogievskiy
2025-10-16 8:23 ` Daniel P. Berrangé
2025-10-16 9:15 ` Vladimir Sementsov-Ogievskiy [this message]
2025-10-15 13:21 ` [PATCH v8 18/19] tests/functional: add skipWithoutSudo() decorator Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 19/19] tests/functional: add test_x86_64_tap_migration Vladimir Sementsov-Ogievskiy
2025-10-18 15:38 ` [PATCH v8 00/19] virtio-net: live-TAP local migration Lei Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d99947db-afeb-462f-b2b5-a53e451e762d@yandex-team.ru \
--to=vsementsov@yandex-team.ru \
--cc=armbru@redhat.com \
--cc=berrange@redhat.com \
--cc=davydov-max@yandex-team.ru \
--cc=eblake@redhat.com \
--cc=farosas@suse.de \
--cc=jasowang@redhat.com \
--cc=leiyang@redhat.com \
--cc=michael.roth@amd.com \
--cc=mst@redhat.com \
--cc=peterx@redhat.com \
--cc=philmd@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=raphael.s.norwitz@gmail.com \
--cc=steven.sistare@oracle.com \
--cc=sw@weilnetz.de \
--cc=thuth@redhat.com \
--cc=yc-core@yandex-team.ru \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).