qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
To: "Daniel P. Berrangé" <berrange@redhat.com>
Cc: mst@redhat.com, jasowang@redhat.com, peterx@redhat.com,
	farosas@suse.de, sw@weilnetz.de, eblake@redhat.com,
	armbru@redhat.com, thuth@redhat.com, philmd@linaro.org,
	qemu-devel@nongnu.org, michael.roth@amd.com,
	steven.sistare@oracle.com, leiyang@redhat.com,
	davydov-max@yandex-team.ru, yc-core@yandex-team.ru,
	raphael.s.norwitz@gmail.com
Subject: Re: [PATCH v8 17/19] virtio-net: support backend-transfer migration for virtio-net/tap
Date: Thu, 16 Oct 2025 12:15:32 +0300	[thread overview]
Message-ID: <d99947db-afeb-462f-b2b5-a53e451e762d@yandex-team.ru> (raw)
In-Reply-To: <aPCriMKg_UolIrHK@redhat.com>

On 16.10.25 11:23, Daniel P. Berrangé wrote:
> On Wed, Oct 15, 2025 at 04:21:33PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> Add virtio-net option backend-transfer, which is true by default,
>> but false for older machine types, which doesn't support the feature.
>>
>> For backend-transfer migration, both global migration parameter
>> backend-transfer and virtio-net backend-transfer option should be
>> set to true.
>>
>> With the parameters enabled (both on source and target) of-course, and
>> with unix-socket used as migration-channel, we do "migrate" the
>> virtio-net backend - TAP device, with all its fds.
>>
>> This way management tool should not care about creating new TAP, and
>> should not handle switching to it. Migration downtime become shorter.
>>
>> How it works:
>>
>> 1. For incoming migration, we postpone TAP initialization up to
>>     pre-incoming point.
>>
>> 2. At pre-incoming point we see that "virtio-net-tap" is set for
>>     backend-transfer, so we postpone TAP initialization up to
>>     post-load
>>
>> 3. During virtio-load, we get TAP state (and fds) as part of
>>     virtio-net state
>>
>> 4. In post-load we finalize TAP initialization
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>   hw/core/machine.c              |  1 +
>>   hw/net/virtio-net.c            | 75 +++++++++++++++++++++++++++++++++-
>>   include/hw/virtio/virtio-net.h |  1 +
>>   include/net/tap.h              |  2 +
>>   net/tap.c                      | 45 +++++++++++++++++++-
>>   5 files changed, 122 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/core/machine.c b/hw/core/machine.c
>> index 681adbb7ac..a3d77f5604 100644
>> --- a/hw/core/machine.c
>> +++ b/hw/core/machine.c
>> @@ -40,6 +40,7 @@
>>   
>>   GlobalProperty hw_compat_10_1[] = {
>>       { TYPE_ACPI_GED, "x-has-hest-addr", "false" },
>> +    { TYPE_VIRTIO_NET, "backend-transfer", "false" },
>>   };
>>   const size_t hw_compat_10_1_len = G_N_ELEMENTS(hw_compat_10_1);
>>   
>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> index 661413c72f..5f9711dee7 100644
>> --- a/hw/net/virtio-net.c
>> +++ b/hw/net/virtio-net.c
>> @@ -38,6 +38,7 @@
>>   #include "qapi/qapi-events-migration.h"
>>   #include "hw/virtio/virtio-access.h"
>>   #include "migration/misc.h"
>> +#include "migration/options.h"
>>   #include "standard-headers/linux/ethtool.h"
>>   #include "system/system.h"
>>   #include "system/replay.h"
>> @@ -3358,6 +3359,9 @@ struct VirtIONetMigTmp {
>>       uint16_t        curr_queue_pairs_1;
>>       uint8_t         has_ufo;
>>       uint32_t        has_vnet_hdr;
>> +
>> +    NetClientState *ncs;
>> +    uint32_t max_queue_pairs;
>>   };
>>   
>>   /* The 2nd and subsequent tx_waiting flags are loaded later than
>> @@ -3627,6 +3631,71 @@ static const VMStateDescription vhost_user_net_backend_state = {
>>       }
>>   };
>>   
>> +static bool virtio_net_is_tap_mig(void *opaque, int version_id)
>> +{
>> +    VirtIONet *n = opaque;
>> +    NetClientState *nc;
>> +
>> +    nc = qemu_get_queue(n->nic);
>> +
>> +    return migrate_backend_transfer() && n->backend_transfer && nc->peer &&
>> +        nc->peer->info->type == NET_CLIENT_DRIVER_TAP;
>> +}
>> +
>> +static int virtio_net_nic_pre_save(void *opaque)
>> +{
>> +    struct VirtIONetMigTmp *tmp = opaque;
>> +
>> +    tmp->ncs = tmp->parent->nic->ncs;
>> +    tmp->max_queue_pairs = tmp->parent->max_queue_pairs;
>> +
>> +    return 0;
>> +}
>> +
>> +static int virtio_net_nic_pre_load(void *opaque)
>> +{
>> +    /* Reuse the pointer setup from save */
>> +    virtio_net_nic_pre_save(opaque);
>> +
>> +    return 0;
>> +}
>> +
>> +static int virtio_net_nic_post_load(void *opaque, int version_id)
>> +{
>> +    struct VirtIONetMigTmp *tmp = opaque;
>> +    Error *local_err = NULL;
>> +
>> +    if (!virtio_net_update_host_features(tmp->parent, &local_err)) {
>> +        error_report_err(local_err);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static const VMStateDescription vmstate_virtio_net_nic_nc = {
>> +    .name = "virtio-net-nic-nc",
>> +    .fields = (const VMStateField[]) {
>> +        VMSTATE_STRUCT_POINTER(peer, NetClientState, vmstate_tap,
>> +                               NetClientState),
>> +        VMSTATE_END_OF_LIST()
>> +   },
>> +};
>> +
>> +static const VMStateDescription vmstate_virtio_net_nic = {
>> +    .name      = "virtio-net-nic",
>> +    .pre_load  = virtio_net_nic_pre_load,
>> +    .pre_save  = virtio_net_nic_pre_save,
>> +    .post_load  = virtio_net_nic_post_load,
>> +    .fields    = (const VMStateField[]) {
>> +        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(ncs, struct VirtIONetMigTmp,
>> +                                             max_queue_pairs,
>> +                                             vmstate_virtio_net_nic_nc,
>> +                                             struct NetClientState),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>>   static const VMStateDescription vmstate_virtio_net_device = {
>>       .name = "virtio-net-device",
>>       .version_id = VIRTIO_NET_VM_VERSION,
>> @@ -3658,6 +3727,9 @@ static const VMStateDescription vmstate_virtio_net_device = {
>>            * but based on the uint.
>>            */
>>           VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
>> +        VMSTATE_WITH_TMP_TEST(VirtIONet, virtio_net_is_tap_mig,
>> +                              struct VirtIONetMigTmp,
>> +                              vmstate_virtio_net_nic),
>>           VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
>>                            vmstate_virtio_net_has_vnet),
>>           VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
>> @@ -4239,7 +4311,7 @@ static bool vhost_user_blk_pre_incoming(void *opaque, Error **errp)
>>       VirtIONet *n = opaque;
>>       int i;
>>   
>> -    if (peer_wait_incoming(n)) {
>> +    if (!virtio_net_is_tap_mig(opaque, 0) && peer_wait_incoming(n)) {
>>           for (i = 0; i < n->max_queue_pairs; i++) {
>>               if (!peer_postponed_init(n, i, errp)) {
>>                   return false;
>> @@ -4389,6 +4461,7 @@ static const Property virtio_net_properties[] = {
>>                                  host_features_ex,
>>                                  VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM,
>>                                  false),
>> +    DEFINE_PROP_BOOL("backend-transfer", VirtIONet, backend_transfer, true),
>>   };
>>   
>>   static void virtio_net_class_init(ObjectClass *klass, const void *data)
> 
> I really don't like this approach, because it is requiring the frontend
> device to know about every different backend implementation that is able
> to do state transfer. This really violates the separation from the
> frontend and backend. The choice of specific backend should generally
> be opaque to the frontend.
> 
> This really ought to be redesigned to work in terms of an formal API
> exposed by the backend, not poking at TAP backend specific details.
> eg an API that operates on NetClientState, for which each backend
> can provide an optional implementation.

Agree, I'll try.

> 
> 
>> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
>> index 5b8ab7bda7..bf07f8a4cb 100644
>> --- a/include/hw/virtio/virtio-net.h
>> +++ b/include/hw/virtio/virtio-net.h
>> @@ -231,6 +231,7 @@ struct VirtIONet {
>>       struct EBPFRSSContext ebpf_rss;
>>       uint32_t nr_ebpf_rss_fds;
>>       char **ebpf_rss_fds;
>> +    bool backend_transfer;
>>   };
>>   
>>   size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
>> diff --git a/include/net/tap.h b/include/net/tap.h
>> index 5a926ba513..506f7ab719 100644
>> --- a/include/net/tap.h
>> +++ b/include/net/tap.h
>> @@ -36,4 +36,6 @@ int tap_get_fd(NetClientState *nc);
>>   bool tap_wait_incoming(NetClientState *nc);
>>   bool tap_postponed_init(NetClientState *nc, Error **errp);
>>   
>> +extern const VMStateDescription vmstate_tap;
>> +
>>   #endif /* QEMU_NET_TAP_H */
>> diff --git a/net/tap.c b/net/tap.c
>> index 8afbf3b407..b9c12dd64c 100644
>> --- a/net/tap.c
>> +++ b/net/tap.c
>> @@ -819,7 +819,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
>>   
>>   static bool net_tap_setup(TAPState *s, int fd, int vnet_hdr, Error **errp)
>>   {
>> -    if (!net_tap_set_fd(s, fd, vnet_hdr, errp)) {
>> +    if (fd != -1 && !net_tap_set_fd(s, fd, vnet_hdr, errp)) {
>>           return false;
>>       }
>>   
>> @@ -1225,6 +1225,49 @@ int tap_disable(NetClientState *nc)
>>       }
>>   }
>>   
>> +static int tap_pre_load(void *opaque)
>> +{
>> +    TAPState *s = opaque;
>> +
>> +    if (s->fd != -1) {
>> +        error_report(
>> +            "TAP is already initialized and cannot receive incoming fd");
>> +        return -EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int tap_post_load(void *opaque, int version_id)
>> +{
>> +    TAPState *s = opaque;
>> +    Error *local_err = NULL;
>> +
>> +    if (!net_tap_setup(s, -1, -1, &local_err)) {
>> +        error_report_err(local_err);
>> +        qemu_del_net_client(&s->nc);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +const VMStateDescription vmstate_tap = {
>> +    .name = "net-tap",
>> +    .pre_load = tap_pre_load,
>> +    .post_load = tap_post_load,
>> +    .fields = (const VMStateField[]) {
>> +        VMSTATE_FD(fd, TAPState),
>> +        VMSTATE_BOOL(using_vnet_hdr, TAPState),
>> +        VMSTATE_BOOL(has_ufo, TAPState),
>> +        VMSTATE_BOOL(has_uso, TAPState),
>> +        VMSTATE_BOOL(has_tunnel, TAPState),
>> +        VMSTATE_BOOL(enabled, TAPState),
>> +        VMSTATE_UINT32(host_vnet_hdr_len, TAPState),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>>   bool tap_wait_incoming(NetClientState *nc)
>>   {
>>       TAPState *s = DO_UPCAST(TAPState, nc, nc);
> 
> IMHO implementing state transfer in the backends ought to be separate
> commit from adding support for using that in the frontend.
> 

Will do.

Thanks for reviewing!


-- 
Best regards,
Vladimir


  reply	other threads:[~2025-10-16  9:16 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-15 13:21 [PATCH v8 00/19] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 01/19] net/tap: net_init_tap_one(): drop extra error propagation Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 02/19] net/tap: net_init_tap_one(): move parameter checking earlier Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 03/19] net/tap: rework net_tap_init() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 04/19] net/tap: pass NULL to net_init_tap_one() in cases when scripts are NULL Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 05/19] net/tap: rework scripts handling Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 06/19] net/tap: setup exit notifier only when needed Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 07/19] net/tap: split net_tap_fd_init() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 08/19] net/tap: tap_set_sndbuf(): add return value Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 09/19] net/tap: rework tap_set_sndbuf() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 10/19] net/tap: rework sndbuf handling Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 11/19] net/tap: introduce net_tap_setup() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 12/19] net/tap: move vhost fd initialization to net_tap_new() Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 13/19] net/tap: finalize net_tap_set_fd() logic Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 14/19] migration: introduce .pre_incoming() vmsd handler Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 15/19] net/tap: postpone tap setup to pre-incoming Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 16/19] qapi: introduce backend-transfer migration parameter Vladimir Sementsov-Ogievskiy
2025-10-15 18:19   ` Peter Xu
2025-10-15 19:02     ` Vladimir Sementsov-Ogievskiy
2025-10-15 20:07       ` Peter Xu
2025-10-15 21:02         ` Vladimir Sementsov-Ogievskiy
2025-10-16  8:32           ` Daniel P. Berrangé
2025-10-16  9:23             ` Vladimir Sementsov-Ogievskiy
2025-10-16 10:38               ` Vladimir Sementsov-Ogievskiy
2025-10-16 10:55                 ` Daniel P. Berrangé
2025-10-16 18:40               ` Peter Xu
2025-10-16 18:51                 ` Daniel P. Berrangé
2025-10-16 19:19                   ` Daniel P. Berrangé
2025-10-16 19:39                     ` Peter Xu
2025-10-16 20:00                       ` Daniel P. Berrangé
2025-10-16 19:29                   ` Peter Xu
2025-10-16 19:57                     ` Daniel P. Berrangé
2025-10-16 20:28                       ` Peter Xu
2025-10-17  6:51                         ` Vladimir Sementsov-Ogievskiy
2025-10-17 15:55                           ` Peter Xu
2025-10-17  8:10                         ` Daniel P. Berrangé
2025-10-17  8:26                           ` Vladimir Sementsov-Ogievskiy
2025-10-17  8:50                             ` Daniel P. Berrangé
2025-10-17  9:18                               ` Vladimir Sementsov-Ogievskiy
2025-10-17  8:39                           ` Vladimir Sementsov-Ogievskiy
2025-10-17 16:08                           ` Peter Xu
2025-10-16 20:26               ` Vladimir Sementsov-Ogievskiy
2025-10-16 20:30                 ` Vladimir Sementsov-Ogievskiy
2025-10-16 10:56   ` Markus Armbruster
2025-10-16 12:07     ` Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 17/19] virtio-net: support backend-transfer migration for virtio-net/tap Vladimir Sementsov-Ogievskiy
2025-10-16  8:23   ` Daniel P. Berrangé
2025-10-16  9:15     ` Vladimir Sementsov-Ogievskiy [this message]
2025-10-15 13:21 ` [PATCH v8 18/19] tests/functional: add skipWithoutSudo() decorator Vladimir Sementsov-Ogievskiy
2025-10-15 13:21 ` [PATCH v8 19/19] tests/functional: add test_x86_64_tap_migration Vladimir Sementsov-Ogievskiy
2025-10-18 15:38 ` [PATCH v8 00/19] virtio-net: live-TAP local migration Lei Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d99947db-afeb-462f-b2b5-a53e451e762d@yandex-team.ru \
    --to=vsementsov@yandex-team.ru \
    --cc=armbru@redhat.com \
    --cc=berrange@redhat.com \
    --cc=davydov-max@yandex-team.ru \
    --cc=eblake@redhat.com \
    --cc=farosas@suse.de \
    --cc=jasowang@redhat.com \
    --cc=leiyang@redhat.com \
    --cc=michael.roth@amd.com \
    --cc=mst@redhat.com \
    --cc=peterx@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=raphael.s.norwitz@gmail.com \
    --cc=steven.sistare@oracle.com \
    --cc=sw@weilnetz.de \
    --cc=thuth@redhat.com \
    --cc=yc-core@yandex-team.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).