public inbox for qemu-devel@nongnu.org
 help / color / mirror / Atom feed
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
To: Markus Armbruster <armbru@redhat.com>
Cc: jasowang@redhat.com, mst@redhat.com, eblake@redhat.com,
	farosas@suse.de, peterx@redhat.com, zhao1.liu@intel.com,
	wangyanan55@huawei.com, philmd@linaro.org,
	marcel.apfelbaum@gmail.com, eduardo@habkost.net,
	davydov-max@yandex-team.ru, qemu-devel@nongnu.org,
	yc-core@yandex-team.ru, leiyang@redhat.com,
	raphael.s.norwitz@gmail.com, bchaney@akamai.com,
	th.huth+qemu@posteo.eu, berrange@redhat.com, pbonzini@redhat.com
Subject: Re: [PATCH v13 6/8] net/tap: support local migration with virtio-net
Date: Tue, 24 Mar 2026 16:51:47 +0300	[thread overview]
Message-ID: <d3beaccb-6903-47c2-8ffe-82b3f3cdc6b7@yandex-team.ru> (raw)
In-Reply-To: <87y0jhs8z3.fsf@pond.sub.org>

On 24.03.26 15:33, Markus Armbruster wrote:
> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
> 
>> Support transferring of TAP state (including open fd) through
>> migration stream as part of viritio-net "local-migration".
>>
>> Add new option, incoming-fds, which should be set to true to
>> trigger new logic.
>>
>> For new option require explicitly unset script and downscript,
>> to keep possibility of implementing support for them in future.
>>
>> Note disabling read polling on source stop for TAP migration:
>> otherwise, source process may steal packages from TAP fd even
>> after source vm STOP.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>   net/tap.c     | 147 +++++++++++++++++++++++++++++++++++++++++++++++---
>>   qapi/net.json |   7 ++-
>>   2 files changed, 147 insertions(+), 7 deletions(-)
>>
>> diff --git a/net/tap.c b/net/tap.c
>> index 9d6213fc3e5..2156b6cbb73 100644
>> --- a/net/tap.c
>> +++ b/net/tap.c
>> @@ -36,6 +36,7 @@
>>   #include "net/net.h"
>>   #include "clients.h"
>>   #include "monitor/monitor.h"
>> +#include "system/runstate.h"
>>   #include "system/system.h"
>>   #include "qapi/error.h"
>>   #include "qemu/cutils.h"
>> @@ -86,6 +87,9 @@ typedef struct TAPState {
>>       VHostNetState *vhost_net;
>>       unsigned host_vnet_hdr_len;
>>       Notifier exit;
>> +
>> +    bool read_poll_detached;
>> +    VMChangeStateEntry *vmstate;
>>   } TAPState;
>>   
>>   static void launch_script(const char *setup_script, const char *ifname,
>> @@ -94,19 +98,25 @@ static void launch_script(const char *setup_script, const char *ifname,
>>   static void tap_send(void *opaque);
>>   static void tap_writable(void *opaque);
>>   
>> +static bool tap_is_explicit_no_scirpt(const char *script_arg)
> 
> "scirpt"?  Do you mean "script"?
> 
>> +{
>> +    return script_arg &&
>> +        (script_arg[0] == '\0' || strcmp(script_arg, "no") == 0);
>> +}
>> +
>>   static char *tap_parse_script(const char *script_arg, const char *default_path)
>>   {
>>       g_autofree char *res = g_strdup(script_arg);
>>   
>> -    if (!res) {
>> -        res = get_relocated_path(default_path);
>> +    if (tap_is_explicit_no_scirpt(script_arg)) {
>> +        return NULL;
>>       }
>>   
>> -    if (res[0] == '\0' || strcmp(res, "no") == 0) {
>> -        return NULL;
>> +    if (!script_arg) {
>> +        return get_relocated_path(default_path);
>>       }
>>   
>> -    return g_steal_pointer(&res);
>> +    return g_strdup(script_arg);
>>   }
>>   
>>   static void tap_update_fd_handler(TAPState *s)
>> @@ -123,6 +133,23 @@ static void tap_read_poll(TAPState *s, bool enable)
>>       tap_update_fd_handler(s);
>>   }
>>   
>> +static void tap_vm_state_change(void *opaque, bool running, RunState state)
>> +{
>> +    TAPState *s = opaque;
>> +
>> +    if (running) {
>> +        if (s->read_poll_detached) {
>> +            tap_read_poll(s, true);
>> +            s->read_poll_detached = false;
>> +        }
>> +    } else if (state == RUN_STATE_FINISH_MIGRATE) {
>> +        if (s->read_poll) {
>> +            s->read_poll_detached = true;
>> +            tap_read_poll(s, false);
>> +        }
>> +    }
>> +}
>> +
>>   static void tap_write_poll(TAPState *s, bool enable)
>>   {
>>       s->write_poll = enable;
>> @@ -353,6 +380,11 @@ static void tap_cleanup(NetClientState *nc)
>>           s->exit.notify = NULL;
>>       }
>>   
>> +    if (s->vmstate) {
>> +        qemu_del_vm_change_state_handler(s->vmstate);
>> +        s->vmstate = NULL;
>> +    }
>> +
>>       tap_read_poll(s, false);
>>       tap_write_poll(s, false);
>>       close(s->fd);
>> @@ -393,6 +425,65 @@ static VHostNetState *tap_get_vhost_net(NetClientState *nc)
>>       return s->vhost_net;
>>   }
>>   
>> +static bool tap_is_wait_incoming(NetClientState *nc)
>> +{
>> +    TAPState *s = DO_UPCAST(TAPState, nc, nc);
>> +    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
>> +    return s->fd == -1;
>> +}
>> +
>> +static int tap_pre_load(void *opaque)
>> +{
>> +    TAPState *s = opaque;
>> +
>> +    if (s->fd != -1) {
>> +        error_report(
>> +            "TAP is already initialized and cannot receive incoming fd");
>> +        return -EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static bool tap_setup_vhost(TAPState *s, Error **errp);
>> +
>> +static int tap_post_load(void *opaque, int version_id)
>> +{
>> +    TAPState *s = opaque;
>> +    Error *local_err = NULL;
>> +
>> +    tap_read_poll(s, true);
>> +
>> +    if (s->fd < 0) {
>> +        return -1;
>> +    }
>> +
>> +    if (!tap_setup_vhost(s, &local_err)) {
>> +        error_prepend(&local_err,
>> +                      "Failed to setup vhost during TAP post-load: ");
>> +        error_report_err(local_err);
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static const VMStateDescription vmstate_tap = {
>> +    .name = "net-tap",
>> +    .pre_load = tap_pre_load,
>> +    .post_load = tap_post_load,
>> +    .fields = (const VMStateField[]) {
>> +        VMSTATE_FD(fd, TAPState),
>> +        VMSTATE_BOOL(using_vnet_hdr, TAPState),
>> +        VMSTATE_BOOL(has_ufo, TAPState),
>> +        VMSTATE_BOOL(has_uso, TAPState),
>> +        VMSTATE_BOOL(has_tunnel, TAPState),
>> +        VMSTATE_BOOL(enabled, TAPState),
>> +        VMSTATE_UINT32(host_vnet_hdr_len, TAPState),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>>   /* fd support */
>>   
>>   static NetClientInfo net_tap_info = {
>> @@ -412,7 +503,9 @@ static NetClientInfo net_tap_info = {
>>       .set_vnet_le = tap_set_vnet_le,
>>       .set_vnet_be = tap_set_vnet_be,
>>       .set_steering_ebpf = tap_set_steering_ebpf,
>> +    .is_wait_incoming = tap_is_wait_incoming,
>>       .get_vhost_net = tap_get_vhost_net,
>> +    .backend_vmsd = &vmstate_tap,
>>   };
>>   
>>   static TAPState *net_tap_fd_init(NetClientState *peer,
>> @@ -748,6 +841,9 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
>>       int sndbuf =
>>           (tap->has_sndbuf && tap->sndbuf) ? MIN(tap->sndbuf, INT_MAX) : INT_MAX;
>>   
>> +    s->read_poll_detached = false;
>> +    s->vmstate = qemu_add_vm_change_state_handler(tap_vm_state_change, s);
>> +
>>       if (!tap_set_sndbuf(fd, sndbuf, sndbuf_required ? errp : NULL) &&
>>           sndbuf_required) {
>>           goto failed;
>> @@ -779,6 +875,8 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
>>       return true;
>>   
>>   failed:
>> +    qemu_del_vm_change_state_handler(s->vmstate);
>> +    s->vmstate = NULL;
>>       qemu_del_net_client(&s->nc);
>>       return false;
>>   }
>> @@ -910,6 +1008,26 @@ int net_init_tap(const Netdev *netdev, const char *name,
>>           return -1;
>>       }
>>   
>> +    if (tap->incoming_fds &&
>> +        (tap->fd || tap->fds || tap->helper || tap->br || tap->ifname ||
>> +         tap->has_sndbuf || tap->has_vnet_hdr)) {
>> +        error_setg(errp, "incoming-fds is incompatible with "
>> +                   "fd=, fds=, helper=, br=, ifname=, sndbuf= and vnet_hdr=");
> 
> @incoming-fds excludes certain optional members, and ...
> 
>> +        return -1;
>> +    }
>> +
>> +    if (tap->incoming_fds &&
>> +        !(tap_is_explicit_no_scirpt(tap->script) &&
>> +          tap_is_explicit_no_scirpt(tap->downscript))) {
>> +        /*
>> +         * script="" and downscript="" are silently supported to be consistent
>> +         * with cases without incoming_fds, but do not care to put this into
>> +         * error message.
>> +         */
>> +        error_setg(errp, "incoming-fds requires script=no and downscript=no");
> 
> ... requires others.  Not documented in net.json.  Should it be?

Hmm. Excluded options are not documented as well, and as many other relations
between options around TAP.

Still, I script/downscript requirements are more unobvious, so, let's add it to doc.

> 
>> +        return -1;
>> +    }
>> +
>>       queues = tap_parse_fds_and_queues(tap, &fds, errp);
>>       if (queues < 0) {
>>           return -1;
>> @@ -928,7 +1046,24 @@ int net_init_tap(const Netdev *netdev, const char *name,
>>           goto fail;
>>       }
>>   
>> -    if (fds) {
>> +    if (tap->incoming_fds) {
>> +        for (i = 0; i < queues; i++) {
>> +            NetClientState *nc;
>> +            TAPState *s;
>> +
>> +            nc = qemu_new_net_client(&net_tap_info, peer, "tap", name);
>> +            qemu_set_info_str(nc, "incoming");
>> +
>> +            s = DO_UPCAST(TAPState, nc, nc);
>> +            s->fd = -1;
>> +            if (vhost_fds) {
>> +                s->vhostfd = vhost_fds[i];
>> +                s->vhost_busyloop_timeout = tap->has_poll_us ? tap->poll_us : 0;
>> +            } else {
>> +                s->vhostfd = -1;
>> +            }
>> +        }
>> +    } else if (fds) {
>>           for (i = 0; i < queues; i++) {
>>               if (i == 0) {
>>                   vnet_hdr = tap_probe_vnet_hdr(fds[i], errp);
>> diff --git a/qapi/net.json b/qapi/net.json
>> index 118bd349651..2240de7dbf6 100644
>> --- a/qapi/net.json
>> +++ b/qapi/net.json
>> @@ -355,6 +355,10 @@
>>   # @poll-us: maximum number of microseconds that could be spent on busy
>>   #     polling for tap (since 2.7)
>>   #
>> +# @incoming-fds: do not open or create any TAP devices.  Prepare for
>> +#     getting opened TAP file descriptors from incoming migration
>> +#     stream.  (Since 11.0)
> 
> Let's scratch "opened".
> 
> Sure you're still targeting 11.0?

Will fix

> 
>> +#
>>   # Since: 1.2
>>   ##
>>   { 'struct': 'NetdevTapOptions',
>> @@ -373,7 +377,8 @@
>>       '*vhostfds':   'str',
>>       '*vhostforce': 'bool',
>>       '*queues':     'uint32',
>> -    '*poll-us':    'uint32'} }
>> +    '*poll-us':    'uint32',
>> +    '*incoming-fds': 'bool' } }
>>   
>>   ##
>>   # @NetdevSocketOptions:
> 


-- 
Best regards,
Vladimir


  reply	other threads:[~2026-03-24 13:52 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 1/8] net/tap: move vhost-net open() calls to tap_parse_vhost_fds() Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 2/8] net/tap: move vhost initialization to tap_setup_vhost() Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 3/8] qapi: add local migration parameter Vladimir Sementsov-Ogievskiy
2026-03-24 12:24   ` Markus Armbruster
2026-03-24 13:32     ` Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 4/8] net: introduce vmstate_net_peer_backend Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 5/8] virtio-net: support local migration of backend Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 6/8] net/tap: support local migration with virtio-net Vladimir Sementsov-Ogievskiy
2026-03-24 12:33   ` Markus Armbruster
2026-03-24 13:51     ` Vladimir Sementsov-Ogievskiy [this message]
2026-03-19 15:53 ` [PATCH v13 7/8] tests/functional: add skipWithoutSudo() decorator Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 8/8] tests/functional: add test_tap_migration Vladimir Sementsov-Ogievskiy
2026-03-19 16:01 ` [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
2026-03-20  9:39 ` Markus Armbruster
2026-03-20 13:15   ` Vladimir Sementsov-Ogievskiy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d3beaccb-6903-47c2-8ffe-82b3f3cdc6b7@yandex-team.ru \
    --to=vsementsov@yandex-team.ru \
    --cc=armbru@redhat.com \
    --cc=bchaney@akamai.com \
    --cc=berrange@redhat.com \
    --cc=davydov-max@yandex-team.ru \
    --cc=eblake@redhat.com \
    --cc=eduardo@habkost.net \
    --cc=farosas@suse.de \
    --cc=jasowang@redhat.com \
    --cc=leiyang@redhat.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=raphael.s.norwitz@gmail.com \
    --cc=th.huth+qemu@posteo.eu \
    --cc=wangyanan55@huawei.com \
    --cc=yc-core@yandex-team.ru \
    --cc=zhao1.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox