* [PATCH v13 1/8] net/tap: move vhost-net open() calls to tap_parse_vhost_fds()
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
@ 2026-03-19 15:53 ` Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 2/8] net/tap: move vhost initialization to tap_setup_vhost() Vladimir Sementsov-Ogievskiy
` (8 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-19 15:53 UTC (permalink / raw)
To: jasowang, mst
Cc: armbru, eblake, farosas, peterx, zhao1.liu, wangyanan55, philmd,
marcel.apfelbaum, eduardo, davydov-max, qemu-devel, vsementsov,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
1. Simplify code path: get vhostfds for all cases in one function.
2. Prepare for the upcoming tap-fd-migration feature, where we'll need to
postpone vhost initialization until the post-load stage.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
net/tap.c | 39 ++++++++++++++++++++++-----------------
1 file changed, 22 insertions(+), 17 deletions(-)
diff --git a/net/tap.c b/net/tap.c
index 57ffb09885c..d941c67895e 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -736,8 +736,7 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
}
}
- if (tap->has_vhost ? tap->vhost :
- (vhostfd != -1) || (tap->has_vhostforce && tap->vhostforce)) {
+ if (vhostfd != -1) {
VhostNetOptions options;
options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
@@ -747,17 +746,6 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
} else {
options.busyloop_timeout = 0;
}
-
- if (vhostfd == -1) {
- vhostfd = open("/dev/vhost-net", O_RDWR);
- if (vhostfd < 0) {
- error_setg_file_open(errp, errno, "/dev/vhost-net");
- goto failed;
- }
- if (!qemu_set_blocking(vhostfd, false, errp)) {
- goto failed;
- }
- }
options.opaque = (void *)(uintptr_t)vhostfd;
options.nvqs = 2;
options.feature_bits = kernel_feature_bits;
@@ -843,14 +831,31 @@ static int tap_parse_fds_and_queues(const NetdevTapOptions *tap, int **fds,
static bool tap_parse_vhost_fds(const NetdevTapOptions *tap, int **vhost_fds,
int queues, Error **errp)
{
- if (!(tap->vhostfd || tap->vhostfds)) {
+ bool need_vhost = tap->has_vhost ? tap->vhost :
+ ((tap->vhostfd || tap->vhostfds) ||
+ (tap->has_vhostforce && tap->vhostforce));
+
+ if (!need_vhost) {
*vhost_fds = NULL;
return true;
}
- if (net_parse_fds(tap->vhostfd ?: tap->vhostfds,
- vhost_fds, queues, errp) < 0) {
- return false;
+ if (tap->vhostfd || tap->vhostfds) {
+ if (net_parse_fds(tap->vhostfd ?: tap->vhostfds,
+ vhost_fds, queues, errp) < 0) {
+ return false;
+ }
+ } else {
+ *vhost_fds = g_new(int, queues);
+ for (int i = 0; i < queues; i++) {
+ int vhostfd = open("/dev/vhost-net", O_RDWR);
+ if (vhostfd < 0) {
+ error_setg_file_open(errp, errno, "/dev/vhost-net");
+ net_free_fds(*vhost_fds, i);
+ return false;
+ }
+ (*vhost_fds)[i] = vhostfd;
+ }
}
if (!unblock_fds(*vhost_fds, queues, errp)) {
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH v13 2/8] net/tap: move vhost initialization to tap_setup_vhost()
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 1/8] net/tap: move vhost-net open() calls to tap_parse_vhost_fds() Vladimir Sementsov-Ogievskiy
@ 2026-03-19 15:53 ` Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 3/8] qapi: add local migration parameter Vladimir Sementsov-Ogievskiy
` (7 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-19 15:53 UTC (permalink / raw)
To: jasowang, mst
Cc: armbru, eblake, farosas, peterx, zhao1.liu, wangyanan55, philmd,
marcel.apfelbaum, eduardo, davydov-max, qemu-devel, vsementsov,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
Make a new helper function in such a way that it can be reused later for
the TAP fd-migration feature: we'll need to initialize vhost at a later
point, when we don't have access to QAPI parameters.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
net/tap.c | 62 ++++++++++++++++++++++++++++++++++---------------------
1 file changed, 38 insertions(+), 24 deletions(-)
diff --git a/net/tap.c b/net/tap.c
index d941c67895e..9d6213fc3e5 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -71,6 +71,8 @@ static const int kernel_feature_bits[] = {
typedef struct TAPState {
NetClientState nc;
int fd;
+ int vhostfd;
+ uint32_t vhost_busyloop_timeout;
char down_script[1024];
char down_script_arg[128];
uint8_t buf[NET_BUFSIZE];
@@ -702,6 +704,38 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
return fd;
}
+static bool tap_setup_vhost(TAPState *s, Error **errp)
+{
+ VhostNetOptions options;
+
+ if (s->vhostfd == -1) {
+ return true;
+ }
+
+ options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
+ options.net_backend = &s->nc;
+ options.busyloop_timeout = s->vhost_busyloop_timeout;
+ options.opaque = (void *)(uintptr_t)s->vhostfd;
+ options.nvqs = 2;
+ options.feature_bits = kernel_feature_bits;
+ options.get_acked_features = NULL;
+ options.save_acked_features = NULL;
+ options.max_tx_queue_size = 0;
+ options.is_vhost_user = false;
+
+ s->vhost_net = vhost_net_init(&options);
+ if (!s->vhost_net) {
+ error_setg(errp,
+ "vhost-net requested but could not be initialized");
+ return false;
+ }
+
+ /* vhostfd ownership is passed to s->vhost_net */
+ s->vhostfd = -1;
+
+ return true;
+}
+
static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
const char *name,
const char *ifname, const char *script,
@@ -736,30 +770,10 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
}
}
- if (vhostfd != -1) {
- VhostNetOptions options;
-
- options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
- options.net_backend = &s->nc;
- if (tap->has_poll_us) {
- options.busyloop_timeout = tap->poll_us;
- } else {
- options.busyloop_timeout = 0;
- }
- options.opaque = (void *)(uintptr_t)vhostfd;
- options.nvqs = 2;
- options.feature_bits = kernel_feature_bits;
- options.get_acked_features = NULL;
- options.save_acked_features = NULL;
- options.max_tx_queue_size = 0;
- options.is_vhost_user = false;
-
- s->vhost_net = vhost_net_init(&options);
- if (!s->vhost_net) {
- error_setg(errp,
- "vhost-net requested but could not be initialized");
- goto failed;
- }
+ s->vhostfd = vhostfd;
+ s->vhost_busyloop_timeout = tap->has_poll_us ? tap->poll_us : 0;
+ if (!tap_setup_vhost(s, errp)) {
+ return false;
}
return true;
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH v13 3/8] qapi: add local migration parameter
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 1/8] net/tap: move vhost-net open() calls to tap_parse_vhost_fds() Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 2/8] net/tap: move vhost initialization to tap_setup_vhost() Vladimir Sementsov-Ogievskiy
@ 2026-03-19 15:53 ` Vladimir Sementsov-Ogievskiy
2026-03-24 12:24 ` Markus Armbruster
2026-03-19 15:53 ` [PATCH v13 4/8] net: introduce vmstate_net_peer_backend Vladimir Sementsov-Ogievskiy
` (6 subsequent siblings)
9 siblings, 1 reply; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-19 15:53 UTC (permalink / raw)
To: jasowang, mst
Cc: armbru, eblake, farosas, peterx, zhao1.liu, wangyanan55, philmd,
marcel.apfelbaum, eduardo, davydov-max, qemu-devel, vsementsov,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
We are going to implement the local-migration feature: some devices will be
able to transfer open file descriptors through the migration stream (which
must be a UNIX domain socket for that purpose). This allows transferring the
whole backend state without reconnecting and restarting the backend
service. For example, virtio-net will migrate its attached TAP netdev,
together with its connected file descriptors.
In this commit we introduce a migration parameter, which enables
the feature for devices that support it (none at the moment).
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
include/migration/misc.h | 2 ++
migration/options.c | 18 +++++++++++++++++-
qapi/migration.json | 12 ++++++++++--
3 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/include/migration/misc.h b/include/migration/misc.h
index 3159a5e53c3..b14dc70ea3d 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -156,4 +156,6 @@ bool multifd_device_state_save_thread_should_exit(void);
void multifd_abort_device_state_save_threads(void);
bool multifd_join_device_state_save_threads(void);
+bool migrate_local(void);
+
#endif
diff --git a/migration/options.c b/migration/options.c
index f33b2979290..d0d73169354 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -13,6 +13,7 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "qapi/util.h"
#include "exec/target_page.h"
#include "qapi/clone-visitor.h"
#include "qapi/error.h"
@@ -24,6 +25,7 @@
#include "migration/colo.h"
#include "migration/cpr.h"
#include "migration/misc.h"
+#include "migration/options.h"
#include "migration.h"
#include "migration-stats.h"
#include "qemu-file.h"
@@ -336,6 +338,12 @@ bool migrate_mapped_ram(void)
return s->capabilities[MIGRATION_CAPABILITY_MAPPED_RAM];
}
+bool migrate_local(void)
+{
+ MigrationState *s = migrate_get_current();
+ return s->parameters.local;
+}
+
bool migrate_ignore_shared(void)
{
MigrationState *s = migrate_get_current();
@@ -1055,7 +1063,7 @@ static void migrate_mark_all_params_present(MigrationParameters *p)
&p->has_announce_step, &p->has_block_bitmap_mapping,
&p->has_x_vcpu_dirty_limit_period, &p->has_vcpu_dirty_limit,
&p->has_mode, &p->has_zero_page_detection, &p->has_direct_io,
- &p->has_cpr_exec_command,
+ &p->has_cpr_exec_command, &p->has_local,
};
len = ARRAY_SIZE(has_fields);
@@ -1394,6 +1402,10 @@ static void migrate_params_test_apply(MigrationParameters *params,
if (params->has_cpr_exec_command) {
dest->cpr_exec_command = params->cpr_exec_command;
}
+
+ if (params->has_local) {
+ dest->local = params->local;
+ }
}
static void migrate_params_apply(MigrationParameters *params)
@@ -1522,6 +1534,10 @@ static void migrate_params_apply(MigrationParameters *params)
s->parameters.cpr_exec_command =
QAPI_CLONE(strList, params->cpr_exec_command);
}
+
+ if (params->has_local) {
+ s->parameters.local = params->local;
+ }
}
void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
diff --git a/qapi/migration.json b/qapi/migration.json
index 7134d4ce47e..cf7037ed3fc 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -831,7 +831,8 @@
'mode',
'zero-page-detection',
'direct-io',
- 'cpr-exec-command'] }
+ 'cpr-exec-command',
+ 'local'] }
##
# @migrate-set-parameters:
@@ -1007,6 +1008,12 @@
# is @cpr-exec. The first list element is the program's filename,
# the remainder its arguments. (Since 10.2)
#
+# @local: Enable local migration for devices that support it. Backend
+# state and its file descriptors can then be passed to the
+# destination in the migration channel. The migration channel
+# must be a Unix domain socket. Usually needs to be enabled per
+# device. (Since 11.0)
+#
# Features:
#
# @unstable: Members @x-checkpoint-delay and
@@ -1046,7 +1053,8 @@
'*mode': 'MigMode',
'*zero-page-detection': 'ZeroPageDetection',
'*direct-io': 'bool',
- '*cpr-exec-command': [ 'str' ]} }
+ '*cpr-exec-command': [ 'str' ],
+ '*local': 'bool' } }
##
# @query-migrate-parameters:
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH v13 3/8] qapi: add local migration parameter
2026-03-19 15:53 ` [PATCH v13 3/8] qapi: add local migration parameter Vladimir Sementsov-Ogievskiy
@ 2026-03-24 12:24 ` Markus Armbruster
2026-03-24 13:32 ` Vladimir Sementsov-Ogievskiy
0 siblings, 1 reply; 16+ messages in thread
From: Markus Armbruster @ 2026-03-24 12:24 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: jasowang, mst, armbru, eblake, farosas, peterx, zhao1.liu,
wangyanan55, philmd, marcel.apfelbaum, eduardo, davydov-max,
qemu-devel, yc-core, leiyang, raphael.s.norwitz, bchaney,
th.huth+qemu, berrange, pbonzini
Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
> We are going to implement local-migration feature: some devices will be
> able to transfer open file descriptors through migration stream (which
> must UNIX domain socket for that purpose). This allows to transfer the
> whole backend state without reconnecting and restarting the backend
> service. For example, virtio-net will migrate its attached TAP netdev,
> together with its connected file descriptors.
>
> In this commit we introduce a migration parameter, which enables
> the feature for devices that support it (none at the moment).
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
[...]
> void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
> diff --git a/qapi/migration.json b/qapi/migration.json
> index 7134d4ce47e..cf7037ed3fc 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -831,7 +831,8 @@
> 'mode',
> 'zero-page-detection',
> 'direct-io',
> - 'cpr-exec-command'] }
> + 'cpr-exec-command',
> + 'local'] }
>
> ##
> # @migrate-set-parameters:
> @@ -1007,6 +1008,12 @@
> # is @cpr-exec. The first list element is the program's filename,
> # the remainder its arguments. (Since 10.2)
> #
> +# @local: Enable local migration for devices that support it. Backend
> +# state and its file descriptors can then be passed to the
> +# destination in the migration channel. The migration channel
> +# must be a Unix domain socket. Usually needs to be enabled per
> +# device. (Since 11.0)
Sure you're still targeting 11.0?
> +#
> # Features:
> #
> # @unstable: Members @x-checkpoint-delay and
> @@ -1046,7 +1053,8 @@
> '*mode': 'MigMode',
> '*zero-page-detection': 'ZeroPageDetection',
> '*direct-io': 'bool',
> - '*cpr-exec-command': [ 'str' ]} }
> + '*cpr-exec-command': [ 'str' ],
> + '*local': 'bool' } }
>
> ##
> # @query-migrate-parameters:
With the appropriate Since: tag
Acked-by: Markus Armbruster <armbru@redhat.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v13 3/8] qapi: add local migration parameter
2026-03-24 12:24 ` Markus Armbruster
@ 2026-03-24 13:32 ` Vladimir Sementsov-Ogievskiy
0 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-24 13:32 UTC (permalink / raw)
To: Markus Armbruster
Cc: jasowang, mst, eblake, farosas, peterx, zhao1.liu, wangyanan55,
philmd, marcel.apfelbaum, eduardo, davydov-max, qemu-devel,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
On 24.03.26 15:24, Markus Armbruster wrote:
> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
>
>> We are going to implement local-migration feature: some devices will be
>> able to transfer open file descriptors through migration stream (which
>> must UNIX domain socket for that purpose). This allows to transfer the
>> whole backend state without reconnecting and restarting the backend
>> service. For example, virtio-net will migrate its attached TAP netdev,
>> together with its connected file descriptors.
>>
>> In this commit we introduce a migration parameter, which enables
>> the feature for devices that support it (none at the moment).
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>
> [...]
>
>> void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
>> diff --git a/qapi/migration.json b/qapi/migration.json
>> index 7134d4ce47e..cf7037ed3fc 100644
>> --- a/qapi/migration.json
>> +++ b/qapi/migration.json
>> @@ -831,7 +831,8 @@
>> 'mode',
>> 'zero-page-detection',
>> 'direct-io',
>> - 'cpr-exec-command'] }
>> + 'cpr-exec-command',
>> + 'local'] }
>>
>> ##
>> # @migrate-set-parameters:
>> @@ -1007,6 +1008,12 @@
>> # is @cpr-exec. The first list element is the program's filename,
>> # the remainder its arguments. (Since 10.2)
>> #
>> +# @local: Enable local migration for devices that support it. Backend
>> +# state and its file descriptors can then be passed to the
>> +# destination in the migration channel. The migration channel
>> +# must be a Unix domain socket. Usually needs to be enabled per
>> +# device. (Since 11.0)
>
> Sure your're still targeting 11.0?
Time passes quickly) Will fix to 11.1.
>
>> +#
>> # Features:
>> #
>> # @unstable: Members @x-checkpoint-delay and
>> @@ -1046,7 +1053,8 @@
>> '*mode': 'MigMode',
>> '*zero-page-detection': 'ZeroPageDetection',
>> '*direct-io': 'bool',
>> - '*cpr-exec-command': [ 'str' ]} }
>> + '*cpr-exec-command': [ 'str' ],
>> + '*local': 'bool' } }
>>
>> ##
>> # @query-migrate-parameters:
>
> With the appropriate Since: tag
> Acked-by: Markus Armbruster <armbru@redhat.com>
>
Thanks!
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v13 4/8] net: introduce vmstate_net_peer_backend
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
` (2 preceding siblings ...)
2026-03-19 15:53 ` [PATCH v13 3/8] qapi: add local migration parameter Vladimir Sementsov-Ogievskiy
@ 2026-03-19 15:53 ` Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 5/8] virtio-net: support local migration of backend Vladimir Sementsov-Ogievskiy
` (5 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-19 15:53 UTC (permalink / raw)
To: jasowang, mst
Cc: armbru, eblake, farosas, peterx, zhao1.liu, wangyanan55, philmd,
marcel.apfelbaum, eduardo, davydov-max, qemu-devel, vsementsov,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
To implement backend migration in virtio-net in the next commit, we need
a generic API to migrate a net backend. Here it is.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
include/net/net.h | 4 ++++
net/net.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/include/net/net.h b/include/net/net.h
index 45bc86fc86b..aa34043b1ac 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -5,6 +5,7 @@
#include "qapi/qapi-types-net.h"
#include "net/queue.h"
#include "hw/core/qdev-properties-system.h"
+#include "migration/vmstate.h"
#define MAC_FMT "%02X:%02X:%02X:%02X:%02X:%02X"
#define MAC_ARG(x) ((uint8_t *)(x))[0], ((uint8_t *)(x))[1], \
@@ -110,6 +111,7 @@ typedef struct NetClientInfo {
SetSteeringEBPF *set_steering_ebpf;
NetCheckPeerType *check_peer_type;
GetVHostNet *get_vhost_net;
+ const VMStateDescription *backend_vmsd;
} NetClientInfo;
struct NetClientState {
@@ -354,4 +356,6 @@ static inline bool net_peer_needs_padding(NetClientState *nc)
return nc->peer && !nc->peer->do_not_pad;
}
+extern const VMStateInfo vmstate_net_peer_backend;
+
#endif
diff --git a/net/net.c b/net/net.c
index a176936f9bc..8d09754fa0d 100644
--- a/net/net.c
+++ b/net/net.c
@@ -58,6 +58,7 @@
#include "qapi/string-output-visitor.h"
#include "qapi/qobject-input-visitor.h"
#include "standard-headers/linux/virtio_net.h"
+#include "migration/vmstate.h"
/* Net bridge is currently not supported for W32. */
#if !defined(_WIN32)
@@ -2173,3 +2174,49 @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
assert(size == 0);
return 0;
}
+
+static int get_peer_backend(QEMUFile *f, void *pv, size_t size,
+ const VMStateField *field)
+{
+ NetClientState *nc = pv;
+ Error *local_err = NULL;
+ int ret;
+
+ if (!nc->peer) {
+ return -EINVAL;
+ }
+ nc = nc->peer;
+
+ ret = vmstate_load_state(f, nc->info->backend_vmsd, nc, 0, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ }
+
+ return ret;
+}
+
+static int put_peer_backend(QEMUFile *f, void *pv, size_t size,
+ const VMStateField *field, JSONWriter *vmdesc)
+{
+ NetClientState *nc = pv;
+ Error *local_err = NULL;
+ int ret;
+
+ if (!nc->peer) {
+ return -EINVAL;
+ }
+ nc = nc->peer;
+
+ ret = vmstate_save_state(f, nc->info->backend_vmsd, nc, 0, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ }
+
+ return ret;
+}
+
+const VMStateInfo vmstate_net_peer_backend = {
+ .name = "virtio-net-nic-nc-backend",
+ .get = get_peer_backend,
+ .put = put_peer_backend,
+};
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH v13 5/8] virtio-net: support local migration of backend
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
` (3 preceding siblings ...)
2026-03-19 15:53 ` [PATCH v13 4/8] net: introduce vmstate_net_peer_backend Vladimir Sementsov-Ogievskiy
@ 2026-03-19 15:53 ` Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 6/8] net/tap: support local migration with virtio-net Vladimir Sementsov-Ogievskiy
` (4 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-19 15:53 UTC (permalink / raw)
To: jasowang, mst
Cc: armbru, eblake, farosas, peterx, zhao1.liu, wangyanan55, philmd,
marcel.apfelbaum, eduardo, davydov-max, qemu-devel, vsementsov,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
Add a virtio-net option, local-migration, which is true by default,
but false for older machine types, which don't support the feature.
When both the global migration parameter "local" and the new virtio-net
parameter "local-migration" are true, virtio-net transfers the whole
net backend to the destination, including open file descriptors.
Of course, this is only for local migration, and the channel must be
a UNIX domain socket.
This way the management tool need not care about creating a new TAP, and
need not handle switching to it. Migration downtime becomes shorter.
Support for TAP will come in the next commit.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
hw/core/machine.c | 1 +
hw/net/virtio-net.c | 137 ++++++++++++++++++++++++++++++++-
include/hw/virtio/virtio-net.h | 2 +
include/net/net.h | 2 +
4 files changed, 141 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index a14ad05b9a6..8b2e666854b 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -40,6 +40,7 @@
GlobalProperty hw_compat_10_2[] = {
{ "scsi-block", "migrate-pr", "off" },
+ { TYPE_VIRTIO_NET, "local-migration", "false" },
};
const size_t hw_compat_10_2_len = G_N_ELEMENTS(hw_compat_10_2);
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 2a5d642a647..158b9247a58 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -38,8 +38,10 @@
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
+#include "migration/options.h"
#include "standard-headers/linux/ethtool.h"
#include "system/system.h"
+#include "system/runstate.h"
#include "system/replay.h"
#include "trace.h"
#include "monitor/qdev.h"
@@ -3060,7 +3062,17 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
n->multiqueue = multiqueue;
virtio_net_change_num_queues(n, max * 2 + 1);
- virtio_net_set_queue_pairs(n);
+ /*
+ * virtio_net_set_multiqueue() called from set_features(0) on early
+ * reset, when peer may wait for incoming (and is not initialized
+ * yet).
+ * Don't worry about it: virtio_net_set_queue_pairs() will be called
+ * later from virtio_net_post_load_device(), and anyway will be
+ * noop for local incoming migration with live backend passing.
+ */
+ if (!n->peers_wait_incoming) {
+ virtio_net_set_queue_pairs(n);
+ }
}
static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n)
@@ -3089,6 +3101,17 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features,
virtio_add_feature_ex(features, VIRTIO_NET_F_MAC);
+ if (n->peers_wait_incoming) {
+ /*
+ * Excessive feature set is OK for early initialization when
+ * we wait for local incoming migration: actual guest-negotiated
+ * features will come with migration stream anyway. And we are sure
+ * that we support same host-features as source, because the backend
+ * is the same (the same TAP device, for example).
+ */
+ return;
+ }
+
if (!peer_has_vnet_hdr(n)) {
virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM);
virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_TSO4);
@@ -3179,6 +3202,18 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features,
}
}
+static bool virtio_net_update_host_features(VirtIONet *n, Error **errp)
+{
+ ERRP_GUARD();
+ VirtIODevice *vdev = VIRTIO_DEVICE(n);
+
+ peer_test_vnet_hdr(n);
+
+ virtio_net_get_features(vdev, &vdev->host_features, errp);
+
+ return !*errp;
+}
+
static int virtio_net_post_load_device(void *opaque, int version_id)
{
VirtIONet *n = opaque;
@@ -3300,6 +3335,9 @@ struct VirtIONetMigTmp {
uint16_t curr_queue_pairs_1;
uint8_t has_ufo;
uint32_t has_vnet_hdr;
+
+ NetClientState *ncs;
+ uint32_t max_queue_pairs;
};
/* The 2nd and subsequent tx_waiting flags are loaded later than
@@ -3569,6 +3607,57 @@ static const VMStateDescription vhost_user_net_backend_state = {
}
};
+static bool virtio_net_migrate_local(void *opaque, int version_id)
+{
+ VirtIONet *n = opaque;
+
+ return migrate_local() && n->local_migration;
+}
+
+static int virtio_net_nic_pre_save(void *opaque)
+{
+ struct VirtIONetMigTmp *tmp = opaque;
+
+ tmp->ncs = tmp->parent->nic->ncs;
+ tmp->max_queue_pairs = tmp->parent->max_queue_pairs;
+
+ return 0;
+}
+
+static int virtio_net_nic_pre_load(void *opaque)
+{
+ /* Reuse the pointer setup from save */
+ virtio_net_nic_pre_save(opaque);
+
+ return 0;
+}
+
+static int virtio_net_nic_post_load(void *opaque, int version_id)
+{
+ struct VirtIONetMigTmp *tmp = opaque;
+ Error *local_err = NULL;
+
+ if (!virtio_net_update_host_features(tmp->parent, &local_err)) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const VMStateDescription vmstate_virtio_net_nic = {
+ .name = "virtio-net-nic",
+ .pre_load = virtio_net_nic_pre_load,
+ .pre_save = virtio_net_nic_pre_save,
+ .post_load = virtio_net_nic_post_load,
+ .fields = (const VMStateField[]) {
+ VMSTATE_VARRAY_UINT32(ncs, struct VirtIONetMigTmp,
+ max_queue_pairs, 0, vmstate_net_peer_backend,
+ NetClientState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
static const VMStateDescription vmstate_virtio_net_device = {
.name = "virtio-net-device",
.version_id = VIRTIO_NET_VM_VERSION,
@@ -3600,6 +3689,9 @@ static const VMStateDescription vmstate_virtio_net_device = {
* but based on the uint.
*/
VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
+ VMSTATE_WITH_TMP_TEST(VirtIONet, virtio_net_migrate_local,
+ struct VirtIONetMigTmp,
+ vmstate_virtio_net_nic),
VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
vmstate_virtio_net_has_vnet),
VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
@@ -3864,6 +3956,42 @@ static bool failover_hide_primary_device(DeviceListener *listener,
return qatomic_read(&n->failover_primary_hidden);
}
+static bool virtio_net_check_peers_wait_incoming(VirtIONet *n, bool *waiting,
+ Error **errp)
+{
+ bool has_waiting = false;
+ bool has_not_waiting = false;
+
+ for (int i = 0; i < n->max_queue_pairs; i++) {
+ NetClientState *peer = n->nic->ncs[i].peer;
+ if (!peer) {
+ continue;
+ }
+
+ if (peer->info->is_wait_incoming &&
+ peer->info->is_wait_incoming(peer)) {
+ has_waiting = true;
+ } else {
+ has_not_waiting = true;
+ }
+
+ if (has_waiting && has_not_waiting) {
+ error_setg(errp, "Mixed peer states: some peers wait for incoming "
+ "migration while others don't");
+ return false;
+ }
+ }
+
+ if (has_waiting && !runstate_check(RUN_STATE_INMIGRATE)) {
+ error_setg(errp, "Peers wait for incoming, but it's not an incoming "
+ "migration.");
+ return false;
+ }
+
+ *waiting = has_waiting;
+ return true;
+}
+
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -4001,6 +4129,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
n->nic->ncs[i].do_not_pad = true;
}
+ if (!virtio_net_check_peers_wait_incoming(n, &n->peers_wait_incoming,
+ errp)) {
+ virtio_cleanup(vdev);
+ return;
+ }
+
peer_test_vnet_hdr(n);
if (peer_has_vnet_hdr(n)) {
n->host_hdr_len = sizeof(struct virtio_net_hdr);
@@ -4310,6 +4444,7 @@ static const Property virtio_net_properties[] = {
host_features_ex,
VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM,
true),
+ DEFINE_PROP_BOOL("local-migration", VirtIONet, local_migration, true),
};
static void virtio_net_class_init(ObjectClass *klass, const void *data)
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 371e3764282..0c14e314409 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -230,6 +230,8 @@ struct VirtIONet {
struct EBPFRSSContext ebpf_rss;
uint32_t nr_ebpf_rss_fds;
char **ebpf_rss_fds;
+ bool peers_wait_incoming;
+ bool local_migration;
};
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
diff --git a/include/net/net.h b/include/net/net.h
index aa34043b1ac..d4cf399d4a8 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -82,6 +82,7 @@ typedef void (SocketReadStateFinalize)(SocketReadState *rs);
typedef void (NetAnnounce)(NetClientState *);
typedef bool (SetSteeringEBPF)(NetClientState *, int);
typedef bool (NetCheckPeerType)(NetClientState *, ObjectClass *, Error **);
+typedef bool (IsWaitIncoming)(NetClientState *);
typedef struct vhost_net *(GetVHostNet)(NetClientState *nc);
typedef struct NetClientInfo {
@@ -110,6 +111,7 @@ typedef struct NetClientInfo {
NetAnnounce *announce;
SetSteeringEBPF *set_steering_ebpf;
NetCheckPeerType *check_peer_type;
+ IsWaitIncoming *is_wait_incoming;
GetVHostNet *get_vhost_net;
const VMStateDescription *backend_vmsd;
} NetClientInfo;
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH v13 6/8] net/tap: support local migration with virtio-net
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
` (4 preceding siblings ...)
2026-03-19 15:53 ` [PATCH v13 5/8] virtio-net: support local migration of backend Vladimir Sementsov-Ogievskiy
@ 2026-03-19 15:53 ` Vladimir Sementsov-Ogievskiy
2026-03-24 12:33 ` Markus Armbruster
2026-03-19 15:53 ` [PATCH v13 7/8] tests/functional: add skipWithoutSudo() decorator Vladimir Sementsov-Ogievskiy
` (3 subsequent siblings)
9 siblings, 1 reply; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-19 15:53 UTC (permalink / raw)
To: jasowang, mst
Cc: armbru, eblake, farosas, peterx, zhao1.liu, wangyanan55, philmd,
marcel.apfelbaum, eduardo, davydov-max, qemu-devel, vsementsov,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
Support transferring TAP state (including the open fd) through the
migration stream as part of virtio-net "local-migration".
Add a new option, incoming-fds, which should be set to true to
trigger the new logic.
For the new option, require script and downscript to be explicitly unset,
to keep the possibility of implementing support for them in the future.
Note that read polling is disabled on source stop for TAP migration:
otherwise, the source process may steal packets from the TAP fd even
after the source VM has stopped.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
net/tap.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++---
qapi/net.json | 7 ++-
2 files changed, 147 insertions(+), 7 deletions(-)
diff --git a/net/tap.c b/net/tap.c
index 9d6213fc3e5..2156b6cbb73 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -36,6 +36,7 @@
#include "net/net.h"
#include "clients.h"
#include "monitor/monitor.h"
+#include "system/runstate.h"
#include "system/system.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
@@ -86,6 +87,9 @@ typedef struct TAPState {
VHostNetState *vhost_net;
unsigned host_vnet_hdr_len;
Notifier exit;
+
+ bool read_poll_detached;
+ VMChangeStateEntry *vmstate;
} TAPState;
static void launch_script(const char *setup_script, const char *ifname,
@@ -94,19 +98,25 @@ static void launch_script(const char *setup_script, const char *ifname,
static void tap_send(void *opaque);
static void tap_writable(void *opaque);
+static bool tap_is_explicit_no_scirpt(const char *script_arg)
+{
+ return script_arg &&
+ (script_arg[0] == '\0' || strcmp(script_arg, "no") == 0);
+}
+
static char *tap_parse_script(const char *script_arg, const char *default_path)
{
g_autofree char *res = g_strdup(script_arg);
- if (!res) {
- res = get_relocated_path(default_path);
+ if (tap_is_explicit_no_scirpt(script_arg)) {
+ return NULL;
}
- if (res[0] == '\0' || strcmp(res, "no") == 0) {
- return NULL;
+ if (!script_arg) {
+ return get_relocated_path(default_path);
}
- return g_steal_pointer(&res);
+ return g_strdup(script_arg);
}
static void tap_update_fd_handler(TAPState *s)
@@ -123,6 +133,23 @@ static void tap_read_poll(TAPState *s, bool enable)
tap_update_fd_handler(s);
}
+static void tap_vm_state_change(void *opaque, bool running, RunState state)
+{
+ TAPState *s = opaque;
+
+ if (running) {
+ if (s->read_poll_detached) {
+ tap_read_poll(s, true);
+ s->read_poll_detached = false;
+ }
+ } else if (state == RUN_STATE_FINISH_MIGRATE) {
+ if (s->read_poll) {
+ s->read_poll_detached = true;
+ tap_read_poll(s, false);
+ }
+ }
+}
+
static void tap_write_poll(TAPState *s, bool enable)
{
s->write_poll = enable;
@@ -353,6 +380,11 @@ static void tap_cleanup(NetClientState *nc)
s->exit.notify = NULL;
}
+ if (s->vmstate) {
+ qemu_del_vm_change_state_handler(s->vmstate);
+ s->vmstate = NULL;
+ }
+
tap_read_poll(s, false);
tap_write_poll(s, false);
close(s->fd);
@@ -393,6 +425,65 @@ static VHostNetState *tap_get_vhost_net(NetClientState *nc)
return s->vhost_net;
}
+static bool tap_is_wait_incoming(NetClientState *nc)
+{
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
+ return s->fd == -1;
+}
+
+static int tap_pre_load(void *opaque)
+{
+ TAPState *s = opaque;
+
+ if (s->fd != -1) {
+ error_report(
+ "TAP is already initialized and cannot receive incoming fd");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static bool tap_setup_vhost(TAPState *s, Error **errp);
+
+static int tap_post_load(void *opaque, int version_id)
+{
+ TAPState *s = opaque;
+ Error *local_err = NULL;
+
+ tap_read_poll(s, true);
+
+ if (s->fd < 0) {
+ return -1;
+ }
+
+ if (!tap_setup_vhost(s, &local_err)) {
+ error_prepend(&local_err,
+ "Failed to setup vhost during TAP post-load: ");
+ error_report_err(local_err);
+ return -1;
+ }
+
+ return 0;
+}
+
+static const VMStateDescription vmstate_tap = {
+ .name = "net-tap",
+ .pre_load = tap_pre_load,
+ .post_load = tap_post_load,
+ .fields = (const VMStateField[]) {
+ VMSTATE_FD(fd, TAPState),
+ VMSTATE_BOOL(using_vnet_hdr, TAPState),
+ VMSTATE_BOOL(has_ufo, TAPState),
+ VMSTATE_BOOL(has_uso, TAPState),
+ VMSTATE_BOOL(has_tunnel, TAPState),
+ VMSTATE_BOOL(enabled, TAPState),
+ VMSTATE_UINT32(host_vnet_hdr_len, TAPState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
/* fd support */
static NetClientInfo net_tap_info = {
@@ -412,7 +503,9 @@ static NetClientInfo net_tap_info = {
.set_vnet_le = tap_set_vnet_le,
.set_vnet_be = tap_set_vnet_be,
.set_steering_ebpf = tap_set_steering_ebpf,
+ .is_wait_incoming = tap_is_wait_incoming,
.get_vhost_net = tap_get_vhost_net,
+ .backend_vmsd = &vmstate_tap,
};
static TAPState *net_tap_fd_init(NetClientState *peer,
@@ -748,6 +841,9 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
int sndbuf =
(tap->has_sndbuf && tap->sndbuf) ? MIN(tap->sndbuf, INT_MAX) : INT_MAX;
+ s->read_poll_detached = false;
+ s->vmstate = qemu_add_vm_change_state_handler(tap_vm_state_change, s);
+
if (!tap_set_sndbuf(fd, sndbuf, sndbuf_required ? errp : NULL) &&
sndbuf_required) {
goto failed;
@@ -779,6 +875,8 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
return true;
failed:
+ qemu_del_vm_change_state_handler(s->vmstate);
+ s->vmstate = NULL;
qemu_del_net_client(&s->nc);
return false;
}
@@ -910,6 +1008,26 @@ int net_init_tap(const Netdev *netdev, const char *name,
return -1;
}
+ if (tap->incoming_fds &&
+ (tap->fd || tap->fds || tap->helper || tap->br || tap->ifname ||
+ tap->has_sndbuf || tap->has_vnet_hdr)) {
+ error_setg(errp, "incoming-fds is incompatible with "
+ "fd=, fds=, helper=, br=, ifname=, sndbuf= and vnet_hdr=");
+ return -1;
+ }
+
+ if (tap->incoming_fds &&
+ !(tap_is_explicit_no_scirpt(tap->script) &&
+ tap_is_explicit_no_scirpt(tap->downscript))) {
+ /*
+ * script="" and downscript="" are silently supported to be consistent
+ * with cases without incoming_fds, but do not care to put this into
+ * error message.
+ */
+ error_setg(errp, "incoming-fds requires script=no and downscript=no");
+ return -1;
+ }
+
queues = tap_parse_fds_and_queues(tap, &fds, errp);
if (queues < 0) {
return -1;
@@ -928,7 +1046,24 @@ int net_init_tap(const Netdev *netdev, const char *name,
goto fail;
}
- if (fds) {
+ if (tap->incoming_fds) {
+ for (i = 0; i < queues; i++) {
+ NetClientState *nc;
+ TAPState *s;
+
+ nc = qemu_new_net_client(&net_tap_info, peer, "tap", name);
+ qemu_set_info_str(nc, "incoming");
+
+ s = DO_UPCAST(TAPState, nc, nc);
+ s->fd = -1;
+ if (vhost_fds) {
+ s->vhostfd = vhost_fds[i];
+ s->vhost_busyloop_timeout = tap->has_poll_us ? tap->poll_us : 0;
+ } else {
+ s->vhostfd = -1;
+ }
+ }
+ } else if (fds) {
for (i = 0; i < queues; i++) {
if (i == 0) {
vnet_hdr = tap_probe_vnet_hdr(fds[i], errp);
diff --git a/qapi/net.json b/qapi/net.json
index 118bd349651..2240de7dbf6 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -355,6 +355,10 @@
# @poll-us: maximum number of microseconds that could be spent on busy
# polling for tap (since 2.7)
#
+# @incoming-fds: do not open or create any TAP devices. Prepare for
+# getting opened TAP file descriptors from incoming migration
+# stream. (Since 11.0)
+#
# Since: 1.2
##
{ 'struct': 'NetdevTapOptions',
@@ -373,7 +377,8 @@
'*vhostfds': 'str',
'*vhostforce': 'bool',
'*queues': 'uint32',
- '*poll-us': 'uint32'} }
+ '*poll-us': 'uint32',
+ '*incoming-fds': 'bool' } }
##
# @NetdevSocketOptions:
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH v13 6/8] net/tap: support local migration with virtio-net
2026-03-19 15:53 ` [PATCH v13 6/8] net/tap: support local migration with virtio-net Vladimir Sementsov-Ogievskiy
@ 2026-03-24 12:33 ` Markus Armbruster
2026-03-24 13:51 ` Vladimir Sementsov-Ogievskiy
0 siblings, 1 reply; 16+ messages in thread
From: Markus Armbruster @ 2026-03-24 12:33 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: jasowang, mst, eblake, farosas, peterx, zhao1.liu, wangyanan55,
philmd, marcel.apfelbaum, eduardo, davydov-max, qemu-devel,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
> Support transferring of TAP state (including open fd) through
> migration stream as part of viritio-net "local-migration".
>
> Add new option, incoming-fds, which should be set to true to
> trigger new logic.
>
> For new option require explicitly unset script and downscript,
> to keep possibility of implementing support for them in future.
>
> Note disabling read polling on source stop for TAP migration:
> otherwise, source process may steal packages from TAP fd even
> after source vm STOP.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
> net/tap.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++---
> qapi/net.json | 7 ++-
> 2 files changed, 147 insertions(+), 7 deletions(-)
>
> diff --git a/net/tap.c b/net/tap.c
> index 9d6213fc3e5..2156b6cbb73 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -36,6 +36,7 @@
> #include "net/net.h"
> #include "clients.h"
> #include "monitor/monitor.h"
> +#include "system/runstate.h"
> #include "system/system.h"
> #include "qapi/error.h"
> #include "qemu/cutils.h"
> @@ -86,6 +87,9 @@ typedef struct TAPState {
> VHostNetState *vhost_net;
> unsigned host_vnet_hdr_len;
> Notifier exit;
> +
> + bool read_poll_detached;
> + VMChangeStateEntry *vmstate;
> } TAPState;
>
> static void launch_script(const char *setup_script, const char *ifname,
> @@ -94,19 +98,25 @@ static void launch_script(const char *setup_script, const char *ifname,
> static void tap_send(void *opaque);
> static void tap_writable(void *opaque);
>
> +static bool tap_is_explicit_no_scirpt(const char *script_arg)
"scirpt"? Do you mean "script"?
> +{
> + return script_arg &&
> + (script_arg[0] == '\0' || strcmp(script_arg, "no") == 0);
> +}
> +
> static char *tap_parse_script(const char *script_arg, const char *default_path)
> {
> g_autofree char *res = g_strdup(script_arg);
>
> - if (!res) {
> - res = get_relocated_path(default_path);
> + if (tap_is_explicit_no_scirpt(script_arg)) {
> + return NULL;
> }
>
> - if (res[0] == '\0' || strcmp(res, "no") == 0) {
> - return NULL;
> + if (!script_arg) {
> + return get_relocated_path(default_path);
> }
>
> - return g_steal_pointer(&res);
> + return g_strdup(script_arg);
> }
>
> static void tap_update_fd_handler(TAPState *s)
> @@ -123,6 +133,23 @@ static void tap_read_poll(TAPState *s, bool enable)
> tap_update_fd_handler(s);
> }
>
> +static void tap_vm_state_change(void *opaque, bool running, RunState state)
> +{
> + TAPState *s = opaque;
> +
> + if (running) {
> + if (s->read_poll_detached) {
> + tap_read_poll(s, true);
> + s->read_poll_detached = false;
> + }
> + } else if (state == RUN_STATE_FINISH_MIGRATE) {
> + if (s->read_poll) {
> + s->read_poll_detached = true;
> + tap_read_poll(s, false);
> + }
> + }
> +}
> +
> static void tap_write_poll(TAPState *s, bool enable)
> {
> s->write_poll = enable;
> @@ -353,6 +380,11 @@ static void tap_cleanup(NetClientState *nc)
> s->exit.notify = NULL;
> }
>
> + if (s->vmstate) {
> + qemu_del_vm_change_state_handler(s->vmstate);
> + s->vmstate = NULL;
> + }
> +
> tap_read_poll(s, false);
> tap_write_poll(s, false);
> close(s->fd);
> @@ -393,6 +425,65 @@ static VHostNetState *tap_get_vhost_net(NetClientState *nc)
> return s->vhost_net;
> }
>
> +static bool tap_is_wait_incoming(NetClientState *nc)
> +{
> + TAPState *s = DO_UPCAST(TAPState, nc, nc);
> + assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
> + return s->fd == -1;
> +}
> +
> +static int tap_pre_load(void *opaque)
> +{
> + TAPState *s = opaque;
> +
> + if (s->fd != -1) {
> + error_report(
> + "TAP is already initialized and cannot receive incoming fd");
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static bool tap_setup_vhost(TAPState *s, Error **errp);
> +
> +static int tap_post_load(void *opaque, int version_id)
> +{
> + TAPState *s = opaque;
> + Error *local_err = NULL;
> +
> + tap_read_poll(s, true);
> +
> + if (s->fd < 0) {
> + return -1;
> + }
> +
> + if (!tap_setup_vhost(s, &local_err)) {
> + error_prepend(&local_err,
> + "Failed to setup vhost during TAP post-load: ");
> + error_report_err(local_err);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static const VMStateDescription vmstate_tap = {
> + .name = "net-tap",
> + .pre_load = tap_pre_load,
> + .post_load = tap_post_load,
> + .fields = (const VMStateField[]) {
> + VMSTATE_FD(fd, TAPState),
> + VMSTATE_BOOL(using_vnet_hdr, TAPState),
> + VMSTATE_BOOL(has_ufo, TAPState),
> + VMSTATE_BOOL(has_uso, TAPState),
> + VMSTATE_BOOL(has_tunnel, TAPState),
> + VMSTATE_BOOL(enabled, TAPState),
> + VMSTATE_UINT32(host_vnet_hdr_len, TAPState),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> /* fd support */
>
> static NetClientInfo net_tap_info = {
> @@ -412,7 +503,9 @@ static NetClientInfo net_tap_info = {
> .set_vnet_le = tap_set_vnet_le,
> .set_vnet_be = tap_set_vnet_be,
> .set_steering_ebpf = tap_set_steering_ebpf,
> + .is_wait_incoming = tap_is_wait_incoming,
> .get_vhost_net = tap_get_vhost_net,
> + .backend_vmsd = &vmstate_tap,
> };
>
> static TAPState *net_tap_fd_init(NetClientState *peer,
> @@ -748,6 +841,9 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
> int sndbuf =
> (tap->has_sndbuf && tap->sndbuf) ? MIN(tap->sndbuf, INT_MAX) : INT_MAX;
>
> + s->read_poll_detached = false;
> + s->vmstate = qemu_add_vm_change_state_handler(tap_vm_state_change, s);
> +
> if (!tap_set_sndbuf(fd, sndbuf, sndbuf_required ? errp : NULL) &&
> sndbuf_required) {
> goto failed;
> @@ -779,6 +875,8 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
> return true;
>
> failed:
> + qemu_del_vm_change_state_handler(s->vmstate);
> + s->vmstate = NULL;
> qemu_del_net_client(&s->nc);
> return false;
> }
> @@ -910,6 +1008,26 @@ int net_init_tap(const Netdev *netdev, const char *name,
> return -1;
> }
>
> + if (tap->incoming_fds &&
> + (tap->fd || tap->fds || tap->helper || tap->br || tap->ifname ||
> + tap->has_sndbuf || tap->has_vnet_hdr)) {
> + error_setg(errp, "incoming-fds is incompatible with "
> + "fd=, fds=, helper=, br=, ifname=, sndbuf= and vnet_hdr=");
@incoming-fds excludes certain optional members, and ...
> + return -1;
> + }
> +
> + if (tap->incoming_fds &&
> + !(tap_is_explicit_no_scirpt(tap->script) &&
> + tap_is_explicit_no_scirpt(tap->downscript))) {
> + /*
> + * script="" and downscript="" are silently supported to be consistent
> + * with cases without incoming_fds, but do not care to put this into
> + * error message.
> + */
> + error_setg(errp, "incoming-fds requires script=no and downscript=no");
... requires others. Not documented in net.json. Should it be?
> + return -1;
> + }
> +
> queues = tap_parse_fds_and_queues(tap, &fds, errp);
> if (queues < 0) {
> return -1;
> @@ -928,7 +1046,24 @@ int net_init_tap(const Netdev *netdev, const char *name,
> goto fail;
> }
>
> - if (fds) {
> + if (tap->incoming_fds) {
> + for (i = 0; i < queues; i++) {
> + NetClientState *nc;
> + TAPState *s;
> +
> + nc = qemu_new_net_client(&net_tap_info, peer, "tap", name);
> + qemu_set_info_str(nc, "incoming");
> +
> + s = DO_UPCAST(TAPState, nc, nc);
> + s->fd = -1;
> + if (vhost_fds) {
> + s->vhostfd = vhost_fds[i];
> + s->vhost_busyloop_timeout = tap->has_poll_us ? tap->poll_us : 0;
> + } else {
> + s->vhostfd = -1;
> + }
> + }
> + } else if (fds) {
> for (i = 0; i < queues; i++) {
> if (i == 0) {
> vnet_hdr = tap_probe_vnet_hdr(fds[i], errp);
> diff --git a/qapi/net.json b/qapi/net.json
> index 118bd349651..2240de7dbf6 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -355,6 +355,10 @@
> # @poll-us: maximum number of microseconds that could be spent on busy
> # polling for tap (since 2.7)
> #
> +# @incoming-fds: do not open or create any TAP devices. Prepare for
> +# getting opened TAP file descriptors from incoming migration
> +# stream. (Since 11.0)
Let's scratch "opened".
Sure you're still targeting 11.0?
> +#
> # Since: 1.2
> ##
> { 'struct': 'NetdevTapOptions',
> @@ -373,7 +377,8 @@
> '*vhostfds': 'str',
> '*vhostforce': 'bool',
> '*queues': 'uint32',
> - '*poll-us': 'uint32'} }
> + '*poll-us': 'uint32',
> + '*incoming-fds': 'bool' } }
>
> ##
> # @NetdevSocketOptions:
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH v13 6/8] net/tap: support local migration with virtio-net
2026-03-24 12:33 ` Markus Armbruster
@ 2026-03-24 13:51 ` Vladimir Sementsov-Ogievskiy
0 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-24 13:51 UTC (permalink / raw)
To: Markus Armbruster
Cc: jasowang, mst, eblake, farosas, peterx, zhao1.liu, wangyanan55,
philmd, marcel.apfelbaum, eduardo, davydov-max, qemu-devel,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
On 24.03.26 15:33, Markus Armbruster wrote:
> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
>
>> Support transferring of TAP state (including open fd) through
>> migration stream as part of viritio-net "local-migration".
>>
>> Add new option, incoming-fds, which should be set to true to
>> trigger new logic.
>>
>> For new option require explicitly unset script and downscript,
>> to keep possibility of implementing support for them in future.
>>
>> Note disabling read polling on source stop for TAP migration:
>> otherwise, source process may steal packages from TAP fd even
>> after source vm STOP.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>> net/tap.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++---
>> qapi/net.json | 7 ++-
>> 2 files changed, 147 insertions(+), 7 deletions(-)
>>
>> diff --git a/net/tap.c b/net/tap.c
>> index 9d6213fc3e5..2156b6cbb73 100644
>> --- a/net/tap.c
>> +++ b/net/tap.c
>> @@ -36,6 +36,7 @@
>> #include "net/net.h"
>> #include "clients.h"
>> #include "monitor/monitor.h"
>> +#include "system/runstate.h"
>> #include "system/system.h"
>> #include "qapi/error.h"
>> #include "qemu/cutils.h"
>> @@ -86,6 +87,9 @@ typedef struct TAPState {
>> VHostNetState *vhost_net;
>> unsigned host_vnet_hdr_len;
>> Notifier exit;
>> +
>> + bool read_poll_detached;
>> + VMChangeStateEntry *vmstate;
>> } TAPState;
>>
>> static void launch_script(const char *setup_script, const char *ifname,
>> @@ -94,19 +98,25 @@ static void launch_script(const char *setup_script, const char *ifname,
>> static void tap_send(void *opaque);
>> static void tap_writable(void *opaque);
>>
>> +static bool tap_is_explicit_no_scirpt(const char *script_arg)
>
> "scirpt"? Do you mean "script"?
>
>> +{
>> + return script_arg &&
>> + (script_arg[0] == '\0' || strcmp(script_arg, "no") == 0);
>> +}
>> +
>> static char *tap_parse_script(const char *script_arg, const char *default_path)
>> {
>> g_autofree char *res = g_strdup(script_arg);
>>
>> - if (!res) {
>> - res = get_relocated_path(default_path);
>> + if (tap_is_explicit_no_scirpt(script_arg)) {
>> + return NULL;
>> }
>>
>> - if (res[0] == '\0' || strcmp(res, "no") == 0) {
>> - return NULL;
>> + if (!script_arg) {
>> + return get_relocated_path(default_path);
>> }
>>
>> - return g_steal_pointer(&res);
>> + return g_strdup(script_arg);
>> }
>>
>> static void tap_update_fd_handler(TAPState *s)
>> @@ -123,6 +133,23 @@ static void tap_read_poll(TAPState *s, bool enable)
>> tap_update_fd_handler(s);
>> }
>>
>> +static void tap_vm_state_change(void *opaque, bool running, RunState state)
>> +{
>> + TAPState *s = opaque;
>> +
>> + if (running) {
>> + if (s->read_poll_detached) {
>> + tap_read_poll(s, true);
>> + s->read_poll_detached = false;
>> + }
>> + } else if (state == RUN_STATE_FINISH_MIGRATE) {
>> + if (s->read_poll) {
>> + s->read_poll_detached = true;
>> + tap_read_poll(s, false);
>> + }
>> + }
>> +}
>> +
>> static void tap_write_poll(TAPState *s, bool enable)
>> {
>> s->write_poll = enable;
>> @@ -353,6 +380,11 @@ static void tap_cleanup(NetClientState *nc)
>> s->exit.notify = NULL;
>> }
>>
>> + if (s->vmstate) {
>> + qemu_del_vm_change_state_handler(s->vmstate);
>> + s->vmstate = NULL;
>> + }
>> +
>> tap_read_poll(s, false);
>> tap_write_poll(s, false);
>> close(s->fd);
>> @@ -393,6 +425,65 @@ static VHostNetState *tap_get_vhost_net(NetClientState *nc)
>> return s->vhost_net;
>> }
>>
>> +static bool tap_is_wait_incoming(NetClientState *nc)
>> +{
>> + TAPState *s = DO_UPCAST(TAPState, nc, nc);
>> + assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
>> + return s->fd == -1;
>> +}
>> +
>> +static int tap_pre_load(void *opaque)
>> +{
>> + TAPState *s = opaque;
>> +
>> + if (s->fd != -1) {
>> + error_report(
>> + "TAP is already initialized and cannot receive incoming fd");
>> + return -EINVAL;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static bool tap_setup_vhost(TAPState *s, Error **errp);
>> +
>> +static int tap_post_load(void *opaque, int version_id)
>> +{
>> + TAPState *s = opaque;
>> + Error *local_err = NULL;
>> +
>> + tap_read_poll(s, true);
>> +
>> + if (s->fd < 0) {
>> + return -1;
>> + }
>> +
>> + if (!tap_setup_vhost(s, &local_err)) {
>> + error_prepend(&local_err,
>> + "Failed to setup vhost during TAP post-load: ");
>> + error_report_err(local_err);
>> + return -1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static const VMStateDescription vmstate_tap = {
>> + .name = "net-tap",
>> + .pre_load = tap_pre_load,
>> + .post_load = tap_post_load,
>> + .fields = (const VMStateField[]) {
>> + VMSTATE_FD(fd, TAPState),
>> + VMSTATE_BOOL(using_vnet_hdr, TAPState),
>> + VMSTATE_BOOL(has_ufo, TAPState),
>> + VMSTATE_BOOL(has_uso, TAPState),
>> + VMSTATE_BOOL(has_tunnel, TAPState),
>> + VMSTATE_BOOL(enabled, TAPState),
>> + VMSTATE_UINT32(host_vnet_hdr_len, TAPState),
>> + VMSTATE_END_OF_LIST()
>> + }
>> +};
>> +
>> /* fd support */
>>
>> static NetClientInfo net_tap_info = {
>> @@ -412,7 +503,9 @@ static NetClientInfo net_tap_info = {
>> .set_vnet_le = tap_set_vnet_le,
>> .set_vnet_be = tap_set_vnet_be,
>> .set_steering_ebpf = tap_set_steering_ebpf,
>> + .is_wait_incoming = tap_is_wait_incoming,
>> .get_vhost_net = tap_get_vhost_net,
>> + .backend_vmsd = &vmstate_tap,
>> };
>>
>> static TAPState *net_tap_fd_init(NetClientState *peer,
>> @@ -748,6 +841,9 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
>> int sndbuf =
>> (tap->has_sndbuf && tap->sndbuf) ? MIN(tap->sndbuf, INT_MAX) : INT_MAX;
>>
>> + s->read_poll_detached = false;
>> + s->vmstate = qemu_add_vm_change_state_handler(tap_vm_state_change, s);
>> +
>> if (!tap_set_sndbuf(fd, sndbuf, sndbuf_required ? errp : NULL) &&
>> sndbuf_required) {
>> goto failed;
>> @@ -779,6 +875,8 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
>> return true;
>>
>> failed:
>> + qemu_del_vm_change_state_handler(s->vmstate);
>> + s->vmstate = NULL;
>> qemu_del_net_client(&s->nc);
>> return false;
>> }
>> @@ -910,6 +1008,26 @@ int net_init_tap(const Netdev *netdev, const char *name,
>> return -1;
>> }
>>
>> + if (tap->incoming_fds &&
>> + (tap->fd || tap->fds || tap->helper || tap->br || tap->ifname ||
>> + tap->has_sndbuf || tap->has_vnet_hdr)) {
>> + error_setg(errp, "incoming-fds is incompatible with "
>> + "fd=, fds=, helper=, br=, ifname=, sndbuf= and vnet_hdr=");
>
> @incoming-fds excludes certain optional members, and ...
>
>> + return -1;
>> + }
>> +
>> + if (tap->incoming_fds &&
>> + !(tap_is_explicit_no_scirpt(tap->script) &&
>> + tap_is_explicit_no_scirpt(tap->downscript))) {
>> + /*
>> + * script="" and downscript="" are silently supported to be consistent
>> + * with cases without incoming_fds, but do not care to put this into
>> + * error message.
>> + */
>> + error_setg(errp, "incoming-fds requires script=no and downscript=no");
>
> ... requires others. Not documented in net.json. Should it be?
Hmm. Excluded options are not documented either, like many other relations
between options around TAP.
Still, the script/downscript requirements are less obvious, so let's add them to the doc.
>
>> + return -1;
>> + }
>> +
>> queues = tap_parse_fds_and_queues(tap, &fds, errp);
>> if (queues < 0) {
>> return -1;
>> @@ -928,7 +1046,24 @@ int net_init_tap(const Netdev *netdev, const char *name,
>> goto fail;
>> }
>>
>> - if (fds) {
>> + if (tap->incoming_fds) {
>> + for (i = 0; i < queues; i++) {
>> + NetClientState *nc;
>> + TAPState *s;
>> +
>> + nc = qemu_new_net_client(&net_tap_info, peer, "tap", name);
>> + qemu_set_info_str(nc, "incoming");
>> +
>> + s = DO_UPCAST(TAPState, nc, nc);
>> + s->fd = -1;
>> + if (vhost_fds) {
>> + s->vhostfd = vhost_fds[i];
>> + s->vhost_busyloop_timeout = tap->has_poll_us ? tap->poll_us : 0;
>> + } else {
>> + s->vhostfd = -1;
>> + }
>> + }
>> + } else if (fds) {
>> for (i = 0; i < queues; i++) {
>> if (i == 0) {
>> vnet_hdr = tap_probe_vnet_hdr(fds[i], errp);
>> diff --git a/qapi/net.json b/qapi/net.json
>> index 118bd349651..2240de7dbf6 100644
>> --- a/qapi/net.json
>> +++ b/qapi/net.json
>> @@ -355,6 +355,10 @@
>> # @poll-us: maximum number of microseconds that could be spent on busy
>> # polling for tap (since 2.7)
>> #
>> +# @incoming-fds: do not open or create any TAP devices. Prepare for
>> +# getting opened TAP file descriptors from incoming migration
>> +# stream. (Since 11.0)
>
> Let's scratch "opened".
>
> Sure you're still targeting 11.0?
Will fix
>
>> +#
>> # Since: 1.2
>> ##
>> { 'struct': 'NetdevTapOptions',
>> @@ -373,7 +377,8 @@
>> '*vhostfds': 'str',
>> '*vhostforce': 'bool',
>> '*queues': 'uint32',
>> - '*poll-us': 'uint32'} }
>> + '*poll-us': 'uint32',
>> + '*incoming-fds': 'bool' } }
>>
>> ##
>> # @NetdevSocketOptions:
>
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v13 7/8] tests/functional: add skipWithoutSudo() decorator
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
` (5 preceding siblings ...)
2026-03-19 15:53 ` [PATCH v13 6/8] net/tap: support local migration with virtio-net Vladimir Sementsov-Ogievskiy
@ 2026-03-19 15:53 ` Vladimir Sementsov-Ogievskiy
2026-03-19 15:53 ` [PATCH v13 8/8] tests/functional: add test_tap_migration Vladimir Sementsov-Ogievskiy
` (2 subsequent siblings)
9 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-19 15:53 UTC (permalink / raw)
To: jasowang, mst
Cc: armbru, eblake, farosas, peterx, zhao1.liu, wangyanan55, philmd,
marcel.apfelbaum, eduardo, davydov-max, qemu-devel, vsementsov,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini, Thomas Huth
To be used in the next commit, which adds a test for TAP
networking that needs to set up a TAP device.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
Reviewed-by: Maksim Davydov <davydov-max@yandex-team.ru>
---
tests/functional/qemu_test/decorators.py | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/tests/functional/qemu_test/decorators.py b/tests/functional/qemu_test/decorators.py
index fcf236ecfdf..aa135acc785 100644
--- a/tests/functional/qemu_test/decorators.py
+++ b/tests/functional/qemu_test/decorators.py
@@ -6,6 +6,7 @@
import os
import platform
import resource
+import subprocess
from unittest import skipIf, skipUnless
from .cmd import which
@@ -177,3 +178,18 @@ def skipLockedMemoryTest(locked_memory):
ulimit_memory == resource.RLIM_INFINITY or ulimit_memory >= locked_memory * 1024,
f'Test required {locked_memory} kB of available locked memory',
)
+
+'''
+Decorator to skip execution of a test if passwordless
+sudo command is not available.
+'''
+def skipWithoutSudo():
+ proc = subprocess.run(["sudo", "-n", "/bin/true"],
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ universal_newlines=True,
+ check=False)
+
+ return skipUnless(proc.returncode == 0,
+ f'requires password-less sudo access: {proc.stdout}')
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH v13 8/8] tests/functional: add test_tap_migration
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
` (6 preceding siblings ...)
2026-03-19 15:53 ` [PATCH v13 7/8] tests/functional: add skipWithoutSudo() decorator Vladimir Sementsov-Ogievskiy
@ 2026-03-19 15:53 ` Vladimir Sementsov-Ogievskiy
2026-03-19 16:01 ` [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
2026-03-20 9:39 ` Markus Armbruster
9 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-19 15:53 UTC (permalink / raw)
To: jasowang, mst
Cc: armbru, eblake, farosas, peterx, zhao1.liu, wangyanan55, philmd,
marcel.apfelbaum, eduardo, davydov-max, qemu-devel, vsementsov,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
Add a test for the new local migration of virtio-net/tap, with fd
passing through a UNIX socket.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
tests/functional/x86_64/meson.build | 1 +
tests/functional/x86_64/test_tap_migration.py | 456 ++++++++++++++++++
2 files changed, 457 insertions(+)
create mode 100755 tests/functional/x86_64/test_tap_migration.py
diff --git a/tests/functional/x86_64/meson.build b/tests/functional/x86_64/meson.build
index 1ed10ad6c29..c4bb14f034b 100644
--- a/tests/functional/x86_64/meson.build
+++ b/tests/functional/x86_64/meson.build
@@ -45,4 +45,5 @@ tests_x86_64_system_thorough = [
'virtio_balloon',
'virtio_gpu',
'rebuild_vmfd',
+ 'tap_migration',
]
diff --git a/tests/functional/x86_64/test_tap_migration.py b/tests/functional/x86_64/test_tap_migration.py
new file mode 100755
index 00000000000..6c4edd4c787
--- /dev/null
+++ b/tests/functional/x86_64/test_tap_migration.py
@@ -0,0 +1,456 @@
+#!/usr/bin/env python3
+#
+# Functional test that tests TAP local migration
+# with fd passing
+#
+# Copyright (c) Yandex Technologies LLC, 2026
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import os
+import time
+import subprocess
+from subprocess import run
+import signal
+import ctypes
+import ctypes.util
+import unittest
+from contextlib import contextmanager, ExitStack
+from typing import Tuple
+
+from qemu_test import (
+ LinuxKernelTest,
+ Asset,
+ exec_command_and_wait_for_pattern,
+)
+from qemu_test.decorators import skipWithoutSudo
+
+
+# Address configured on the guest's eth0 and pinged from the host
+GUEST_IP = "192.168.100.2"
+GUEST_IP_MASK = f"{GUEST_IP}/24"
+# MAC address given to the virtio-net-pci device
+GUEST_MAC = "d6:0d:75:f8:0f:b7"
+# Address configured on the host-side TAP device and pinged from the guest
+HOST_IP = "192.168.100.1"
+HOST_IP_MASK = f"{HOST_IP}/24"
+# Default TAP device; TAP_ID2 is the target VM's device in non-local migration
+TAP_ID = "tap0"
+TAP_ID2 = "tap1"
+# MAC address assigned to whichever TAP device currently carries HOST_IP
+TAP_MAC = "e6:1d:44:b5:03:5d"
+# Network namespace name; the pid suffix keeps it distinct per test process
+NETNS = f"qemu_test_ns_{os.getpid()}"
+
+
+def ip(args, check=True) -> None:
+    """Invoke ``sudo ip`` followed by the arguments in @args.
+
+    :param args: list of command-line arguments appended after ``sudo ip``
+    :param check: forwarded to subprocess.run(); when true, a non-zero
+                  exit status raises subprocess.CalledProcessError
+    """
+    command = ["sudo", "ip"] + args
+    run(command, check=check)
+
+
+@contextmanager
+def switch_netns(netns_name):
+    """Create network namespace @netns_name and run the with-body inside it.
+
+    On exit the process switches back to its original namespace, and the
+    namespace created here is deleted again (best effort).
+
+    :raises OSError: if a namespace file cannot be opened, or if setns()
+                     fails while entering the new namespace
+    :raises subprocess.CalledProcessError: if ``ip netns add`` fails
+    """
+    # use_errno=True lets us report why setns() failed
+    libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
+    netns_path = f"/var/run/netns/{netns_name}"
+
+    def switch_to_fd(fd, check: bool = False):
+        """Switch to netns by file descriptor"""
+        SYS_setns = 308  # setns(2) syscall number on x86_64
+        CLONE_NEWNET = 0x40000000
+        ret = libc.syscall(SYS_setns, fd, CLONE_NEWNET)
+        if check and ret != 0:
+            err = ctypes.get_errno()
+            raise OSError(err, f"setns() failed: {os.strerror(err)}")
+
+    with ExitStack() as stack:
+        original_netns_fd = os.open("/proc/self/ns/net", os.O_RDONLY)
+        stack.callback(os.close, original_netns_fd)
+
+        ip(["netns", "add", netns_name])
+        stack.callback(ip, ["netns", "del", netns_name], check=False)
+
+        new_netns_fd = os.open(netns_path, os.O_RDONLY)
+        stack.callback(os.close, new_netns_fd)
+
+        # Entering the namespace must not fail silently: otherwise all the
+        # TAP devices and addresses would be set up in the host namespace.
+        switch_to_fd(new_netns_fd, check=True)
+        # Switching back is best effort, like the rest of the cleanup.
+        stack.callback(switch_to_fd, original_netns_fd, check=False)
+
+        yield
+
+
+def del_tap(tap_name: str = TAP_ID) -> None:
+    """Delete multi-queue TAP device @tap_name; a failing ``ip`` is ignored."""
+    command = ["tuntap", "del", tap_name, "mode", "tap", "multi_queue"]
+    ip(command, check=False)
+
+
+def init_tap(tap_name: str = TAP_ID, with_ip: bool = True) -> None:
+ """Create multi-queue TAP device @tap_name and optionally configure it.
+
+ When @with_ip is true the device gets TAP_MAC and HOST_IP_MASK.
+ Each ip() call uses check=True, so any failure raises
+ subprocess.CalledProcessError.
+ """
+ ip(["tuntap", "add", "dev", tap_name, "mode", "tap", "multi_queue"])
+ # NOTE(review): indentation is lost in this archived copy; presumably
+ # all three calls below are guarded by with_ip - confirm against the
+ # original patch.
+ if with_ip:
+ ip(["link", "set", "dev", tap_name, "address", TAP_MAC])
+ ip(["addr", "add", HOST_IP_MASK, "dev", tap_name])
+ ip(["link", "set", tap_name, "up"])
+
+
+def switch_network_to_tap2() -> None:
+    """Move the host-side network identity from TAP_ID to TAP_ID2.
+
+    Both links are taken down, HOST_IP_MASK is removed from TAP_ID, then
+    TAP_ID2 receives TAP_MAC and HOST_IP_MASK and is brought up.  The
+    commands run in order; the first failing one raises
+    subprocess.CalledProcessError and the rest are not executed.
+    """
+    steps = (
+        ["link", "set", TAP_ID2, "down"],
+        ["link", "set", TAP_ID, "down"],
+        ["addr", "delete", HOST_IP_MASK, "dev", TAP_ID],
+        ["link", "set", "dev", TAP_ID2, "address", TAP_MAC],
+        ["addr", "add", HOST_IP_MASK, "dev", TAP_ID2],
+        ["link", "set", TAP_ID2, "up"],
+    )
+    for step in steps:
+        ip(step)
+
+
+def parse_ping_line(line: str) -> float:
+    """Return the timestamp at the start of a timestamped ping output line.
+
+    The first whitespace-separated token is expected to be the timestamp
+    in square brackets; the brackets are stripped and the rest is
+    converted to float.
+    """
+    # Expected input looks like
+    # [1748524876.590509] 64 bytes from 94.245.155.3 \
+    #     (94.245.155.3): icmp_seq=1 ttl=250 time=101 ms
+    bracketed_stamp = line.split()[0]
+    return float(bracketed_stamp[1:-1])
+
+
+def parse_ping_output(out) -> Tuple[bool, float, float]:
+    """Summarize the timestamped output of the background ping.
+
+    Only lines starting with "[" (i.e. carrying a timestamp) are
+    considered.
+
+    :returns: ``(lost, first, last)``.  If at least one "no answer" line
+              is present, ``lost`` is True and ``first``/``last`` are the
+              timestamps of the first and the last such line.  Otherwise
+              ``lost`` is False and ``first``/``last`` are the timestamps
+              of the first and the last timestamped line.
+    :raises ValueError: if @out contains no timestamped line at all
+    """
+    lines = [x for x in out.split("\n") if x.startswith("[")]
+    if not lines:
+        # Report the real problem instead of an IndexError on lines[0]
+        raise ValueError("ping output contains no timestamped lines")
+
+    # The message starts at a fixed offset, right after the
+    # "[<seconds>.<microseconds>] " prefix.
+    no_answer = [x for x in lines if x[20:26] == "no ans"]
+
+    if not no_answer:
+        return False, parse_ping_line(lines[0]), parse_ping_line(lines[-1])
+
+    return (
+        True,
+        parse_ping_line(no_answer[0]),
+        parse_ping_line(no_answer[-1]),
+    )
+
+
+def wait_migration_finish(source_vm, target_vm):
+    """Wait for migration to end on both VMs and assert that it succeeded.
+
+    Waits for a MIGRATION event with status "completed" or "failed",
+    first on @source_vm and then on @target_vm, then queries the run
+    state of both.  Success means both events are "completed", the source
+    is in "postmigrate" and the target is "paused"; anything else fails
+    an assertion whose message lists all four observed values.
+    """
+    terminal_events = tuple(
+        ("MIGRATION", {"data": {"status": status}})
+        for status in ("completed", "failed")
+    )
+
+    source_e = source_vm.events_wait(terminal_events)["data"]
+    target_e = target_vm.events_wait(terminal_events)["data"]
+
+    source_s = source_vm.cmd("query-status")["status"]
+    target_s = target_vm.cmd("query-status")["status"]
+
+    observed = (source_e["status"], target_e["status"], source_s, target_s)
+    expected = ("completed", "completed", "postmigrate", "paused")
+
+    assert observed == expected, f"""Migration failed:
+ SRC status: {source_s}
+ SRC event: {source_e}
+ TGT status: {target_s}
+ TGT event:{target_e}"""
+
+
+@skipWithoutSudo()
+class TAPFdMigration(LinuxKernelTest):
+    """Migration tests for a virtio-net device backed by a host TAP device.
+
+    Each test boots a guest on a source VM, keeps pinging it from the
+    host, migrates to a target VM over a UNIX socket and checks that any
+    ping outage lies within the migration window.  The whole class runs
+    inside a private network namespace entered in setUpClass().
+    """
+
+    # NOTE(review): ASSET_KERNEL and ASSET_INITRD are not referenced by
+    # any method of this class - confirm whether they are still needed.
+    ASSET_KERNEL = Asset(
+        (
+            "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases"
+            "/31/Server/x86_64/os/images/pxeboot/vmlinuz"
+        ),
+        "d4738d03dbbe083ca610d0821d0a8f1488bebbdccef54ce33e3adb35fda00129",
+    )
+
+    ASSET_INITRD = Asset(
+        (
+            "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases"
+            "/31/Server/x86_64/os/images/pxeboot/initrd.img"
+        ),
+        "277cd6c7adf77c7e63d73bbb2cded8ef9e2d3a2f100000e92ff1f8396513cd8b",
+    )
+
+    # Guest image, attached to the VM as a raw CD-ROM drive
+    ASSET_ALPINE_ISO = Asset(
+        (
+            "https://dl-cdn.alpinelinux.org/"
+            "alpine/v3.22/releases/x86_64/alpine-standard-3.22.1-x86_64.iso"
+        ),
+        "96d1b44ea1b8a5a884f193526d92edb4676054e9fa903ad2f016441a0fe13089",
+    )
+
+    @classmethod
+    def setUpClass(cls):
+        """Enter the private network namespace; skip the class on failure."""
+        super().setUpClass()
+
+        try:
+            cls.netns_context = switch_netns(NETNS)
+            cls.netns_context.__enter__()
+        except (OSError, subprocess.CalledProcessError) as e:
+            raise unittest.SkipTest(f"can't switch network namespace: {e}")
+
+    @classmethod
+    def tearDownClass(cls):
+        """Leave the network namespace entered by setUpClass()."""
+        if hasattr(cls, "netns_context"):
+            cls.netns_context.__exit__(None, None, None)
+        super().tearDownClass()
+
+    def setUp(self):
+        """Create the default TAP device and reset per-test state."""
+        super().setUp()
+
+        init_tap()
+
+        self.outer_ping_proc = None
+        self.shm_path = None
+
+    def tearDown(self):
+        """Undo per-test state; every step runs even if an earlier one fails.
+
+        Order: delete both TAP devices, stop the background ping, remove
+        the shared memory file, then run the base class tearDown().
+        """
+        # ExitStack runs callbacks in reverse registration order and keeps
+        # going after an exception, so e.g. a ping that is slow to stop no
+        # longer leaves the shared memory file behind.
+        with ExitStack() as stack:
+            stack.callback(super().tearDown)
+            if self.shm_path:
+                stack.callback(self._remove_shm_file)
+            if self.outer_ping_proc:
+                stack.callback(self.stop_outer_ping)
+            stack.callback(del_tap, TAP_ID2)
+            stack.callback(del_tap, TAP_ID)
+
+    def _remove_shm_file(self):
+        """Remove the shared memory file, tolerating that it never existed."""
+        try:
+            os.unlink(self.shm_path)
+        except FileNotFoundError:
+            # setup_shared_memory() sets shm_path before creating the file
+            pass
+
+    def start_outer_ping(self) -> None:
+        """Start a background ping of the guest, logging to ping.log."""
+        assert self.outer_ping_proc is None
+        self.outer_ping_log = self.scratch_file("ping.log")
+        with open(self.outer_ping_log, "w") as f:
+            self.outer_ping_proc = subprocess.Popen(
+                ["ping", "-i", "0", "-O", "-D", GUEST_IP],
+                text=True,
+                stdout=f,
+            )
+
+    def stop_outer_ping(self) -> str:
+        """Interrupt the background ping and return everything it logged.
+
+        :raises subprocess.TimeoutExpired: if ping does not exit within
+            5 seconds of SIGINT; the process is killed before re-raising
+        """
+        assert self.outer_ping_proc
+        proc = self.outer_ping_proc
+        self.outer_ping_proc = None
+
+        proc.send_signal(signal.SIGINT)
+        try:
+            proc.communicate(timeout=5)
+        except subprocess.TimeoutExpired:
+            # Do not leave a stray ping behind
+            proc.kill()
+            proc.communicate()
+            raise
+
+        with open(self.outer_ping_log) as f:
+            return f.read()
+
+    def stop_ping_and_check(self, stop_time, resume_time):
+        """Stop the background ping and check it against the freeze window.
+
+        Without lost packets, the ping must have started no later than
+        @stop_time and ended no earlier than @resume_time.  With lost
+        packets, the outage must lie within [@stop_time, @resume_time],
+        allowing 0.05 s of slack on both sides.
+        """
+        ping_res = self.stop_outer_ping()
+
+        discon, a, b = parse_ping_output(ping_res)
+
+        if not discon:
+            text = (
+                f"STOP: {stop_time}, RESUME: {resume_time}," f"PING: {a} - {b}"
+            )
+            if a > stop_time or b < resume_time:
+                self.fail(f"PING failed: {text}")
+            self.log.info(f"PING: no packets lost: {text}")
+            return
+
+        text = (
+            f"STOP: {stop_time}, RESUME: {resume_time},"
+            f"PING: disconnect: {a} - {b}"
+        )
+        self.log.info(text)
+        eps = 0.05
+        if a < stop_time - eps or b > resume_time + eps:
+            self.fail(text)
+
+    def one_ping_from_guest(self, vm) -> None:
+        """Send a single ping from the guest of @vm to the host."""
+        # NOTE(review): the second pattern is presumably the failure
+        # pattern of exec_command_and_wait_for_pattern() - confirm.
+        exec_command_and_wait_for_pattern(
+            self,
+            f"ping -c 1 -W 1 {HOST_IP}",
+            "1 packets transmitted, 1 packets received",
+            "1 packets transmitted, 0 packets received",
+            vm=vm,
+        )
+        self.wait_for_console_pattern("# ", vm=vm)
+
+    def one_ping_from_host(self) -> None:
+        """Send a single ping from the host to the guest; raise on failure."""
+        run(
+            ["ping", "-c", "1", "-W", "1", GUEST_IP],
+            stdout=subprocess.DEVNULL,
+            check=True,
+        )
+
+    def setup_shared_memory(self):
+        """Create a zero-filled 1 GiB file under /dev/shm for guest RAM."""
+        self.shm_path = f"/dev/shm/qemu_test_{os.getpid()}"
+
+        # Write in 1 MiB chunks instead of building a 1 GiB bytes object
+        chunk = b"\0" * (1024 * 1024)
+        try:
+            with open(self.shm_path, "wb") as f:
+                for _ in range(1024):  # 1GB in total
+                    f.write(chunk)
+        except Exception as e:
+            self.fail(f"Failed to create shared memory file: {e}")
+
+    def prepare_and_launch_vm(
+        self, shm_path, vhost, incoming=False, vm=None, local=True
+    ):
+        """Launch a paused VM and hot-add its TAP netdev and virtio-net.
+
+        :param shm_path: file backing the guest RAM (shared mapping)
+        :param vhost: value of the netdev "vhost" option
+        :param incoming: launch as migration target ("-incoming defer")
+        :param vm: VM to configure; defaults to self.vm
+        :param local: use local migration; when false, the target VM
+                      gets its own TAP device TAP_ID2
+        """
+        if not vm:
+            vm = self.vm
+
+        vm.set_console()
+        vm.add_args("-accel", "kvm")
+        vm.add_args("-device", "pcie-pci-bridge,id=pci.1,bus=pcie.0")
+        vm.add_args("-m", "1G")
+
+        vm.add_args(
+            "-object",
+            f"memory-backend-file,id=ram0,size=1G,mem-path={shm_path},share=on",
+        )
+        vm.add_args("-machine", "memory-backend=ram0")
+
+        vm.add_args(
+            "-drive",
+            f"file={self.ASSET_ALPINE_ISO.fetch()},media=cdrom,format=raw",
+        )
+
+        # Start paused; the source is resumed by the test with "cont"
+        vm.add_args("-S")
+
+        if incoming:
+            vm.add_args("-incoming", "defer")
+
+        vm_s = "target" if incoming else "source"
+        self.log.info(f"Launching {vm_s} VM")
+        vm.launch()
+
+        if not local:
+            tap_name = TAP_ID2 if incoming else TAP_ID
+        else:
+            tap_name = TAP_ID
+
+        self.add_virtio_net(vm, vhost, tap_name, local, incoming)
+
+        self.set_migration_capabilities(vm, local)
+
+    def add_virtio_net(
+        self, vm, vhost: bool, tap_name: str, local: bool, incoming: bool
+    ):
+        """Hot-add the TAP netdev and the virtio-net-pci device to @vm.
+
+        "incoming-fds" is enabled only on the target of a local migration.
+        """
+        incoming_fds = local and incoming
+        netdev_params = {
+            "id": "netdev.1",
+            "vhost": vhost,
+            "type": "tap",
+            "ifname": tap_name,
+            "queues": 4,
+            "vnet_hdr": True,
+            "incoming-fds": incoming_fds,
+            "script": "no",
+            "downscript": "no",
+        }
+
+        vm.cmd("netdev_add", netdev_params)
+
+        vm.cmd(
+            "device_add",
+            driver="virtio-net-pci",
+            romfile="",
+            id="vnet.1",
+            netdev="netdev.1",
+            mq=True,
+            vectors=18,
+            bus="pci.1",
+            mac=GUEST_MAC,
+            disable_legacy="off",
+            local_migration=local,
+        )
+
+    def set_migration_capabilities(self, vm, local=True):
+        """Enable the events and x-ignore-shared capabilities; set "local"."""
+        vm.cmd(
+            "migrate-set-capabilities",
+            {
+                "capabilities": [
+                    {"capability": "events", "state": True},
+                    {"capability": "x-ignore-shared", "state": True},
+                ]
+            },
+        )
+        vm.cmd("migrate-set-parameters", {"local": local})
+
+    def setup_guest_network(self) -> None:
+        """Assign GUEST_IP_MASK to the guest's eth0 and bring it up."""
+        exec_command_and_wait_for_pattern(self, "ip addr", "# ")
+        exec_command_and_wait_for_pattern(
+            self,
+            f"ip addr add {GUEST_IP_MASK} dev eth0 && "
+            "ip link set eth0 up && echo OK",
+            "OK",
+        )
+        self.wait_for_console_pattern("# ")
+
+    def do_test_tap_fd_migration(self, vhost, local=True):
+        """Run one migration scenario and verify network connectivity.
+
+        :param vhost: value of the netdev "vhost" option
+        :param local: use local migration, where the target keeps using
+                      the source's TAP device; when false, the target
+                      uses a second TAP device and the host network is
+                      switched over to it before the target is resumed
+        """
+        self.require_accelerator("kvm")
+        self.set_machine("q35")
+
+        socket_dir = self.socket_dir()
+        migration_socket = os.path.join(socket_dir.name, "migration.sock")
+
+        self.setup_shared_memory()
+
+        # Setup second TAP if needed
+        if not local:
+            del_tap(TAP_ID2)
+            init_tap(TAP_ID2, with_ip=False)
+
+        self.prepare_and_launch_vm(self.shm_path, vhost, local=local)
+        self.vm.cmd("cont")
+        self.wait_for_console_pattern("login:")
+        exec_command_and_wait_for_pattern(self, "root", "# ")
+
+        self.setup_guest_network()
+
+        self.one_ping_from_guest(self.vm)
+        self.one_ping_from_host()
+        self.start_outer_ping()
+
+        # Get some successful pings before migration
+        time.sleep(0.5)
+
+        target_vm = self.get_vm(name="target")
+        self.prepare_and_launch_vm(
+            self.shm_path,
+            vhost,
+            incoming=True,
+            vm=target_vm,
+            local=local,
+        )
+
+        target_vm.cmd("migrate-incoming", {"uri": f"unix:{migration_socket}"})
+
+        self.log.info("Starting migration")
+        freeze_start = time.time()
+        self.vm.cmd("migrate", {"uri": f"unix:{migration_socket}"})
+
+        self.log.info("Waiting for migration completion")
+        wait_migration_finish(self.vm, target_vm)
+
+        # Switch network to tap1 if not using local-migration
+        if not local:
+            switch_network_to_tap2()
+
+        target_vm.cmd("cont")
+        freeze_end = time.time()
+
+        self.vm.shutdown()
+
+        self.log.info("Verifying PING on target VM after migration")
+        self.one_ping_from_guest(target_vm)
+        self.one_ping_from_host()
+
+        # And a bit more pings after source shutdown
+        time.sleep(0.3)
+        self.stop_ping_and_check(freeze_start, freeze_end)
+
+        target_vm.shutdown()
+
+    def test_tap_fd_migration(self):
+        """Local migration, vhost disabled."""
+        self.do_test_tap_fd_migration(False)
+
+    def test_tap_fd_migration_vhost(self):
+        """Local migration, vhost enabled."""
+        self.do_test_tap_fd_migration(True)
+
+    def test_tap_new_tap_migration(self):
+        """Migration to a new TAP device, vhost disabled."""
+        self.do_test_tap_fd_migration(False, local=False)
+
+    def test_tap_new_tap_migration_vhost(self):
+        """Migration to a new TAP device, vhost enabled."""
+        self.do_test_tap_fd_migration(True, local=False)
+
+
+# Run the tests through the framework's entry point when executed directly
+if __name__ == "__main__":
+ LinuxKernelTest.main()
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH v13 0/8] virtio-net: live-TAP local migration
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
` (7 preceding siblings ...)
2026-03-19 15:53 ` [PATCH v13 8/8] tests/functional: add test_tap_migration Vladimir Sementsov-Ogievskiy
@ 2026-03-19 16:01 ` Vladimir Sementsov-Ogievskiy
2026-03-20 9:39 ` Markus Armbruster
9 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-19 16:01 UTC (permalink / raw)
To: jasowang, mst
Cc: armbru, eblake, farosas, peterx, zhao1.liu, wangyanan55, philmd,
marcel.apfelbaum, eduardo, davydov-max, qemu-devel, yc-core,
leiyang, raphael.s.norwitz, bchaney, th.huth+qemu, berrange,
pbonzini
On 19.03.26 18:53, Vladimir Sementsov-Ogievskiy wrote:
> Hi all!
>
> Here is a new migration parameter "local", which enables
> local migration of the TAP virtio-net backend (and maybe other
> devices and backends in the future), including its properties and open
> fds.
>
> With this new option, management software doesn't need to initialize
> new TAP and do a switch to it. Nothing should be done around
> virtio-net in local migration: it just migrates and continues to use
> same TAP device. So we avoid extra logic in management software, extra
> allocations in kernel (for new TAP), and corresponding extra delay in
> migration downtime.
>
> v13:
> 01: simplify extra "else if" to "else"
> 03: reword documentation (with Markus), drop r-b [sorry, but to be sure,
> that it is OK for Peter too]
> 06: - reword documentation
> - error-out instead of silently ignore
> br=, ifname=, sndbuf=, vnet_hdr=, when mentioned together
> with new incoming-fds parameter.
> - fix the source QEMU process stealing packets from TAP after
> source STOP
Based-on: <20260318113144.15697-1-vsementsov@yandex-team.ru>
"[PATCH v4 00/13] net: refactoring and fixes"
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH v13 0/8] virtio-net: live-TAP local migration
2026-03-19 15:53 [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
` (8 preceding siblings ...)
2026-03-19 16:01 ` [PATCH v13 0/8] virtio-net: live-TAP local migration Vladimir Sementsov-Ogievskiy
@ 2026-03-20 9:39 ` Markus Armbruster
2026-03-20 13:15 ` Vladimir Sementsov-Ogievskiy
9 siblings, 1 reply; 16+ messages in thread
From: Markus Armbruster @ 2026-03-20 9:39 UTC (permalink / raw)
To: Vladimir Sementsov-Ogievskiy
Cc: jasowang, mst, eblake, farosas, peterx, zhao1.liu, wangyanan55,
philmd, marcel.apfelbaum, eduardo, davydov-max, qemu-devel,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
Does not apply on current master for me. What's your base?
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH v13 0/8] virtio-net: live-TAP local migration
2026-03-20 9:39 ` Markus Armbruster
@ 2026-03-20 13:15 ` Vladimir Sementsov-Ogievskiy
0 siblings, 0 replies; 16+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2026-03-20 13:15 UTC (permalink / raw)
To: Markus Armbruster
Cc: jasowang, mst, eblake, farosas, peterx, zhao1.liu, wangyanan55,
philmd, marcel.apfelbaum, eduardo, davydov-max, qemu-devel,
yc-core, leiyang, raphael.s.norwitz, bchaney, th.huth+qemu,
berrange, pbonzini
On 20.03.26 12:39, Markus Armbruster wrote:
> Does not apply on current master for me. What's your base?
>
Based-on: <20260318113144.15697-1-vsementsov@yandex-team.ru>
"[PATCH v4 00/13] net: refactoring and fixes"
Or, branch up-tap-fd-migration-with-bk-opt at https://gitlab.com/vsementsov/qemu.git
--
Best regards,
Vladimir
^ permalink raw reply [flat|nested] 16+ messages in thread