qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page()
@ 2025-02-18  7:43 Li Zhijian via
  2025-02-18  7:43 ` [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA Li Zhijian via
  2025-02-18 20:30 ` [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page() Fabiano Rosas
  0 siblings, 2 replies; 16+ messages in thread
From: Li Zhijian via @ 2025-02-18  7:43 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Xu, Fabiano Rosas, Laurent Vivier, Paolo Bonzini,
	Li Zhijian

Address an error in RDMA-based migration by ensuring RDMA is prioritized
when saving pages in `ram_save_target_page()`.

Previously, the RDMA protocol's page-saving step was placed after other
protocols due to a refactoring in commit bc38dc2f5f3. This led to migration
failures characterized by unknown control messages and state loading errors
destination:
(qemu) qemu-system-x86_64: Unknown control message QEMU FILE
qemu-system-x86_64: error while loading state section id 1(ram)
qemu-system-x86_64: load of migration failed: Operation not permitted
source:
(qemu) qemu-system-x86_64: RDMA is in an error state waiting migration to abort!
qemu-system-x86_64: failed to save SaveStateEntry with id(name): 1(ram): -1
qemu-system-x86_64: rdma migration: recv polling control error!
qemu-system-x86_64: warning: Early error. Sending error.
qemu-system-x86_64: warning: rdma migration: send polling control error

RDMA migration implemented its own protocol/method to send pages to
destination side, hand over to RDMA first to prevent pages being saved by
other protocol.

Fixes: bc38dc2f5f3 ("migration: refactor ram_save_target_page functions")
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
---
 migration/ram.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 6f460fd22d2..635a2fe443a 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1964,6 +1964,11 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
     int res;
 
+    /* Hand over to RDMA first */
+    if (control_save_page(pss, offset, &res)) {
+        return res;
+    }
+
     if (!migrate_multifd()
         || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
         if (save_zero_page(rs, pss, offset)) {
@@ -1976,10 +1981,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
         return ram_save_multifd_page(block, offset);
     }
 
-    if (control_save_page(pss, offset, &res)) {
-        return res;
-    }
-
     return ram_save_page(rs, pss);
 }
 
-- 
2.44.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-18  7:43 [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page() Li Zhijian via
@ 2025-02-18  7:43 ` Li Zhijian via
  2025-02-18 21:03   ` Fabiano Rosas
  2025-02-18 20:30 ` [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page() Fabiano Rosas
  1 sibling, 1 reply; 16+ messages in thread
From: Li Zhijian via @ 2025-02-18  7:43 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Xu, Fabiano Rosas, Laurent Vivier, Paolo Bonzini,
	Li Zhijian

This qtest requirs there is RXE link in the host.

Here is an example to show how to add this RXE link:
$ ./new-rdma-link.sh
192.168.22.93

Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
---
The RDMA migration was broken again...due to lack of sufficient test/qtest.

It's urgly to add and execute a script to establish an RDMA link in
the C program. If anyone has a better suggestion, please let me know.

$ cat ./new-rdma-link.sh
get_ipv4_addr() {
        ip -4 -o addr show dev "$1" |
                sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
}

has_soft_rdma() {
        rdma link | grep -q " netdev $1[[:blank:]]*\$"
}

start_soft_rdma() {
        local type

        modprobe rdma_rxe || return $?
        type=rxe
        (
                cd /sys/class/net &&
                        for i in *; do
                                [ -e "$i" ] || continue
                                [ "$i" = "lo" ] && continue
                                [ "$(<"$i/addr_len")" = 6 ] || continue
                                [ "$(<"$i/carrier")" = 1 ] || continue
                                has_soft_rdma "$i" && break
                                rdma link add "${i}_$type" type $type netdev "$i" && break
                        done
                has_soft_rdma "$i" && echo $i
        )

}

rxe_link=$(start_soft_rdma)
[[ "$rxe_link" ]] && get_ipv4_addr $rxe_link

Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
---
 tests/qtest/migration/new-rdma-link.sh |  34 ++++++++
 tests/qtest/migration/precopy-tests.c  | 103 +++++++++++++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100644 tests/qtest/migration/new-rdma-link.sh

diff --git a/tests/qtest/migration/new-rdma-link.sh b/tests/qtest/migration/new-rdma-link.sh
new file mode 100644
index 00000000000..ca20594eaae
--- /dev/null
+++ b/tests/qtest/migration/new-rdma-link.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Copied from blktests
+get_ipv4_addr() {
+	ip -4 -o addr show dev "$1" |
+		sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
+}
+
+has_soft_rdma() {
+	rdma link | grep -q " netdev $1[[:blank:]]*\$"
+}
+
+start_soft_rdma() {
+	local type
+
+	modprobe rdma_rxe || return $?
+	type=rxe
+	(
+		cd /sys/class/net &&
+			for i in *; do
+				[ -e "$i" ] || continue
+				[ "$i" = "lo" ] && continue
+				[ "$(<"$i/addr_len")" = 6 ] || continue
+				[ "$(<"$i/carrier")" = 1 ] || continue
+				has_soft_rdma "$i" && break
+				rdma link add "${i}_$type" type $type netdev "$i" && break
+			done
+		has_soft_rdma "$i" && echo $i
+	)
+
+}
+
+rxe_link=$(start_soft_rdma)
+[[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
index 162fa695318..d2a1c9c9438 100644
--- a/tests/qtest/migration/precopy-tests.c
+++ b/tests/qtest/migration/precopy-tests.c
@@ -98,6 +98,105 @@ static void test_precopy_unix_dirty_ring(void)
     test_precopy_common(&args);
 }
 
+static int new_rdma_link(char *buffer) {
+    // Copied from blktests
+    const char *script =
+        "#!/bin/bash\n"
+        "\n"
+        "get_ipv4_addr() {\n"
+        "    ip -4 -o addr show dev \"$1\" |\n"
+        "    sed -n 's/.*[[:blank:]]inet[[:blank:]]*\\([^[:blank:]/]*\\).*/\\1/p'\n"
+        "}\n"
+        "\n"
+        "has_soft_rdma() {\n"
+        "    rdma link | grep -q \" netdev $1[[:blank:]]*\\$\"\n"
+        "}\n"
+        "\n"
+        "start_soft_rdma() {\n"
+        "    local type\n"
+        "\n"
+        "    modprobe rdma_rxe || return $?\n"
+        "    type=rxe\n"
+        "    (\n"
+        "        cd /sys/class/net &&\n"
+        "        for i in *; do\n"
+        "            [ -e \"$i\" ] || continue\n"
+        "            [ \"$i\" = \"lo\" ] && continue\n"
+        "            [ \"$(<$i/addr_len)\" = 6 ] || continue\n"
+        "            [ \"$(<$i/carrier)\" = 1 ] || continue\n"
+        "            has_soft_rdma \"$i\" && break\n"
+        "            rdma link add \"${i}_$type\" type $type netdev \"$i\" && break\n"
+        "        done\n"
+        "        has_soft_rdma \"$i\" && echo $i\n"
+        "    )\n"
+        "}\n"
+        "\n"
+        "rxe_link=$(start_soft_rdma)\n"
+        "[[ \"$rxe_link\" ]] && get_ipv4_addr $rxe_link\n";
+
+    char script_filename[] = "/tmp/temp_scriptXXXXXX";
+    int fd = mkstemp(script_filename);
+    if (fd == -1) {
+        perror("Failed to create temporary file");
+        return 1;
+    }
+
+    FILE *fp = fdopen(fd, "w");
+    if (fp == NULL) {
+        perror("Failed to open file stream");
+        close(fd);
+        return 1;
+    }
+    fprintf(fp, "%s", script);
+    fclose(fp);
+
+    if (chmod(script_filename, 0700) == -1) {
+        perror("Failed to set execute permission");
+        return 1;
+    }
+
+    FILE *pipe = popen(script_filename, "r");
+    if (pipe == NULL) {
+        perror("Failed to run script");
+        return 1;
+    }
+
+    int idx = 0;
+    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
+        idx += strlen(buffer);
+    }
+    if (buffer[idx - 1] == '\n')
+        buffer[idx - 1] = 0;
+
+    int status = pclose(pipe);
+    if (status == -1) {
+        perror("Error reported by pclose()");
+    } else if (!WIFEXITED(status)) {
+        printf("Script did not terminate normally\n");
+    }
+
+    remove(script_filename);
+
+    return 0;
+}
+
+static void test_precopy_rdma_plain(void)
+{
+    char buffer[128] = {};
+
+    if (new_rdma_link(buffer))
+        return;
+
+    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
+
+    MigrateCommon args = {
+        .listen_uri = uri,
+        .connect_uri = uri,
+    };
+
+    test_precopy_common(&args);
+}
+
 static void test_precopy_tcp_plain(void)
 {
     MigrateCommon args = {
@@ -968,6 +1067,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
                        test_multifd_tcp_uri_none);
     migration_test_add("/migration/multifd/tcp/plain/cancel",
                        test_multifd_tcp_cancel);
+#ifdef CONFIG_RDMA
+    migration_test_add("/migration/precopy/rdma/plain",
+                       test_precopy_rdma_plain);
+#endif
 }
 
 void migration_test_add_precopy(MigrationTestEnv *env)
-- 
2.44.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page()
  2025-02-18  7:43 [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page() Li Zhijian via
  2025-02-18  7:43 ` [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA Li Zhijian via
@ 2025-02-18 20:30 ` Fabiano Rosas
  2025-02-18 22:03   ` Peter Xu
  1 sibling, 1 reply; 16+ messages in thread
From: Fabiano Rosas @ 2025-02-18 20:30 UTC (permalink / raw)
  To: Li Zhijian via, qemu-devel
  Cc: Peter Xu, Laurent Vivier, Paolo Bonzini, Li Zhijian

Li Zhijian via <qemu-devel@nongnu.org> writes:

> Address an error in RDMA-based migration by ensuring RDMA is prioritized
> when saving pages in `ram_save_target_page()`.
>
> Previously, the RDMA protocol's page-saving step was placed after other
> protocols due to a refactoring in commit bc38dc2f5f3. This led to migration
> failures characterized by unknown control messages and state loading errors
> destination:
> (qemu) qemu-system-x86_64: Unknown control message QEMU FILE
> qemu-system-x86_64: error while loading state section id 1(ram)
> qemu-system-x86_64: load of migration failed: Operation not permitted
> source:
> (qemu) qemu-system-x86_64: RDMA is in an error state waiting migration to abort!
> qemu-system-x86_64: failed to save SaveStateEntry with id(name): 1(ram): -1
> qemu-system-x86_64: rdma migration: recv polling control error!
> qemu-system-x86_64: warning: Early error. Sending error.
> qemu-system-x86_64: warning: rdma migration: send polling control error
>
> RDMA migration implemented its own protocol/method to send pages to
> destination side, hand over to RDMA first to prevent pages being saved by
> other protocol.
>
> Fixes: bc38dc2f5f3 ("migration: refactor ram_save_target_page functions")
> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> ---
>  migration/ram.c | 9 +++++----
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/migration/ram.c b/migration/ram.c
> index 6f460fd22d2..635a2fe443a 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1964,6 +1964,11 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
>      ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
>      int res;
>  
> +    /* Hand over to RDMA first */
> +    if (control_save_page(pss, offset, &res)) {
> +        return res;
> +    }
> +

Can we hoist that migrate_rdma() from inside the function? Since the
other paths already check first before calling their functions.

>      if (!migrate_multifd()
>          || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
>          if (save_zero_page(rs, pss, offset)) {
> @@ -1976,10 +1981,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
>          return ram_save_multifd_page(block, offset);
>      }
>  
> -    if (control_save_page(pss, offset, &res)) {
> -        return res;
> -    }
> -
>      return ram_save_page(rs, pss);
>  }


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-18  7:43 ` [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA Li Zhijian via
@ 2025-02-18 21:03   ` Fabiano Rosas
  2025-02-18 22:40     ` Peter Xu
  0 siblings, 1 reply; 16+ messages in thread
From: Fabiano Rosas @ 2025-02-18 21:03 UTC (permalink / raw)
  To: Li Zhijian via, qemu-devel
  Cc: Peter Xu, Laurent Vivier, Paolo Bonzini, Li Zhijian

Li Zhijian via <qemu-devel@nongnu.org> writes:

> This qtest requirs there is RXE link in the host.
>
> Here is an example to show how to add this RXE link:
> $ ./new-rdma-link.sh
> 192.168.22.93
>
> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> ---
> The RDMA migration was broken again...due to lack of sufficient test/qtest.
>
> It's urgly to add and execute a script to establish an RDMA link in
> the C program. If anyone has a better suggestion, please let me know.
>
> $ cat ./new-rdma-link.sh
> get_ipv4_addr() {
>         ip -4 -o addr show dev "$1" |
>                 sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
> }
>
> has_soft_rdma() {
>         rdma link | grep -q " netdev $1[[:blank:]]*\$"
> }
>
> start_soft_rdma() {
>         local type
>
>         modprobe rdma_rxe || return $?
>         type=rxe
>         (
>                 cd /sys/class/net &&
>                         for i in *; do
>                                 [ -e "$i" ] || continue
>                                 [ "$i" = "lo" ] && continue
>                                 [ "$(<"$i/addr_len")" = 6 ] || continue
>                                 [ "$(<"$i/carrier")" = 1 ] || continue
>                                 has_soft_rdma "$i" && break
>                                 rdma link add "${i}_$type" type $type netdev "$i" && break
>                         done
>                 has_soft_rdma "$i" && echo $i
>         )
>
> }
>
> rxe_link=$(start_soft_rdma)
> [[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
>
> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> ---
>  tests/qtest/migration/new-rdma-link.sh |  34 ++++++++
>  tests/qtest/migration/precopy-tests.c  | 103 +++++++++++++++++++++++++
>  2 files changed, 137 insertions(+)
>  create mode 100644 tests/qtest/migration/new-rdma-link.sh
>
> diff --git a/tests/qtest/migration/new-rdma-link.sh b/tests/qtest/migration/new-rdma-link.sh
> new file mode 100644
> index 00000000000..ca20594eaae
> --- /dev/null
> +++ b/tests/qtest/migration/new-rdma-link.sh
> @@ -0,0 +1,34 @@
> +#!/bin/bash
> +
> +# Copied from blktests
> +get_ipv4_addr() {
> +	ip -4 -o addr show dev "$1" |
> +		sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
> +}
> +
> +has_soft_rdma() {
> +	rdma link | grep -q " netdev $1[[:blank:]]*\$"
> +}
> +
> +start_soft_rdma() {
> +	local type
> +
> +	modprobe rdma_rxe || return $?
> +	type=rxe
> +	(
> +		cd /sys/class/net &&
> +			for i in *; do
> +				[ -e "$i" ] || continue
> +				[ "$i" = "lo" ] && continue
> +				[ "$(<"$i/addr_len")" = 6 ] || continue
> +				[ "$(<"$i/carrier")" = 1 ] || continue
> +				has_soft_rdma "$i" && break
> +				rdma link add "${i}_$type" type $type netdev "$i" && break
> +			done
> +		has_soft_rdma "$i" && echo $i
> +	)
> +
> +}
> +
> +rxe_link=$(start_soft_rdma)
> +[[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
> index 162fa695318..d2a1c9c9438 100644
> --- a/tests/qtest/migration/precopy-tests.c
> +++ b/tests/qtest/migration/precopy-tests.c
> @@ -98,6 +98,105 @@ static void test_precopy_unix_dirty_ring(void)
>      test_precopy_common(&args);
>  }
>  
> +static int new_rdma_link(char *buffer) {
> +    // Copied from blktests
> +    const char *script =
> +        "#!/bin/bash\n"
> +        "\n"
> +        "get_ipv4_addr() {\n"
> +        "    ip -4 -o addr show dev \"$1\" |\n"
> +        "    sed -n 's/.*[[:blank:]]inet[[:blank:]]*\\([^[:blank:]/]*\\).*/\\1/p'\n"
> +        "}\n"
> +        "\n"
> +        "has_soft_rdma() {\n"
> +        "    rdma link | grep -q \" netdev $1[[:blank:]]*\\$\"\n"
> +        "}\n"
> +        "\n"
> +        "start_soft_rdma() {\n"
> +        "    local type\n"
> +        "\n"
> +        "    modprobe rdma_rxe || return $?\n"
> +        "    type=rxe\n"
> +        "    (\n"
> +        "        cd /sys/class/net &&\n"
> +        "        for i in *; do\n"
> +        "            [ -e \"$i\" ] || continue\n"
> +        "            [ \"$i\" = \"lo\" ] && continue\n"
> +        "            [ \"$(<$i/addr_len)\" = 6 ] || continue\n"
> +        "            [ \"$(<$i/carrier)\" = 1 ] || continue\n"
> +        "            has_soft_rdma \"$i\" && break\n"
> +        "            rdma link add \"${i}_$type\" type $type netdev \"$i\" && break\n"
> +        "        done\n"
> +        "        has_soft_rdma \"$i\" && echo $i\n"
> +        "    )\n"
> +        "}\n"
> +        "\n"
> +        "rxe_link=$(start_soft_rdma)\n"
> +        "[[ \"$rxe_link\" ]] && get_ipv4_addr $rxe_link\n";
> +
> +    char script_filename[] = "/tmp/temp_scriptXXXXXX";
> +    int fd = mkstemp(script_filename);
> +    if (fd == -1) {
> +        perror("Failed to create temporary file");
> +        return 1;
> +    }
> +
> +    FILE *fp = fdopen(fd, "w");
> +    if (fp == NULL) {
> +        perror("Failed to open file stream");
> +        close(fd);
> +        return 1;
> +    }
> +    fprintf(fp, "%s", script);
> +    fclose(fp);
> +
> +    if (chmod(script_filename, 0700) == -1) {
> +        perror("Failed to set execute permission");
> +        return 1;
> +    }
> +
> +    FILE *pipe = popen(script_filename, "r");
> +    if (pipe == NULL) {
> +        perror("Failed to run script");
> +        return 1;
> +    }
> +
> +    int idx = 0;
> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
> +        idx += strlen(buffer);
> +    }
> +    if (buffer[idx - 1] == '\n')
> +        buffer[idx - 1] = 0;
> +
> +    int status = pclose(pipe);
> +    if (status == -1) {
> +        perror("Error reported by pclose()");
> +    } else if (!WIFEXITED(status)) {
> +        printf("Script did not terminate normally\n");
> +    }
> +
> +    remove(script_filename);
> +
> +    return 0;
> +}
> +
> +static void test_precopy_rdma_plain(void)
> +{
> +    char buffer[128] = {};
> +
> +    if (new_rdma_link(buffer))
> +        return;
> +
> +    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
> +
> +    MigrateCommon args = {
> +        .listen_uri = uri,
> +        .connect_uri = uri,
> +    };
> +
> +    test_precopy_common(&args);
> +}
> +
>  static void test_precopy_tcp_plain(void)
>  {
>      MigrateCommon args = {
> @@ -968,6 +1067,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
>                         test_multifd_tcp_uri_none);
>      migration_test_add("/migration/multifd/tcp/plain/cancel",
>                         test_multifd_tcp_cancel);
> +#ifdef CONFIG_RDMA
> +    migration_test_add("/migration/precopy/rdma/plain",
> +                       test_precopy_rdma_plain);
> +#endif
>  }
>  
>  void migration_test_add_precopy(MigrationTestEnv *env)

Thanks, that's definitely better than nothing. I'll experiment with this
locally, see if I can at least run it before sending a pull request.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page()
  2025-02-18 20:30 ` [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page() Fabiano Rosas
@ 2025-02-18 22:03   ` Peter Xu
  2025-02-19  9:39     ` Zhijian Li (Fujitsu) via
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Xu @ 2025-02-18 22:03 UTC (permalink / raw)
  To: Fabiano Rosas; +Cc: Li Zhijian via, Laurent Vivier, Paolo Bonzini, Li Zhijian

On Tue, Feb 18, 2025 at 05:30:40PM -0300, Fabiano Rosas wrote:
> Li Zhijian via <qemu-devel@nongnu.org> writes:
> 
> > Address an error in RDMA-based migration by ensuring RDMA is prioritized
> > when saving pages in `ram_save_target_page()`.
> >
> > Previously, the RDMA protocol's page-saving step was placed after other
> > protocols due to a refactoring in commit bc38dc2f5f3. This led to migration
> > failures characterized by unknown control messages and state loading errors
> > destination:
> > (qemu) qemu-system-x86_64: Unknown control message QEMU FILE
> > qemu-system-x86_64: error while loading state section id 1(ram)
> > qemu-system-x86_64: load of migration failed: Operation not permitted
> > source:
> > (qemu) qemu-system-x86_64: RDMA is in an error state waiting migration to abort!
> > qemu-system-x86_64: failed to save SaveStateEntry with id(name): 1(ram): -1
> > qemu-system-x86_64: rdma migration: recv polling control error!
> > qemu-system-x86_64: warning: Early error. Sending error.
> > qemu-system-x86_64: warning: rdma migration: send polling control error
> >
> > RDMA migration implemented its own protocol/method to send pages to
> > destination side, hand over to RDMA first to prevent pages being saved by
> > other protocol.
> >
> > Fixes: bc38dc2f5f3 ("migration: refactor ram_save_target_page functions")
> > Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> > ---
> >  migration/ram.c | 9 +++++----
> >  1 file changed, 5 insertions(+), 4 deletions(-)
> >
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 6f460fd22d2..635a2fe443a 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -1964,6 +1964,11 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
> >      ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> >      int res;
> >  
> > +    /* Hand over to RDMA first */
> > +    if (control_save_page(pss, offset, &res)) {
> > +        return res;
> > +    }
> > +
> 
> Can we hoist that migrate_rdma() from inside the function? Since the
> other paths already check first before calling their functions.

If we're talking about hoist and stuff.. and if we want to go slightly
further, I wonder if we could also drop RAM_SAVE_CONTROL_NOT_SUPP.

    if (!migrate_rdma() || migration_in_postcopy()) {
        return RAM_SAVE_CONTROL_NOT_SUPP;
    }

We should make sure rdma_control_save_page() won't get invoked at all in
either case above..  For postcopy, maybe we could fail in the QMP migrate /
migrate_incoming cmd, at migration_channels_and_transport_compatible().

> 
> >      if (!migrate_multifd()
> >          || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
> >          if (save_zero_page(rs, pss, offset)) {
> > @@ -1976,10 +1981,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
> >          return ram_save_multifd_page(block, offset);
> >      }
> >  
> > -    if (control_save_page(pss, offset, &res)) {
> > -        return res;
> > -    }
> > -
> >      return ram_save_page(rs, pss);
> >  }
> 

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-18 21:03   ` Fabiano Rosas
@ 2025-02-18 22:40     ` Peter Xu
  2025-02-19  5:33       ` Zhijian Li (Fujitsu) via
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Xu @ 2025-02-18 22:40 UTC (permalink / raw)
  To: Fabiano Rosas; +Cc: Li Zhijian via, Laurent Vivier, Paolo Bonzini, Li Zhijian

On Tue, Feb 18, 2025 at 06:03:48PM -0300, Fabiano Rosas wrote:
> Li Zhijian via <qemu-devel@nongnu.org> writes:
> 
> > This qtest requirs there is RXE link in the host.
> >
> > Here is an example to show how to add this RXE link:
> > $ ./new-rdma-link.sh
> > 192.168.22.93
> >
> > Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> > ---
> > The RDMA migration was broken again...due to lack of sufficient test/qtest.
> >
> > It's urgly to add and execute a script to establish an RDMA link in
> > the C program. If anyone has a better suggestion, please let me know.
> >
> > $ cat ./new-rdma-link.sh
> > get_ipv4_addr() {
> >         ip -4 -o addr show dev "$1" |
> >                 sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
> > }
> >
> > has_soft_rdma() {
> >         rdma link | grep -q " netdev $1[[:blank:]]*\$"
> > }
> >
> > start_soft_rdma() {
> >         local type
> >
> >         modprobe rdma_rxe || return $?
> >         type=rxe
> >         (
> >                 cd /sys/class/net &&
> >                         for i in *; do
> >                                 [ -e "$i" ] || continue
> >                                 [ "$i" = "lo" ] && continue
> >                                 [ "$(<"$i/addr_len")" = 6 ] || continue
> >                                 [ "$(<"$i/carrier")" = 1 ] || continue
> >                                 has_soft_rdma "$i" && break
> >                                 rdma link add "${i}_$type" type $type netdev "$i" && break
> >                         done
> >                 has_soft_rdma "$i" && echo $i
> >         )
> >
> > }
> >
> > rxe_link=$(start_soft_rdma)
> > [[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
> >
> > Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> > ---
> >  tests/qtest/migration/new-rdma-link.sh |  34 ++++++++
> >  tests/qtest/migration/precopy-tests.c  | 103 +++++++++++++++++++++++++
> >  2 files changed, 137 insertions(+)
> >  create mode 100644 tests/qtest/migration/new-rdma-link.sh
> >
> > diff --git a/tests/qtest/migration/new-rdma-link.sh b/tests/qtest/migration/new-rdma-link.sh
> > new file mode 100644
> > index 00000000000..ca20594eaae
> > --- /dev/null
> > +++ b/tests/qtest/migration/new-rdma-link.sh
> > @@ -0,0 +1,34 @@
> > +#!/bin/bash
> > +
> > +# Copied from blktests
> > +get_ipv4_addr() {
> > +	ip -4 -o addr show dev "$1" |
> > +		sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
> > +}
> > +
> > +has_soft_rdma() {
> > +	rdma link | grep -q " netdev $1[[:blank:]]*\$"
> > +}
> > +
> > +start_soft_rdma() {
> > +	local type
> > +
> > +	modprobe rdma_rxe || return $?
> > +	type=rxe
> > +	(
> > +		cd /sys/class/net &&
> > +			for i in *; do
> > +				[ -e "$i" ] || continue
> > +				[ "$i" = "lo" ] && continue
> > +				[ "$(<"$i/addr_len")" = 6 ] || continue
> > +				[ "$(<"$i/carrier")" = 1 ] || continue
> > +				has_soft_rdma "$i" && break
> > +				rdma link add "${i}_$type" type $type netdev "$i" && break
> > +			done
> > +		has_soft_rdma "$i" && echo $i
> > +	)
> > +
> > +}
> > +
> > +rxe_link=$(start_soft_rdma)
> > +[[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
> > diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
> > index 162fa695318..d2a1c9c9438 100644
> > --- a/tests/qtest/migration/precopy-tests.c
> > +++ b/tests/qtest/migration/precopy-tests.c
> > @@ -98,6 +98,105 @@ static void test_precopy_unix_dirty_ring(void)
> >      test_precopy_common(&args);
> >  }
> >  
> > +static int new_rdma_link(char *buffer) {
> > +    // Copied from blktests
> > +    const char *script =
> > +        "#!/bin/bash\n"
> > +        "\n"
> > +        "get_ipv4_addr() {\n"
> > +        "    ip -4 -o addr show dev \"$1\" |\n"
> > +        "    sed -n 's/.*[[:blank:]]inet[[:blank:]]*\\([^[:blank:]/]*\\).*/\\1/p'\n"
> > +        "}\n"
> > +        "\n"
> > +        "has_soft_rdma() {\n"
> > +        "    rdma link | grep -q \" netdev $1[[:blank:]]*\\$\"\n"
> > +        "}\n"
> > +        "\n"
> > +        "start_soft_rdma() {\n"
> > +        "    local type\n"
> > +        "\n"
> > +        "    modprobe rdma_rxe || return $?\n"
> > +        "    type=rxe\n"
> > +        "    (\n"
> > +        "        cd /sys/class/net &&\n"
> > +        "        for i in *; do\n"
> > +        "            [ -e \"$i\" ] || continue\n"
> > +        "            [ \"$i\" = \"lo\" ] && continue\n"
> > +        "            [ \"$(<$i/addr_len)\" = 6 ] || continue\n"
> > +        "            [ \"$(<$i/carrier)\" = 1 ] || continue\n"
> > +        "            has_soft_rdma \"$i\" && break\n"
> > +        "            rdma link add \"${i}_$type\" type $type netdev \"$i\" && break\n"
> > +        "        done\n"
> > +        "        has_soft_rdma \"$i\" && echo $i\n"
> > +        "    )\n"
> > +        "}\n"
> > +        "\n"
> > +        "rxe_link=$(start_soft_rdma)\n"
> > +        "[[ \"$rxe_link\" ]] && get_ipv4_addr $rxe_link\n";
> > +
> > +    char script_filename[] = "/tmp/temp_scriptXXXXXX";
> > +    int fd = mkstemp(script_filename);
> > +    if (fd == -1) {
> > +        perror("Failed to create temporary file");
> > +        return 1;
> > +    }
> > +
> > +    FILE *fp = fdopen(fd, "w");
> > +    if (fp == NULL) {
> > +        perror("Failed to open file stream");
> > +        close(fd);
> > +        return 1;
> > +    }
> > +    fprintf(fp, "%s", script);
> > +    fclose(fp);
> > +
> > +    if (chmod(script_filename, 0700) == -1) {
> > +        perror("Failed to set execute permission");
> > +        return 1;
> > +    }
> > +
> > +    FILE *pipe = popen(script_filename, "r");
> > +    if (pipe == NULL) {
> > +        perror("Failed to run script");
> > +        return 1;
> > +    }
> > +
> > +    int idx = 0;
> > +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
> > +        idx += strlen(buffer);
> > +    }
> > +    if (buffer[idx - 1] == '\n')
> > +        buffer[idx - 1] = 0;
> > +
> > +    int status = pclose(pipe);
> > +    if (status == -1) {
> > +        perror("Error reported by pclose()");
> > +    } else if (!WIFEXITED(status)) {
> > +        printf("Script did not terminate normally\n");
> > +    }
> > +
> > +    remove(script_filename);

The script can be put separately instead if hard-coded here, right?

> > +
> > +    return 0;
> > +}
> > +
> > +static void test_precopy_rdma_plain(void)
> > +{
> > +    char buffer[128] = {};
> > +
> > +    if (new_rdma_link(buffer))
> > +        return;
> > +
> > +    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
> > +
> > +    MigrateCommon args = {
> > +        .listen_uri = uri,
> > +        .connect_uri = uri,
> > +    };
> > +
> > +    test_precopy_common(&args);
> > +}
> > +
> >  static void test_precopy_tcp_plain(void)
> >  {
> >      MigrateCommon args = {
> > @@ -968,6 +1067,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
> >                         test_multifd_tcp_uri_none);
> >      migration_test_add("/migration/multifd/tcp/plain/cancel",
> >                         test_multifd_tcp_cancel);
> > +#ifdef CONFIG_RDMA
> > +    migration_test_add("/migration/precopy/rdma/plain",
> > +                       test_precopy_rdma_plain);
> > +#endif
> >  }
> >  
> >  void migration_test_add_precopy(MigrationTestEnv *env)
> 
> Thanks, that's definitely better than nothing. I'll experiment with this
> locally, see if I can at least run it before sending a pull request.

With your newly added --full, IIUC we can add whatever we want there.
E.g. we can add --rdma and iff specified, migration-test adds the rdma test.

Or.. skip the test when the rdma link isn't available.

If we could separate the script into a file, it'll be better.  We could
create scripts/migration dir and put all migration scripts over there, then
in the test it tries to detect rdma link and fetch the ip only (aka, we'd
better keep the test itself not rely on root permission..).

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-18 22:40     ` Peter Xu
@ 2025-02-19  5:33       ` Zhijian Li (Fujitsu) via
  2025-02-19 12:47         ` Peter Xu
  0 siblings, 1 reply; 16+ messages in thread
From: Zhijian Li (Fujitsu) via @ 2025-02-19  5:33 UTC (permalink / raw)
  To: Peter Xu, Fabiano Rosas; +Cc: Li Zhijian via, Laurent Vivier, Paolo Bonzini



On 19/02/2025 06:40, Peter Xu wrote:
> On Tue, Feb 18, 2025 at 06:03:48PM -0300, Fabiano Rosas wrote:
>> Li Zhijian via <qemu-devel@nongnu.org> writes:
>>
>>> This qtest requirs there is RXE link in the host.
>>>
>>> Here is an example to show how to add this RXE link:
>>> $ ./new-rdma-link.sh
>>> 192.168.22.93
>>>
>>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>>> ---
>>> The RDMA migration was broken again...due to lack of sufficient test/qtest.
>>>
>>> It's urgly to add and execute a script to establish an RDMA link in
>>> the C program. If anyone has a better suggestion, please let me know.
>>>
>>> $ cat ./new-rdma-link.sh
>>> get_ipv4_addr() {
>>>          ip -4 -o addr show dev "$1" |
>>>                  sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
>>> }
>>>
>>> has_soft_rdma() {
>>>          rdma link | grep -q " netdev $1[[:blank:]]*\$"
>>> }
>>>
>>> start_soft_rdma() {
>>>          local type
>>>
>>>          modprobe rdma_rxe || return $?
>>>          type=rxe
>>>          (
>>>                  cd /sys/class/net &&
>>>                          for i in *; do
>>>                                  [ -e "$i" ] || continue
>>>                                  [ "$i" = "lo" ] && continue
>>>                                  [ "$(<"$i/addr_len")" = 6 ] || continue
>>>                                  [ "$(<"$i/carrier")" = 1 ] || continue
>>>                                  has_soft_rdma "$i" && break
>>>                                  rdma link add "${i}_$type" type $type netdev "$i" && break
>>>                          done
>>>                  has_soft_rdma "$i" && echo $i
>>>          )
>>>
>>> }
>>>
>>> rxe_link=$(start_soft_rdma)
>>> [[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
>>>
>>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>>> ---
>>>   tests/qtest/migration/new-rdma-link.sh |  34 ++++++++
>>>   tests/qtest/migration/precopy-tests.c  | 103 +++++++++++++++++++++++++
>>>   2 files changed, 137 insertions(+)
>>>   create mode 100644 tests/qtest/migration/new-rdma-link.sh
>>>
>>> diff --git a/tests/qtest/migration/new-rdma-link.sh b/tests/qtest/migration/new-rdma-link.sh
>>> new file mode 100644
>>> index 00000000000..ca20594eaae
>>> --- /dev/null
>>> +++ b/tests/qtest/migration/new-rdma-link.sh
>>> @@ -0,0 +1,34 @@
>>> +#!/bin/bash
>>> +
>>> +# Copied from blktests
>>> +get_ipv4_addr() {
>>> +	ip -4 -o addr show dev "$1" |
>>> +		sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
>>> +}
>>> +
>>> +has_soft_rdma() {
>>> +	rdma link | grep -q " netdev $1[[:blank:]]*\$"
>>> +}
>>> +
>>> +start_soft_rdma() {
>>> +	local type
>>> +
>>> +	modprobe rdma_rxe || return $?
>>> +	type=rxe
>>> +	(
>>> +		cd /sys/class/net &&
>>> +			for i in *; do
>>> +				[ -e "$i" ] || continue
>>> +				[ "$i" = "lo" ] && continue
>>> +				[ "$(<"$i/addr_len")" = 6 ] || continue
>>> +				[ "$(<"$i/carrier")" = 1 ] || continue
>>> +				has_soft_rdma "$i" && break
>>> +				rdma link add "${i}_$type" type $type netdev "$i" && break
>>> +			done
>>> +		has_soft_rdma "$i" && echo $i
>>> +	)
>>> +
>>> +}
>>> +
>>> +rxe_link=$(start_soft_rdma)
>>> +[[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
>>> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
>>> index 162fa695318..d2a1c9c9438 100644
>>> --- a/tests/qtest/migration/precopy-tests.c
>>> +++ b/tests/qtest/migration/precopy-tests.c
>>> @@ -98,6 +98,105 @@ static void test_precopy_unix_dirty_ring(void)
>>>       test_precopy_common(&args);
>>>   }
>>>   
>>> +static int new_rdma_link(char *buffer) {
>>> +    // Copied from blktests
>>> +    const char *script =
>>> +        "#!/bin/bash\n"
>>> +        "\n"
>>> +        "get_ipv4_addr() {\n"
>>> +        "    ip -4 -o addr show dev \"$1\" |\n"
>>> +        "    sed -n 's/.*[[:blank:]]inet[[:blank:]]*\\([^[:blank:]/]*\\).*/\\1/p'\n"
>>> +        "}\n"
>>> +        "\n"
>>> +        "has_soft_rdma() {\n"
>>> +        "    rdma link | grep -q \" netdev $1[[:blank:]]*\\$\"\n"
>>> +        "}\n"
>>> +        "\n"
>>> +        "start_soft_rdma() {\n"
>>> +        "    local type\n"
>>> +        "\n"
>>> +        "    modprobe rdma_rxe || return $?\n"
>>> +        "    type=rxe\n"
>>> +        "    (\n"
>>> +        "        cd /sys/class/net &&\n"
>>> +        "        for i in *; do\n"
>>> +        "            [ -e \"$i\" ] || continue\n"
>>> +        "            [ \"$i\" = \"lo\" ] && continue\n"
>>> +        "            [ \"$(<$i/addr_len)\" = 6 ] || continue\n"
>>> +        "            [ \"$(<$i/carrier)\" = 1 ] || continue\n"
>>> +        "            has_soft_rdma \"$i\" && break\n"
>>> +        "            rdma link add \"${i}_$type\" type $type netdev \"$i\" && break\n"
>>> +        "        done\n"
>>> +        "        has_soft_rdma \"$i\" && echo $i\n"
>>> +        "    )\n"
>>> +        "}\n"
>>> +        "\n"
>>> +        "rxe_link=$(start_soft_rdma)\n"
>>> +        "[[ \"$rxe_link\" ]] && get_ipv4_addr $rxe_link\n";
>>> +
>>> +    char script_filename[] = "/tmp/temp_scriptXXXXXX";
>>> +    int fd = mkstemp(script_filename);
>>> +    if (fd == -1) {
>>> +        perror("Failed to create temporary file");
>>> +        return 1;
>>> +    }
>>> +
>>> +    FILE *fp = fdopen(fd, "w");
>>> +    if (fp == NULL) {
>>> +        perror("Failed to open file stream");
>>> +        close(fd);
>>> +        return 1;
>>> +    }
>>> +    fprintf(fp, "%s", script);
>>> +    fclose(fp);
>>> +
>>> +    if (chmod(script_filename, 0700) == -1) {
>>> +        perror("Failed to set execute permission");
>>> +        return 1;
>>> +    }
>>> +
>>> +    FILE *pipe = popen(script_filename, "r");
>>> +    if (pipe == NULL) {
>>> +        perror("Failed to run script");
>>> +        return 1;
>>> +    }
>>> +
>>> +    int idx = 0;
>>> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
>>> +        idx += strlen(buffer);
>>> +    }
>>> +    if (buffer[idx - 1] == '\n')
>>> +        buffer[idx - 1] = 0;
>>> +
>>> +    int status = pclose(pipe);
>>> +    if (status == -1) {
>>> +        perror("Error reported by pclose()");
>>> +    } else if (!WIFEXITED(status)) {
>>> +        printf("Script did not terminate normally\n");
>>> +    }
>>> +
>>> +    remove(script_filename);
> 
> The script can be put separately instead if hard-coded here, right?


Sure, If so, I wonder whether the migration-test program is able to know where is this script?


> 
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void test_precopy_rdma_plain(void)
>>> +{
>>> +    char buffer[128] = {};
>>> +
>>> +    if (new_rdma_link(buffer))
>>> +        return;
>>> +
>>> +    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
>>> +
>>> +    MigrateCommon args = {
>>> +        .listen_uri = uri,
>>> +        .connect_uri = uri,
>>> +    };
>>> +
>>> +    test_precopy_common(&args);
>>> +}
>>> +
>>>   static void test_precopy_tcp_plain(void)
>>>   {
>>>       MigrateCommon args = {
>>> @@ -968,6 +1067,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
>>>                          test_multifd_tcp_uri_none);
>>>       migration_test_add("/migration/multifd/tcp/plain/cancel",
>>>                          test_multifd_tcp_cancel);
>>> +#ifdef CONFIG_RDMA
>>> +    migration_test_add("/migration/precopy/rdma/plain",
>>> +                       test_precopy_rdma_plain);
>>> +#endif
>>>   }
>>>   
>>>   void migration_test_add_precopy(MigrationTestEnv *env)
>>
>> Thanks, that's definitely better than nothing. I'll experiment with this
>> locally, see if I can at least run it before sending a pull request.
> 
> With your newly added --full, IIUC we can add whatever we want there.
> E.g. we can add --rdma and iff specified, migration-test adds the rdma test.
> 
> Or.. skip the test when the rdma link isn't available.
> 
> If we could separate the script into a file, it'll be better.  We could
> create scripts/migration dir and put all migration scripts over there,

We have any other existing script? I didn't find it in current QEMU tree.


> then
> in the test it tries to detect rdma link and fetch the ip only

It should work without root permission if we just *detect* and *fetch ip*.

Do you also mean we can split new-rdma-link.sh to 2 separate scripts
- add-rdma-link.sh # optionally, execute by user before the test (require root permission)
- detect-fetch-rdma.sh # execute from the migration-test

Thanks
Zhijian


> (aka, we'd
> better keep the test itself not rely on root permission..).
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page()
  2025-02-18 22:03   ` Peter Xu
@ 2025-02-19  9:39     ` Zhijian Li (Fujitsu) via
  2025-02-19 13:23       ` Peter Xu
  0 siblings, 1 reply; 16+ messages in thread
From: Zhijian Li (Fujitsu) via @ 2025-02-19  9:39 UTC (permalink / raw)
  To: Peter Xu, Fabiano Rosas; +Cc: Li Zhijian via, Laurent Vivier, Paolo Bonzini



On 19/02/2025 06:03, Peter Xu wrote:
> On Tue, Feb 18, 2025 at 05:30:40PM -0300, Fabiano Rosas wrote:
>> Li Zhijian via <qemu-devel@nongnu.org> writes:
>>
>>> Address an error in RDMA-based migration by ensuring RDMA is prioritized
>>> when saving pages in `ram_save_target_page()`.
>>>
>>> Previously, the RDMA protocol's page-saving step was placed after other
>>> protocols due to a refactoring in commit bc38dc2f5f3. This led to migration
>>> failures characterized by unknown control messages and state loading errors
>>> destination:
>>> (qemu) qemu-system-x86_64: Unknown control message QEMU FILE
>>> qemu-system-x86_64: error while loading state section id 1(ram)
>>> qemu-system-x86_64: load of migration failed: Operation not permitted
>>> source:
>>> (qemu) qemu-system-x86_64: RDMA is in an error state waiting migration to abort!
>>> qemu-system-x86_64: failed to save SaveStateEntry with id(name): 1(ram): -1
>>> qemu-system-x86_64: rdma migration: recv polling control error!
>>> qemu-system-x86_64: warning: Early error. Sending error.
>>> qemu-system-x86_64: warning: rdma migration: send polling control error
>>>
>>> RDMA migration implemented its own protocol/method to send pages to
>>> destination side, hand over to RDMA first to prevent pages being saved by
>>> other protocol.
>>>
>>> Fixes: bc38dc2f5f3 ("migration: refactor ram_save_target_page functions")
>>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>>> ---
>>>   migration/ram.c | 9 +++++----
>>>   1 file changed, 5 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/migration/ram.c b/migration/ram.c
>>> index 6f460fd22d2..635a2fe443a 100644
>>> --- a/migration/ram.c
>>> +++ b/migration/ram.c
>>> @@ -1964,6 +1964,11 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
>>>       ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
>>>       int res;
>>>   
>>> +    /* Hand over to RDMA first */
>>> +    if (control_save_page(pss, offset, &res)) {
>>> +        return res;
>>> +    }
>>> +
>>
>> Can we hoist that migrate_rdma() from inside the function? Since the
>> other paths already check first before calling their functions.
> 

Yeah, it sounds good to me.


> If we're talking about hoist and stuff.. and if we want to go slightly
> further, I wonder if we could also drop RAM_SAVE_CONTROL_NOT_SUPP.
> 
>      if (!migrate_rdma() || migration_in_postcopy()) {
>          return RAM_SAVE_CONTROL_NOT_SUPP;
>      }
> 
> We should make sure rdma_control_save_page() won't get invoked at all in
> either case above..  

> For postcopy, maybe we could fail in the QMP migrate /
> migrate_incoming cmd, at migration_channels_and_transport_compatible()

I tried to kill RAM_SAVE_CONTROL_NOT_SUPP, but It seems it doesn't need to touch any postcopy logic
"in the QMP migrate / migrate_incoming cmd, at migration_channels_and_transport_compatible()"

Is there something I might have overlooked?

A whole draft diff would be like below:
It includes 3 parts:

migration/rdma: Remove unnecessary RAM_SAVE_CONTROL_NOT_SUPP check in rdma_control_save_page()
migration: kill RAM_SAVE_CONTROL_NOT_SUPP
migration: open control_save_page() to ram_save_target_page()

diff --git a/migration/ram.c b/migration/ram.c
index 589b6505eb2..fc6a964fd64 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1143,32 +1143,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
      return len;
  }
  
-/*
- * @pages: the number of pages written by the control path,
- *        < 0 - error
- *        > 0 - number of pages written
- *
- * Return true if the pages has been saved, otherwise false is returned.
- */
-static bool control_save_page(PageSearchStatus *pss,
-                              ram_addr_t offset, int *pages)
-{
-    int ret;
-
-    ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset,
-                                 TARGET_PAGE_SIZE);
-    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
-        return false;
-    }
-
-    if (ret == RAM_SAVE_CONTROL_DELAYED) {
-        *pages = 1;
-        return true;
-    }
-    *pages = ret;
-    return true;
-}
-
  /*
   * directly send the page to the stream
   *
@@ -1964,6 +1938,16 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
      ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
      int res;
  
+    if (migrate_rdma() && !migration_in_postcopy()) {
+        res = rdma_control_save_page(pss->pss_channel, pss->block->offset,
+                                     offset, TARGET_PAGE_SIZE);
+
+        if (res == RAM_SAVE_CONTROL_DELAYED) {
+            res = 1;
+        }
+        return res;
+    }
+
      if (!migrate_multifd()
          || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
          if (save_zero_page(rs, pss, offset)) {
@@ -1976,10 +1960,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
          return ram_save_multifd_page(block, offset);
      }
      }
  
-    if (control_save_page(pss, offset, &res)) {
-        return res;
-    }
-
      return ram_save_page(rs, pss);
  }
  
diff --git a/migration/rdma.c b/migration/rdma.c
index 76fb0349238..c6876347e1e 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3284,14 +3284,11 @@ err:
  int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
                             ram_addr_t offset, size_t size)
  {
-    if (!migrate_rdma() || migration_in_postcopy()) {
-        return RAM_SAVE_CONTROL_NOT_SUPP;
-    }
+    assert(migrate_rdma());
  
      int ret = qemu_rdma_save_page(f, block_offset, offset, size);
  
-    if (ret != RAM_SAVE_CONTROL_DELAYED &&
-        ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+    if (ret != RAM_SAVE_CONTROL_DELAYED) {
          if (ret < 0) {
              qemu_file_set_error(f, ret);
          }
diff --git a/migration/rdma.h b/migration/rdma.h
index f55f28bbed1..bb0296c3726 100644
--- a/migration/rdma.h
+++ b/migration/rdma.h
@@ -33,7 +33,6 @@ void rdma_start_incoming_migration(InetSocketAddress *host_port, Error **errp);
  #define RAM_CONTROL_ROUND     1
  #define RAM_CONTROL_FINISH    3
  
-#define RAM_SAVE_CONTROL_NOT_SUPP -1000
  #define RAM_SAVE_CONTROL_DELAYED  -2000
  
  #ifdef CONFIG_RDMA
@@ -56,7 +55,9 @@ static inline
  int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
                             ram_addr_t offset, size_t size)
  {
-    return RAM_SAVE_CONTROL_NOT_SUPP;
+    /* never reach */
+    assert(0);
+    return -1;
  }
  #endif
  #endif




Thanks
Zhijian

> 
>>
>>>       if (!migrate_multifd()
>>>           || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
>>>           if (save_zero_page(rs, pss, offset)) {
>>> @@ -1976,10 +1981,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
>>>           return ram_save_multifd_page(block, offset);
>>>       }
>>>   
>>> -    if (control_save_page(pss, offset, &res)) {
>>> -        return res;
>>> -    }
>>> -
>>>       return ram_save_page(rs, pss);
>>>   }
>>
> 

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-19  5:33       ` Zhijian Li (Fujitsu) via
@ 2025-02-19 12:47         ` Peter Xu
  2025-02-19 13:20           ` Fabiano Rosas
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Xu @ 2025-02-19 12:47 UTC (permalink / raw)
  To: Zhijian Li (Fujitsu)
  Cc: Fabiano Rosas, Li Zhijian via, Laurent Vivier, Paolo Bonzini

On Wed, Feb 19, 2025 at 05:33:26AM +0000, Zhijian Li (Fujitsu) wrote:
> 
> 
> On 19/02/2025 06:40, Peter Xu wrote:
> > On Tue, Feb 18, 2025 at 06:03:48PM -0300, Fabiano Rosas wrote:
> >> Li Zhijian via <qemu-devel@nongnu.org> writes:
> >>
> >>> This qtest requirs there is RXE link in the host.
> >>>
> >>> Here is an example to show how to add this RXE link:
> >>> $ ./new-rdma-link.sh
> >>> 192.168.22.93
> >>>
> >>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> >>> ---
> >>> The RDMA migration was broken again...due to lack of sufficient test/qtest.
> >>>
> >>> It's urgly to add and execute a script to establish an RDMA link in
> >>> the C program. If anyone has a better suggestion, please let me know.
> >>>
> >>> $ cat ./new-rdma-link.sh
> >>> get_ipv4_addr() {
> >>>          ip -4 -o addr show dev "$1" |
> >>>                  sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
> >>> }
> >>>
> >>> has_soft_rdma() {
> >>>          rdma link | grep -q " netdev $1[[:blank:]]*\$"
> >>> }
> >>>
> >>> start_soft_rdma() {
> >>>          local type
> >>>
> >>>          modprobe rdma_rxe || return $?
> >>>          type=rxe
> >>>          (
> >>>                  cd /sys/class/net &&
> >>>                          for i in *; do
> >>>                                  [ -e "$i" ] || continue
> >>>                                  [ "$i" = "lo" ] && continue
> >>>                                  [ "$(<"$i/addr_len")" = 6 ] || continue
> >>>                                  [ "$(<"$i/carrier")" = 1 ] || continue
> >>>                                  has_soft_rdma "$i" && break
> >>>                                  rdma link add "${i}_$type" type $type netdev "$i" && break
> >>>                          done
> >>>                  has_soft_rdma "$i" && echo $i
> >>>          )
> >>>
> >>> }
> >>>
> >>> rxe_link=$(start_soft_rdma)
> >>> [[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
> >>>
> >>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> >>> ---
> >>>   tests/qtest/migration/new-rdma-link.sh |  34 ++++++++
> >>>   tests/qtest/migration/precopy-tests.c  | 103 +++++++++++++++++++++++++
> >>>   2 files changed, 137 insertions(+)
> >>>   create mode 100644 tests/qtest/migration/new-rdma-link.sh
> >>>
> >>> diff --git a/tests/qtest/migration/new-rdma-link.sh b/tests/qtest/migration/new-rdma-link.sh
> >>> new file mode 100644
> >>> index 00000000000..ca20594eaae
> >>> --- /dev/null
> >>> +++ b/tests/qtest/migration/new-rdma-link.sh
> >>> @@ -0,0 +1,34 @@
> >>> +#!/bin/bash
> >>> +
> >>> +# Copied from blktests
> >>> +get_ipv4_addr() {
> >>> +	ip -4 -o addr show dev "$1" |
> >>> +		sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
> >>> +}
> >>> +
> >>> +has_soft_rdma() {
> >>> +	rdma link | grep -q " netdev $1[[:blank:]]*\$"
> >>> +}
> >>> +
> >>> +start_soft_rdma() {
> >>> +	local type
> >>> +
> >>> +	modprobe rdma_rxe || return $?
> >>> +	type=rxe
> >>> +	(
> >>> +		cd /sys/class/net &&
> >>> +			for i in *; do
> >>> +				[ -e "$i" ] || continue
> >>> +				[ "$i" = "lo" ] && continue
> >>> +				[ "$(<"$i/addr_len")" = 6 ] || continue
> >>> +				[ "$(<"$i/carrier")" = 1 ] || continue
> >>> +				has_soft_rdma "$i" && break
> >>> +				rdma link add "${i}_$type" type $type netdev "$i" && break
> >>> +			done
> >>> +		has_soft_rdma "$i" && echo $i
> >>> +	)
> >>> +
> >>> +}
> >>> +
> >>> +rxe_link=$(start_soft_rdma)
> >>> +[[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
> >>> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
> >>> index 162fa695318..d2a1c9c9438 100644
> >>> --- a/tests/qtest/migration/precopy-tests.c
> >>> +++ b/tests/qtest/migration/precopy-tests.c
> >>> @@ -98,6 +98,105 @@ static void test_precopy_unix_dirty_ring(void)
> >>>       test_precopy_common(&args);
> >>>   }
> >>>   
> >>> +static int new_rdma_link(char *buffer) {
> >>> +    // Copied from blktests
> >>> +    const char *script =
> >>> +        "#!/bin/bash\n"
> >>> +        "\n"
> >>> +        "get_ipv4_addr() {\n"
> >>> +        "    ip -4 -o addr show dev \"$1\" |\n"
> >>> +        "    sed -n 's/.*[[:blank:]]inet[[:blank:]]*\\([^[:blank:]/]*\\).*/\\1/p'\n"
> >>> +        "}\n"
> >>> +        "\n"
> >>> +        "has_soft_rdma() {\n"
> >>> +        "    rdma link | grep -q \" netdev $1[[:blank:]]*\\$\"\n"
> >>> +        "}\n"
> >>> +        "\n"
> >>> +        "start_soft_rdma() {\n"
> >>> +        "    local type\n"
> >>> +        "\n"
> >>> +        "    modprobe rdma_rxe || return $?\n"
> >>> +        "    type=rxe\n"
> >>> +        "    (\n"
> >>> +        "        cd /sys/class/net &&\n"
> >>> +        "        for i in *; do\n"
> >>> +        "            [ -e \"$i\" ] || continue\n"
> >>> +        "            [ \"$i\" = \"lo\" ] && continue\n"
> >>> +        "            [ \"$(<$i/addr_len)\" = 6 ] || continue\n"
> >>> +        "            [ \"$(<$i/carrier)\" = 1 ] || continue\n"
> >>> +        "            has_soft_rdma \"$i\" && break\n"
> >>> +        "            rdma link add \"${i}_$type\" type $type netdev \"$i\" && break\n"
> >>> +        "        done\n"
> >>> +        "        has_soft_rdma \"$i\" && echo $i\n"
> >>> +        "    )\n"
> >>> +        "}\n"
> >>> +        "\n"
> >>> +        "rxe_link=$(start_soft_rdma)\n"
> >>> +        "[[ \"$rxe_link\" ]] && get_ipv4_addr $rxe_link\n";
> >>> +
> >>> +    char script_filename[] = "/tmp/temp_scriptXXXXXX";
> >>> +    int fd = mkstemp(script_filename);
> >>> +    if (fd == -1) {
> >>> +        perror("Failed to create temporary file");
> >>> +        return 1;
> >>> +    }
> >>> +
> >>> +    FILE *fp = fdopen(fd, "w");
> >>> +    if (fp == NULL) {
> >>> +        perror("Failed to open file stream");
> >>> +        close(fd);
> >>> +        return 1;
> >>> +    }
> >>> +    fprintf(fp, "%s", script);
> >>> +    fclose(fp);
> >>> +
> >>> +    if (chmod(script_filename, 0700) == -1) {
> >>> +        perror("Failed to set execute permission");
> >>> +        return 1;
> >>> +    }
> >>> +
> >>> +    FILE *pipe = popen(script_filename, "r");
> >>> +    if (pipe == NULL) {
> >>> +        perror("Failed to run script");
> >>> +        return 1;
> >>> +    }
> >>> +
> >>> +    int idx = 0;
> >>> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
> >>> +        idx += strlen(buffer);
> >>> +    }
> >>> +    if (buffer[idx - 1] == '\n')
> >>> +        buffer[idx - 1] = 0;
> >>> +
> >>> +    int status = pclose(pipe);
> >>> +    if (status == -1) {
> >>> +        perror("Error reported by pclose()");
> >>> +    } else if (!WIFEXITED(status)) {
> >>> +        printf("Script did not terminate normally\n");
> >>> +    }
> >>> +
> >>> +    remove(script_filename);
> > 
> > The script can be put separately instead if hard-coded here, right?
> 
> 
> Sure, If so, I wonder whether the migration-test program is able to know where is this script?
> 
> 
> > 
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static void test_precopy_rdma_plain(void)
> >>> +{
> >>> +    char buffer[128] = {};
> >>> +
> >>> +    if (new_rdma_link(buffer))
> >>> +        return;
> >>> +
> >>> +    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
> >>> +
> >>> +    MigrateCommon args = {
> >>> +        .listen_uri = uri,
> >>> +        .connect_uri = uri,
> >>> +    };
> >>> +
> >>> +    test_precopy_common(&args);
> >>> +}
> >>> +
> >>>   static void test_precopy_tcp_plain(void)
> >>>   {
> >>>       MigrateCommon args = {
> >>> @@ -968,6 +1067,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
> >>>                          test_multifd_tcp_uri_none);
> >>>       migration_test_add("/migration/multifd/tcp/plain/cancel",
> >>>                          test_multifd_tcp_cancel);
> >>> +#ifdef CONFIG_RDMA
> >>> +    migration_test_add("/migration/precopy/rdma/plain",
> >>> +                       test_precopy_rdma_plain);
> >>> +#endif
> >>>   }
> >>>   
> >>>   void migration_test_add_precopy(MigrationTestEnv *env)
> >>
> >> Thanks, that's definitely better than nothing. I'll experiment with this
> >> locally, see if I can at least run it before sending a pull request.
> > 
> > With your newly added --full, IIUC we can add whatever we want there.
> > E.g. we can add --rdma and iff specified, migration-test adds the rdma test.
> > 
> > Or.. skip the test when the rdma link isn't available.
> > 
> > If we could separate the script into a file, it'll be better.  We could
> > create scripts/migration dir and put all migration scripts over there,
> 
> We have any other existing script? I didn't find it in current QEMU tree.

We have a few that I'm aware of:

  - analyze-migration.py
  - vmstate-static-checker.py
  - userfaultfd-wrlat.py

> 
> 
> > then
> > in the test it tries to detect rdma link and fetch the ip only
> 
> It should work without root permission if we just *detect* and *fetch ip*.
> 
> Do you also mean we can split new-rdma-link.sh to 2 separate scripts
> - add-rdma-link.sh # optionally, execute by user before the test (require root permission)
> - detect-fetch-rdma.sh # execute from the migration-test

Hmm indeed we still need a script to scan over all the ports..

If having --rdma is a good idea, maybe we can further make it a parameter
to --rdma?

  $ migration-test --rdma $RDMA_IP

Or:

  $ migration-test --rdma-ip $RDMA_IP

Then maybe migration-test can directly take that IP and run the tests,
assuming the admin setup the rdma link.  Then we keep that one script.

Or I assume it's still ok that the test requires root only for --rdma, then
invoke the script directly in the test.  If so, we'd better also remove the
rdma link after test finished, so no side effect of the test (modprobe is
probably fine).

We can wait and see how far Fabiano went with this, and also his opinion.

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-19 12:47         ` Peter Xu
@ 2025-02-19 13:20           ` Fabiano Rosas
  2025-02-19 14:11             ` Peter Xu
  0 siblings, 1 reply; 16+ messages in thread
From: Fabiano Rosas @ 2025-02-19 13:20 UTC (permalink / raw)
  To: Peter Xu, Zhijian Li (Fujitsu)
  Cc: Li Zhijian via, Laurent Vivier, Paolo Bonzini

Peter Xu <peterx@redhat.com> writes:

> On Wed, Feb 19, 2025 at 05:33:26AM +0000, Zhijian Li (Fujitsu) wrote:
>> 
>> 
>> On 19/02/2025 06:40, Peter Xu wrote:
>> > On Tue, Feb 18, 2025 at 06:03:48PM -0300, Fabiano Rosas wrote:
>> >> Li Zhijian via <qemu-devel@nongnu.org> writes:
>> >>
>> >>> This qtest requirs there is RXE link in the host.
>> >>>
>> >>> Here is an example to show how to add this RXE link:
>> >>> $ ./new-rdma-link.sh
>> >>> 192.168.22.93
>> >>>
>> >>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>> >>> ---
>> >>> The RDMA migration was broken again...due to lack of sufficient test/qtest.
>> >>>
>> >>> It's urgly to add and execute a script to establish an RDMA link in
>> >>> the C program. If anyone has a better suggestion, please let me know.
>> >>>
>> >>> $ cat ./new-rdma-link.sh
>> >>> get_ipv4_addr() {
>> >>>          ip -4 -o addr show dev "$1" |
>> >>>                  sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
>> >>> }
>> >>>
>> >>> has_soft_rdma() {
>> >>>          rdma link | grep -q " netdev $1[[:blank:]]*\$"
>> >>> }
>> >>>
>> >>> start_soft_rdma() {
>> >>>          local type
>> >>>
>> >>>          modprobe rdma_rxe || return $?
>> >>>          type=rxe
>> >>>          (
>> >>>                  cd /sys/class/net &&
>> >>>                          for i in *; do
>> >>>                                  [ -e "$i" ] || continue
>> >>>                                  [ "$i" = "lo" ] && continue
>> >>>                                  [ "$(<"$i/addr_len")" = 6 ] || continue
>> >>>                                  [ "$(<"$i/carrier")" = 1 ] || continue
>> >>>                                  has_soft_rdma "$i" && break
>> >>>                                  rdma link add "${i}_$type" type $type netdev "$i" && break
>> >>>                          done
>> >>>                  has_soft_rdma "$i" && echo $i
>> >>>          )
>> >>>
>> >>> }
>> >>>
>> >>> rxe_link=$(start_soft_rdma)
>> >>> [[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
>> >>>
>> >>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>> >>> ---
>> >>>   tests/qtest/migration/new-rdma-link.sh |  34 ++++++++
>> >>>   tests/qtest/migration/precopy-tests.c  | 103 +++++++++++++++++++++++++
>> >>>   2 files changed, 137 insertions(+)
>> >>>   create mode 100644 tests/qtest/migration/new-rdma-link.sh
>> >>>
>> >>> diff --git a/tests/qtest/migration/new-rdma-link.sh b/tests/qtest/migration/new-rdma-link.sh
>> >>> new file mode 100644
>> >>> index 00000000000..ca20594eaae
>> >>> --- /dev/null
>> >>> +++ b/tests/qtest/migration/new-rdma-link.sh
>> >>> @@ -0,0 +1,34 @@
>> >>> +#!/bin/bash
>> >>> +
>> >>> +# Copied from blktests
>> >>> +get_ipv4_addr() {
>> >>> +	ip -4 -o addr show dev "$1" |
>> >>> +		sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
>> >>> +}
>> >>> +
>> >>> +has_soft_rdma() {
>> >>> +	rdma link | grep -q " netdev $1[[:blank:]]*\$"
>> >>> +}
>> >>> +
>> >>> +start_soft_rdma() {
>> >>> +	local type
>> >>> +
>> >>> +	modprobe rdma_rxe || return $?
>> >>> +	type=rxe
>> >>> +	(
>> >>> +		cd /sys/class/net &&
>> >>> +			for i in *; do
>> >>> +				[ -e "$i" ] || continue
>> >>> +				[ "$i" = "lo" ] && continue
>> >>> +				[ "$(<"$i/addr_len")" = 6 ] || continue
>> >>> +				[ "$(<"$i/carrier")" = 1 ] || continue
>> >>> +				has_soft_rdma "$i" && break
>> >>> +				rdma link add "${i}_$type" type $type netdev "$i" && break
>> >>> +			done
>> >>> +		has_soft_rdma "$i" && echo $i
>> >>> +	)
>> >>> +
>> >>> +}
>> >>> +
>> >>> +rxe_link=$(start_soft_rdma)
>> >>> +[[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
>> >>> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
>> >>> index 162fa695318..d2a1c9c9438 100644
>> >>> --- a/tests/qtest/migration/precopy-tests.c
>> >>> +++ b/tests/qtest/migration/precopy-tests.c
>> >>> @@ -98,6 +98,105 @@ static void test_precopy_unix_dirty_ring(void)
>> >>>       test_precopy_common(&args);
>> >>>   }
>> >>>   
>> >>> +static int new_rdma_link(char *buffer) {
>> >>> +    // Copied from blktests
>> >>> +    const char *script =
>> >>> +        "#!/bin/bash\n"
>> >>> +        "\n"
>> >>> +        "get_ipv4_addr() {\n"
>> >>> +        "    ip -4 -o addr show dev \"$1\" |\n"
>> >>> +        "    sed -n 's/.*[[:blank:]]inet[[:blank:]]*\\([^[:blank:]/]*\\).*/\\1/p'\n"
>> >>> +        "}\n"
>> >>> +        "\n"
>> >>> +        "has_soft_rdma() {\n"
>> >>> +        "    rdma link | grep -q \" netdev $1[[:blank:]]*\\$\"\n"
>> >>> +        "}\n"
>> >>> +        "\n"
>> >>> +        "start_soft_rdma() {\n"
>> >>> +        "    local type\n"
>> >>> +        "\n"
>> >>> +        "    modprobe rdma_rxe || return $?\n"
>> >>> +        "    type=rxe\n"
>> >>> +        "    (\n"
>> >>> +        "        cd /sys/class/net &&\n"
>> >>> +        "        for i in *; do\n"
>> >>> +        "            [ -e \"$i\" ] || continue\n"
>> >>> +        "            [ \"$i\" = \"lo\" ] && continue\n"
>> >>> +        "            [ \"$(<$i/addr_len)\" = 6 ] || continue\n"
>> >>> +        "            [ \"$(<$i/carrier)\" = 1 ] || continue\n"
>> >>> +        "            has_soft_rdma \"$i\" && break\n"
>> >>> +        "            rdma link add \"${i}_$type\" type $type netdev \"$i\" && break\n"
>> >>> +        "        done\n"
>> >>> +        "        has_soft_rdma \"$i\" && echo $i\n"
>> >>> +        "    )\n"
>> >>> +        "}\n"
>> >>> +        "\n"
>> >>> +        "rxe_link=$(start_soft_rdma)\n"
>> >>> +        "[[ \"$rxe_link\" ]] && get_ipv4_addr $rxe_link\n";
>> >>> +
>> >>> +    char script_filename[] = "/tmp/temp_scriptXXXXXX";
>> >>> +    int fd = mkstemp(script_filename);
>> >>> +    if (fd == -1) {
>> >>> +        perror("Failed to create temporary file");
>> >>> +        return 1;
>> >>> +    }
>> >>> +
>> >>> +    FILE *fp = fdopen(fd, "w");
>> >>> +    if (fp == NULL) {
>> >>> +        perror("Failed to open file stream");
>> >>> +        close(fd);
>> >>> +        return 1;
>> >>> +    }
>> >>> +    fprintf(fp, "%s", script);
>> >>> +    fclose(fp);
>> >>> +
>> >>> +    if (chmod(script_filename, 0700) == -1) {
>> >>> +        perror("Failed to set execute permission");
>> >>> +        return 1;
>> >>> +    }
>> >>> +
>> >>> +    FILE *pipe = popen(script_filename, "r");
>> >>> +    if (pipe == NULL) {
>> >>> +        perror("Failed to run script");
>> >>> +        return 1;
>> >>> +    }
>> >>> +
>> >>> +    int idx = 0;
>> >>> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
>> >>> +        idx += strlen(buffer);
>> >>> +    }
>> >>> +    if (buffer[idx - 1] == '\n')
>> >>> +        buffer[idx - 1] = 0;
>> >>> +
>> >>> +    int status = pclose(pipe);
>> >>> +    if (status == -1) {
>> >>> +        perror("Error reported by pclose()");
>> >>> +    } else if (!WIFEXITED(status)) {
>> >>> +        printf("Script did not terminate normally\n");
>> >>> +    }
>> >>> +
>> >>> +    remove(script_filename);
>> > 
>> > The script can be put separately instead if hard-coded here, right?
>> 
>> 
>> Sure, If so, I wonder whether the migration-test program is able to know where is this script?
>> 
>> 
>> > 
>> >>> +
>> >>> +    return 0;
>> >>> +}
>> >>> +
>> >>> +static void test_precopy_rdma_plain(void)
>> >>> +{
>> >>> +    char buffer[128] = {};
>> >>> +
>> >>> +    if (new_rdma_link(buffer))
>> >>> +        return;
>> >>> +
>> >>> +    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
>> >>> +
>> >>> +    MigrateCommon args = {
>> >>> +        .listen_uri = uri,
>> >>> +        .connect_uri = uri,
>> >>> +    };
>> >>> +
>> >>> +    test_precopy_common(&args);
>> >>> +}
>> >>> +
>> >>>   static void test_precopy_tcp_plain(void)
>> >>>   {
>> >>>       MigrateCommon args = {
>> >>> @@ -968,6 +1067,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
>> >>>                          test_multifd_tcp_uri_none);
>> >>>       migration_test_add("/migration/multifd/tcp/plain/cancel",
>> >>>                          test_multifd_tcp_cancel);
>> >>> +#ifdef CONFIG_RDMA
>> >>> +    migration_test_add("/migration/precopy/rdma/plain",
>> >>> +                       test_precopy_rdma_plain);
>> >>> +#endif
>> >>>   }
>> >>>   
>> >>>   void migration_test_add_precopy(MigrationTestEnv *env)
>> >>
>> >> Thanks, that's definitely better than nothing. I'll experiment with this
>> >> locally, see if I can at least run it before sending a pull request.
>> > 
>> > With your newly added --full, IIUC we can add whatever we want there.
>> > E.g. we can add --rdma and iff specified, migration-test adds the rdma test.
>> > 
>> > Or.. skip the test when the rdma link isn't available.
>> > 
>> > If we could separate the script into a file, it'll be better.  We could
>> > create scripts/migration dir and put all migration scripts over there,
>> 
>> We have any other existing script? I didn't find it in current QEMU tree.
>
> We have a few that I'm aware of:
>
>   - analyze-migration.py
>   - vmstate-static-checker.py
>   - userfaultfd-wrlat.py
>

If it cannot be reached from there for some reason, we could copy it to
build/tests/qtest/migration during the build. As a last resort I'm fine
with just having it directly at tests/qtest/migration like this patch
does.

>> 
>> 
>> > then
>> > in the test it tries to detect rdma link and fetch the ip only
>> 
>> It should work without root permission if we just *detect* and *fetch ip*.
>> 
>> Do you also mean we can split new-rdma-link.sh to 2 separate scripts
>> - add-rdma-link.sh # optionally, execute by user before the test (require root permission)
>> - detect-fetch-rdma.sh # execute from the migration-test
>
> Hmm indeed we still need a script to scan over all the ports..
>
> If having --rdma is a good idea, maybe we can further make it a parameter
> to --rdma?
>
>   $ migration-test --rdma $RDMA_IP
>
> Or:
>
>   $ migration-test --rdma-ip $RDMA_IP

I think --rdma only makes sense if it's going to do something
special. The optmimal scenario is that it always runs the test when it
can and sets up/tears down anything it needs.

If it needs root, I'd prefer the test informs about this and does the
work itself.

It would also be good to have the add + detect separate so we have more
flexibility, maybe we manage to enable this in CI even.

So:

./add.sh
migration-test
(runs detect.sh + runs rdma test)
(leaves stuff behind)

migration-test
(skips rdma test with message that it needs root)

sudo migration-test
(runs add.sh + detect.sh + runs rdma test)
(cleans itself up)

Does that make sense to you? I hope it's not too much work.

If you'd like to limit the usage of sudo for running the tests, then we
could indeed add the --rdma option and this would be even more
strict. The good thing of not having --rdma is that I could call add.sh
and then run the full make check afterwards, but that's not a huge deal.

> Then maybe migration-test can directly take that IP and run the tests,
> assuming the admin setup the rdma link.  Then we keep that one script.
>
> Or I assume it's still ok that the test requires root only for --rdma, then
> invoke the script directly in the test.  If so, we'd better also remove the
> rdma link after test finished, so no side effect of the test (modprobe is
> probably fine).
>
> We can wait and see how far Fabiano went with this, and also his opinion.

I haven't got the chance to try the script yet. I still need to figure
out what packages I need from the distro.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page()
  2025-02-19  9:39     ` Zhijian Li (Fujitsu) via
@ 2025-02-19 13:23       ` Peter Xu
  2025-02-20  1:21         ` Zhijian Li (Fujitsu) via
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Xu @ 2025-02-19 13:23 UTC (permalink / raw)
  To: Zhijian Li (Fujitsu)
  Cc: Fabiano Rosas, Li Zhijian via, Laurent Vivier, Paolo Bonzini

On Wed, Feb 19, 2025 at 09:39:38AM +0000, Zhijian Li (Fujitsu) wrote:
> 
> 
> On 19/02/2025 06:03, Peter Xu wrote:
> > On Tue, Feb 18, 2025 at 05:30:40PM -0300, Fabiano Rosas wrote:
> >> Li Zhijian via <qemu-devel@nongnu.org> writes:
> >>
> >>> Address an error in RDMA-based migration by ensuring RDMA is prioritized
> >>> when saving pages in `ram_save_target_page()`.
> >>>
> >>> Previously, the RDMA protocol's page-saving step was placed after other
> >>> protocols due to a refactoring in commit bc38dc2f5f3. This led to migration
> >>> failures characterized by unknown control messages and state loading errors
> >>> destination:
> >>> (qemu) qemu-system-x86_64: Unknown control message QEMU FILE
> >>> qemu-system-x86_64: error while loading state section id 1(ram)
> >>> qemu-system-x86_64: load of migration failed: Operation not permitted
> >>> source:
> >>> (qemu) qemu-system-x86_64: RDMA is in an error state waiting migration to abort!
> >>> qemu-system-x86_64: failed to save SaveStateEntry with id(name): 1(ram): -1
> >>> qemu-system-x86_64: rdma migration: recv polling control error!
> >>> qemu-system-x86_64: warning: Early error. Sending error.
> >>> qemu-system-x86_64: warning: rdma migration: send polling control error
> >>>
> >>> RDMA migration implemented its own protocol/method to send pages to
> >>> destination side, hand over to RDMA first to prevent pages being saved by
> >>> other protocol.
> >>>
> >>> Fixes: bc38dc2f5f3 ("migration: refactor ram_save_target_page functions")
> >>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> >>> ---
> >>>   migration/ram.c | 9 +++++----
> >>>   1 file changed, 5 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/migration/ram.c b/migration/ram.c
> >>> index 6f460fd22d2..635a2fe443a 100644
> >>> --- a/migration/ram.c
> >>> +++ b/migration/ram.c
> >>> @@ -1964,6 +1964,11 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
> >>>       ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> >>>       int res;
> >>>   
> >>> +    /* Hand over to RDMA first */
> >>> +    if (control_save_page(pss, offset, &res)) {
> >>> +        return res;
> >>> +    }
> >>> +
> >>
> >> Can we hoist that migrate_rdma() from inside the function? Since the
> >> other paths already check first before calling their functions.
> > 
> 
> Yeah, it sounds good to me.
> 
> 
> > If we're talking about hoist and stuff.. and if we want to go slightly
> > further, I wonder if we could also drop RAM_SAVE_CONTROL_NOT_SUPP.
> > 
> >      if (!migrate_rdma() || migration_in_postcopy()) {
> >          return RAM_SAVE_CONTROL_NOT_SUPP;
> >      }
> > 
> > We should make sure rdma_control_save_page() won't get invoked at all in
> > either case above..  
> 
> > For postcopy, maybe we could fail in the QMP migrate /
> > migrate_incoming cmd, at migration_channels_and_transport_compatible()
> 
> I tried to kill RAM_SAVE_CONTROL_NOT_SUPP, but It seems it doesn't need to touch any postcopy logic
> "in the QMP migrate / migrate_incoming cmd, at migration_channels_and_transport_compatible()"
> 
> Is there something I might have overlooked?

Yes it looks almost good.  What I meant is (please see below):

> 
> A whole draft diff would be like below:
> It includes 3 parts:
> 
> migration/rdma: Remove unnecessary RAM_SAVE_CONTROL_NOT_SUPP check in rdma_control_save_page()
> migration: kill RAM_SAVE_CONTROL_NOT_SUPP
> migration: open control_save_page() to ram_save_target_page()
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index 589b6505eb2..fc6a964fd64 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1143,32 +1143,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
>       return len;
>   }
>   
> -/*
> - * @pages: the number of pages written by the control path,
> - *        < 0 - error
> - *        > 0 - number of pages written
> - *
> - * Return true if the pages has been saved, otherwise false is returned.
> - */
> -static bool control_save_page(PageSearchStatus *pss,
> -                              ram_addr_t offset, int *pages)
> -{
> -    int ret;
> -
> -    ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset,
> -                                 TARGET_PAGE_SIZE);
> -    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
> -        return false;
> -    }
> -
> -    if (ret == RAM_SAVE_CONTROL_DELAYED) {
> -        *pages = 1;
> -        return true;
> -    }
> -    *pages = ret;
> -    return true;
> -}
> -
>   /*
>    * directly send the page to the stream
>    *
> @@ -1964,6 +1938,16 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
>       ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
>       int res;
>   
> +    if (migrate_rdma() && !migration_in_postcopy()) {

Here instead of bypassing postcopy, we should fail the migrate cmd early if
postcopy ever enabled:

diff --git a/migration/migration.c b/migration/migration.c
index 862f469ea7..3a82e71437 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -257,6 +257,12 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
         return false;
     }
 
+    if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE &&
+        migrate_postcopy_ram()) {
+        error_setg(errp, "RDMA migration doesn't support postcopy");
+        return false;
+    }
+
     return true;
 }

> +        res = rdma_control_save_page(pss->pss_channel, pss->block->offset,
> +                                     offset, TARGET_PAGE_SIZE);
> +
> +        if (res == RAM_SAVE_CONTROL_DELAYED) {
> +            res = 1;
> +        }
> +        return res;
> +    }
> +
>       if (!migrate_multifd()
>           || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
>           if (save_zero_page(rs, pss, offset)) {
> @@ -1976,10 +1960,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
>           return ram_save_multifd_page(block, offset);
>       }
>       }
>   
> -    if (control_save_page(pss, offset, &res)) {
> -        return res;
> -    }
> -
>       return ram_save_page(rs, pss);
>   }
>   
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 76fb0349238..c6876347e1e 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -3284,14 +3284,11 @@ err:
>   int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
>                              ram_addr_t offset, size_t size)
>   {
> -    if (!migrate_rdma() || migration_in_postcopy()) {
> -        return RAM_SAVE_CONTROL_NOT_SUPP;
> -    }
> +    assert(migrate_rdma());
>   
>       int ret = qemu_rdma_save_page(f, block_offset, offset, size);
>   
> -    if (ret != RAM_SAVE_CONTROL_DELAYED &&
> -        ret != RAM_SAVE_CONTROL_NOT_SUPP) {
> +    if (ret != RAM_SAVE_CONTROL_DELAYED) {
>           if (ret < 0) {
>               qemu_file_set_error(f, ret);
>           }
> diff --git a/migration/rdma.h b/migration/rdma.h
> index f55f28bbed1..bb0296c3726 100644
> --- a/migration/rdma.h
> +++ b/migration/rdma.h
> @@ -33,7 +33,6 @@ void rdma_start_incoming_migration(InetSocketAddress *host_port, Error **errp);
>   #define RAM_CONTROL_ROUND     1
>   #define RAM_CONTROL_FINISH    3
>   
> -#define RAM_SAVE_CONTROL_NOT_SUPP -1000
>   #define RAM_SAVE_CONTROL_DELAYED  -2000
>   
>   #ifdef CONFIG_RDMA
> @@ -56,7 +55,9 @@ static inline
>   int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
>                              ram_addr_t offset, size_t size)
>   {
> -    return RAM_SAVE_CONTROL_NOT_SUPP;
> +    /* never reach */
> +    assert(0);
> +    return -1;
>   }
>   #endif
>   #endif
> 
> 
> 
> 
> Thanks
> Zhijian
> 
> > 
> >>
> >>>       if (!migrate_multifd()
> >>>           || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
> >>>           if (save_zero_page(rs, pss, offset)) {
> >>> @@ -1976,10 +1981,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
> >>>           return ram_save_multifd_page(block, offset);
> >>>       }
> >>>   
> >>> -    if (control_save_page(pss, offset, &res)) {
> >>> -        return res;
> >>> -    }
> >>> -
> >>>       return ram_save_page(rs, pss);
> >>>   }
> >>
> > 

-- 
Peter Xu



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-19 13:20           ` Fabiano Rosas
@ 2025-02-19 14:11             ` Peter Xu
  2025-02-20  9:40               ` Li Zhijian via
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Xu @ 2025-02-19 14:11 UTC (permalink / raw)
  To: Fabiano Rosas
  Cc: Zhijian Li (Fujitsu), Li Zhijian via, Laurent Vivier,
	Paolo Bonzini

On Wed, Feb 19, 2025 at 10:20:21AM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > On Wed, Feb 19, 2025 at 05:33:26AM +0000, Zhijian Li (Fujitsu) wrote:
> >> 
> >> 
> >> On 19/02/2025 06:40, Peter Xu wrote:
> >> > On Tue, Feb 18, 2025 at 06:03:48PM -0300, Fabiano Rosas wrote:
> >> >> Li Zhijian via <qemu-devel@nongnu.org> writes:
> >> >>
> >> >>> This qtest requirs there is RXE link in the host.
> >> >>>
> >> >>> Here is an example to show how to add this RXE link:
> >> >>> $ ./new-rdma-link.sh
> >> >>> 192.168.22.93
> >> >>>
> >> >>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> >> >>> ---
> >> >>> The RDMA migration was broken again...due to lack of sufficient test/qtest.
> >> >>>
> >> >>> It's urgly to add and execute a script to establish an RDMA link in
> >> >>> the C program. If anyone has a better suggestion, please let me know.
> >> >>>
> >> >>> $ cat ./new-rdma-link.sh
> >> >>> get_ipv4_addr() {
> >> >>>          ip -4 -o addr show dev "$1" |
> >> >>>                  sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
> >> >>> }
> >> >>>
> >> >>> has_soft_rdma() {
> >> >>>          rdma link | grep -q " netdev $1[[:blank:]]*\$"
> >> >>> }
> >> >>>
> >> >>> start_soft_rdma() {
> >> >>>          local type
> >> >>>
> >> >>>          modprobe rdma_rxe || return $?
> >> >>>          type=rxe
> >> >>>          (
> >> >>>                  cd /sys/class/net &&
> >> >>>                          for i in *; do
> >> >>>                                  [ -e "$i" ] || continue
> >> >>>                                  [ "$i" = "lo" ] && continue
> >> >>>                                  [ "$(<"$i/addr_len")" = 6 ] || continue
> >> >>>                                  [ "$(<"$i/carrier")" = 1 ] || continue
> >> >>>                                  has_soft_rdma "$i" && break
> >> >>>                                  rdma link add "${i}_$type" type $type netdev "$i" && break
> >> >>>                          done
> >> >>>                  has_soft_rdma "$i" && echo $i
> >> >>>          )
> >> >>>
> >> >>> }
> >> >>>
> >> >>> rxe_link=$(start_soft_rdma)
> >> >>> [[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
> >> >>>
> >> >>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> >> >>> ---
> >> >>>   tests/qtest/migration/new-rdma-link.sh |  34 ++++++++
> >> >>>   tests/qtest/migration/precopy-tests.c  | 103 +++++++++++++++++++++++++
> >> >>>   2 files changed, 137 insertions(+)
> >> >>>   create mode 100644 tests/qtest/migration/new-rdma-link.sh
> >> >>>
> >> >>> diff --git a/tests/qtest/migration/new-rdma-link.sh b/tests/qtest/migration/new-rdma-link.sh
> >> >>> new file mode 100644
> >> >>> index 00000000000..ca20594eaae
> >> >>> --- /dev/null
> >> >>> +++ b/tests/qtest/migration/new-rdma-link.sh
> >> >>> @@ -0,0 +1,34 @@
> >> >>> +#!/bin/bash
> >> >>> +
> >> >>> +# Copied from blktests
> >> >>> +get_ipv4_addr() {
> >> >>> +	ip -4 -o addr show dev "$1" |
> >> >>> +		sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'
> >> >>> +}
> >> >>> +
> >> >>> +has_soft_rdma() {
> >> >>> +	rdma link | grep -q " netdev $1[[:blank:]]*\$"
> >> >>> +}
> >> >>> +
> >> >>> +start_soft_rdma() {
> >> >>> +	local type
> >> >>> +
> >> >>> +	modprobe rdma_rxe || return $?
> >> >>> +	type=rxe
> >> >>> +	(
> >> >>> +		cd /sys/class/net &&
> >> >>> +			for i in *; do
> >> >>> +				[ -e "$i" ] || continue
> >> >>> +				[ "$i" = "lo" ] && continue
> >> >>> +				[ "$(<"$i/addr_len")" = 6 ] || continue
> >> >>> +				[ "$(<"$i/carrier")" = 1 ] || continue
> >> >>> +				has_soft_rdma "$i" && break
> >> >>> +				rdma link add "${i}_$type" type $type netdev "$i" && break
> >> >>> +			done
> >> >>> +		has_soft_rdma "$i" && echo $i
> >> >>> +	)
> >> >>> +
> >> >>> +}
> >> >>> +
> >> >>> +rxe_link=$(start_soft_rdma)
> >> >>> +[[ "$rxe_link" ]] && get_ipv4_addr $rxe_link
> >> >>> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
> >> >>> index 162fa695318..d2a1c9c9438 100644
> >> >>> --- a/tests/qtest/migration/precopy-tests.c
> >> >>> +++ b/tests/qtest/migration/precopy-tests.c
> >> >>> @@ -98,6 +98,105 @@ static void test_precopy_unix_dirty_ring(void)
> >> >>>       test_precopy_common(&args);
> >> >>>   }
> >> >>>   
> >> >>> +static int new_rdma_link(char *buffer) {
> >> >>> +    // Copied from blktests
> >> >>> +    const char *script =
> >> >>> +        "#!/bin/bash\n"
> >> >>> +        "\n"
> >> >>> +        "get_ipv4_addr() {\n"
> >> >>> +        "    ip -4 -o addr show dev \"$1\" |\n"
> >> >>> +        "    sed -n 's/.*[[:blank:]]inet[[:blank:]]*\\([^[:blank:]/]*\\).*/\\1/p'\n"
> >> >>> +        "}\n"
> >> >>> +        "\n"
> >> >>> +        "has_soft_rdma() {\n"
> >> >>> +        "    rdma link | grep -q \" netdev $1[[:blank:]]*\\$\"\n"
> >> >>> +        "}\n"
> >> >>> +        "\n"
> >> >>> +        "start_soft_rdma() {\n"
> >> >>> +        "    local type\n"
> >> >>> +        "\n"
> >> >>> +        "    modprobe rdma_rxe || return $?\n"
> >> >>> +        "    type=rxe\n"
> >> >>> +        "    (\n"
> >> >>> +        "        cd /sys/class/net &&\n"
> >> >>> +        "        for i in *; do\n"
> >> >>> +        "            [ -e \"$i\" ] || continue\n"
> >> >>> +        "            [ \"$i\" = \"lo\" ] && continue\n"
> >> >>> +        "            [ \"$(<$i/addr_len)\" = 6 ] || continue\n"
> >> >>> +        "            [ \"$(<$i/carrier)\" = 1 ] || continue\n"
> >> >>> +        "            has_soft_rdma \"$i\" && break\n"
> >> >>> +        "            rdma link add \"${i}_$type\" type $type netdev \"$i\" && break\n"
> >> >>> +        "        done\n"
> >> >>> +        "        has_soft_rdma \"$i\" && echo $i\n"
> >> >>> +        "    )\n"
> >> >>> +        "}\n"
> >> >>> +        "\n"
> >> >>> +        "rxe_link=$(start_soft_rdma)\n"
> >> >>> +        "[[ \"$rxe_link\" ]] && get_ipv4_addr $rxe_link\n";
> >> >>> +
> >> >>> +    char script_filename[] = "/tmp/temp_scriptXXXXXX";
> >> >>> +    int fd = mkstemp(script_filename);
> >> >>> +    if (fd == -1) {
> >> >>> +        perror("Failed to create temporary file");
> >> >>> +        return 1;
> >> >>> +    }
> >> >>> +
> >> >>> +    FILE *fp = fdopen(fd, "w");
> >> >>> +    if (fp == NULL) {
> >> >>> +        perror("Failed to open file stream");
> >> >>> +        close(fd);
> >> >>> +        return 1;
> >> >>> +    }
> >> >>> +    fprintf(fp, "%s", script);
> >> >>> +    fclose(fp);
> >> >>> +
> >> >>> +    if (chmod(script_filename, 0700) == -1) {
> >> >>> +        perror("Failed to set execute permission");
> >> >>> +        return 1;
> >> >>> +    }
> >> >>> +
> >> >>> +    FILE *pipe = popen(script_filename, "r");
> >> >>> +    if (pipe == NULL) {
> >> >>> +        perror("Failed to run script");
> >> >>> +        return 1;
> >> >>> +    }
> >> >>> +
> >> >>> +    int idx = 0;
> >> >>> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
> >> >>> +        idx += strlen(buffer);
> >> >>> +    }
> >> >>> +    if (buffer[idx - 1] == '\n')
> >> >>> +        buffer[idx - 1] = 0;
> >> >>> +
> >> >>> +    int status = pclose(pipe);
> >> >>> +    if (status == -1) {
> >> >>> +        perror("Error reported by pclose()");
> >> >>> +    } else if (!WIFEXITED(status)) {
> >> >>> +        printf("Script did not terminate normally\n");
> >> >>> +    }
> >> >>> +
> >> >>> +    remove(script_filename);
> >> > 
> >> > The script can be put separately instead if hard-coded here, right?
> >> 
> >> 
> >> Sure, If so, I wonder whether the migration-test program is able to know where is this script?
> >> 
> >> 
> >> > 
> >> >>> +
> >> >>> +    return 0;
> >> >>> +}
> >> >>> +
> >> >>> +static void test_precopy_rdma_plain(void)
> >> >>> +{
> >> >>> +    char buffer[128] = {};
> >> >>> +
> >> >>> +    if (new_rdma_link(buffer))
> >> >>> +        return;
> >> >>> +
> >> >>> +    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
> >> >>> +
> >> >>> +    MigrateCommon args = {
> >> >>> +        .listen_uri = uri,
> >> >>> +        .connect_uri = uri,
> >> >>> +    };
> >> >>> +
> >> >>> +    test_precopy_common(&args);
> >> >>> +}
> >> >>> +
> >> >>>   static void test_precopy_tcp_plain(void)
> >> >>>   {
> >> >>>       MigrateCommon args = {
> >> >>> @@ -968,6 +1067,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
> >> >>>                          test_multifd_tcp_uri_none);
> >> >>>       migration_test_add("/migration/multifd/tcp/plain/cancel",
> >> >>>                          test_multifd_tcp_cancel);
> >> >>> +#ifdef CONFIG_RDMA
> >> >>> +    migration_test_add("/migration/precopy/rdma/plain",
> >> >>> +                       test_precopy_rdma_plain);
> >> >>> +#endif
> >> >>>   }
> >> >>>   
> >> >>>   void migration_test_add_precopy(MigrationTestEnv *env)
> >> >>
> >> >> Thanks, that's definitely better than nothing. I'll experiment with this
> >> >> locally, see if I can at least run it before sending a pull request.
> >> > 
> >> > With your newly added --full, IIUC we can add whatever we want there.
> >> > E.g. we can add --rdma and iff specified, migration-test adds the rdma test.
> >> > 
> >> > Or.. skip the test when the rdma link isn't available.
> >> > 
> >> > If we could separate the script into a file, it'll be better.  We could
> >> > create scripts/migration dir and put all migration scripts over there,
> >> 
> >> We have any other existing script? I didn't find it in current QEMU tree.
> >
> > We have a few that I'm aware of:
> >
> >   - analyze-migration.py
> >   - vmstate-static-checker.py
> >   - userfaultfd-wrlat.py
> >
> 
> If it cannot be reached from there for some reason, we could copy it to
> build/tests/qtest/migration during the build. As a last resort I'm fine
> with just having it directly at tests/qtest/migration like this patch
> does.

Yes, if we want to have the test being able to trigger the script, we can
put it under tests/qtest/migration/.

> 
> >> 
> >> 
> >> > then
> >> > in the test it tries to detect rdma link and fetch the ip only
> >> 
> >> It should work without root permission if we just *detect* and *fetch ip*.
> >> 
> >> Do you also mean we can split new-rdma-link.sh to 2 separate scripts
> >> - add-rdma-link.sh # optionally, execute by user before the test (require root permission)
> >> - detect-fetch-rdma.sh # execute from the migration-test
> >
> > Hmm indeed we still need a script to scan over all the ports..
> >
> > If having --rdma is a good idea, maybe we can further make it a parameter
> > to --rdma?
> >
> >   $ migration-test --rdma $RDMA_IP
> >
> > Or:
> >
> >   $ migration-test --rdma-ip $RDMA_IP
> 
> I think --rdma only makes sense if it's going to do something
> special. The optmimal scenario is that it always runs the test when it
> can and sets up/tears down anything it needs.
> 
> If it needs root, I'd prefer the test informs about this and does the
> work itself.
> 
> It would also be good to have the add + detect separate so we have more
> flexibility, maybe we manage to enable this in CI even.
> 
> So:
> 
> ./add.sh
> migration-test
> (runs detect.sh + runs rdma test)
> (leaves stuff behind)
> 
> migration-test
> (skips rdma test with message that it needs root)
> 
> sudo migration-test
> (runs add.sh + detect.sh + runs rdma test)
> (cleans itself up)
> 
> Does that make sense to you? I hope it's not too much work.

Looks good here.  We can also keep all the rdma stuff into one file, taking
parameters.

./rdma-helper.sh setup
./rdma-helper.sh detect-ip

> 
> If you'd like to limit the usage of sudo for running the tests, then we
> could indeed add the --rdma option and this would be even more
> strict. The good thing of not having --rdma is that I could call add.sh
> and then run the full make check afterwards, but that's not a huge deal.
> 
> > Then maybe migration-test can directly take that IP and run the tests,
> > assuming the admin setup the rdma link.  Then we keep that one script.
> >
> > Or I assume it's still ok that the test requires root only for --rdma, then
> > invoke the script directly in the test.  If so, we'd better also remove the
> > rdma link after test finished, so no side effect of the test (modprobe is
> > probably fine).
> >
> > We can wait and see how far Fabiano went with this, and also his opinion.
> 
> I haven't got the chance to try the script yet. I still need to figure
> out what packages I need from the distro.

For misterious reasons I seem to have all the libs needed, probably I tried
to build RDMA at some point and do something with it.  So I gave it a shot
quickly, I can reproduce Zhijian's failure, and patch 1 fixes it.

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page()
  2025-02-19 13:23       ` Peter Xu
@ 2025-02-20  1:21         ` Zhijian Li (Fujitsu) via
  0 siblings, 0 replies; 16+ messages in thread
From: Zhijian Li (Fujitsu) via @ 2025-02-20  1:21 UTC (permalink / raw)
  To: Peter Xu; +Cc: Fabiano Rosas, Li Zhijian via, Laurent Vivier, Paolo Bonzini



On 19/02/2025 21:23, Peter Xu wrote:
>> I tried to kill RAM_SAVE_CONTROL_NOT_SUPP, but It seems it doesn't need to touch any postcopy logic
>> "in the QMP migrate / migrate_incoming cmd, at migration_channels_and_transport_compatible()"
>>
>> Is there something I might have overlooked?
> Yes it looks almost good.  What I meant is (please see below):
> 
>> A whole draft diff would be like below:
>> It includes 3 parts:
>>
>> migration/rdma: Remove unnecessary RAM_SAVE_CONTROL_NOT_SUPP check in rdma_control_save_page()
>> migration: kill RAM_SAVE_CONTROL_NOT_SUPP
>> migration: open control_save_page() to ram_save_target_page()
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 589b6505eb2..fc6a964fd64 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1143,32 +1143,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
>>        return len;
>>    }
>>    
>> -/*
>> - * @pages: the number of pages written by the control path,
>> - *        < 0 - error
>> - *        > 0 - number of pages written
>> - *
>> - * Return true if the pages has been saved, otherwise false is returned.
>> - */
>> -static bool control_save_page(PageSearchStatus *pss,
>> -                              ram_addr_t offset, int *pages)
>> -{
>> -    int ret;
>> -
>> -    ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset,
>> -                                 TARGET_PAGE_SIZE);
>> -    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
>> -        return false;
>> -    }
>> -
>> -    if (ret == RAM_SAVE_CONTROL_DELAYED) {
>> -        *pages = 1;
>> -        return true;
>> -    }
>> -    *pages = ret;
>> -    return true;
>> -}
>> -
>>    /*
>>     * directly send the page to the stream
>>     *
>> @@ -1964,6 +1938,16 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
>>        ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
>>        int res;
>>    
>> +    if (migrate_rdma() && !migration_in_postcopy()) {
> Here instead of bypassing postcopy, we should fail the migrate cmd early if
> postcopy ever enabled:
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 862f469ea7..3a82e71437 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -257,6 +257,12 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
>           return false;
>       }
>   
> +    if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE &&
> +        migrate_postcopy_ram()) {

I think there is a typo
s/MIGRATION_ADDRESS_TYPE_FILE/MIGRATION_ADDRESS_TYPE_RDMA


> +        error_setg(errp, "RDMA migration doesn't support postcopy");

IIUC, your change means RDMA + postcopy is no longer supported. I didn't realize this before.
Additionally, we might consider eliminating all remaining `migration_in_postcopy()` conditions in the current `rdma.c` file.

Thanks
Zhijian

> +        return false;
> +    }
> +
>       return true;
>   }

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-19 14:11             ` Peter Xu
@ 2025-02-20  9:40               ` Li Zhijian via
  2025-02-20 15:55                 ` Peter Xu
  0 siblings, 1 reply; 16+ messages in thread
From: Li Zhijian via @ 2025-02-20  9:40 UTC (permalink / raw)
  To: Peter Xu, Fabiano Rosas, qemu-devel
  Cc: Laurent Vivier, Paolo Bonzini, Li Zhijian

On 19/02/2025 22:11, Peter Xu wrote:
>>>>> then
>>>>> in the test it tries to detect rdma link and fetch the ip only
>>>> It should work without root permission if we just*detect*  and*fetch ip*.
>>>>
>>>> Do you also mean we can split new-rdma-link.sh to 2 separate scripts
>>>> - add-rdma-link.sh # optionally, execute by user before the test (require root permission)
>>>> - detect-fetch-rdma.sh # execute from the migration-test
>>> Hmm indeed we still need a script to scan over all the ports..
>>>
>>> If having --rdma is a good idea, maybe we can further make it a parameter
>>> to --rdma?
>>>
>>>    $ migration-test --rdma $RDMA_IP
>>>
>>> Or:
>>>
>>>    $ migration-test --rdma-ip $RDMA_IP
>> I think --rdma only makes sense if it's going to do something
>> special. The optmimal scenario is that it always runs the test when it
>> can and sets up/tears down anything it needs.
>>
>> If it needs root, I'd prefer the test informs about this and does the
>> work itself.
>>
>> It would also be good to have the add + detect separate so we have more
>> flexibility, maybe we manage to enable this in CI even.
>>
>> So:
>>
>> ./add.sh
>> migration-test
>> (runs detect.sh + runs rdma test)
>> (leaves stuff behind)
>>
>> migration-test
>> (skips rdma test with message that it needs root)
>>
>> sudo migration-test
>> (runs add.sh + detect.sh + runs rdma test)
>> (cleans itself up)
>>
>> Does that make sense to you? I hope it's not too much work.
> Looks good here.  We can also keep all the rdma stuff into one file, taking
> parameters.
> 
> ./rdma-helper.sh setup
> ./rdma-helper.sh detect-ip

Hi Peter and Fabiano

Many thanks for your kindly idea and suggestion.
Please take another look at the changes below.
- I don't copy script to the build dir, just execute the script like misc-tests.c
- It will automatically create a new RXE if it doesn't exit when running in root

[PATCH] migration: Add qtest for migration over RDMA

This qtest requires there is RDMA(RoCE) link in the host.
Introduce a scripts/rdma-migration-helper.sh to
- setup a new RXE if it's root
- detect existing RoCE link
to make the qtest work smoothly.

Test will be skip if there is no available RoCE link.
 # Start of rdma tests
 # Running /x86_64/migration/precopy/rdma/plain
 ok 1 /x86_64/migration/precopy/rdma/plain # SKIP There is no available rdma link in the host.
 Maybe you are not running with the root permission
 # End of rdma tests

Admin is able to remove the RXE by passing 'cleanup' to this script.

Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
---
 scripts/rdma-migration-helper.sh      | 40 +++++++++++++++++++
 tests/qtest/migration/precopy-tests.c | 57 +++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100755 scripts/rdma-migration-helper.sh

diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh
new file mode 100755
index 0000000000..4ef62baf0f
--- /dev/null
+++ b/scripts/rdma-migration-helper.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copied from blktests
+get_ipv4_addr() {
+    ip -4 -o addr show dev "$1" |
+        sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' |
+        tr -d '\n'
+}
+
+has_soft_rdma() {
+    rdma link | grep -q " netdev $1[[:blank:]]*\$"
+}
+
+rdma_rxe_setup_detect()
+{
+    (
+        cd /sys/class/net &&
+            for i in *; do
+                [ -e "$i" ] || continue
+                [ "$i" = "lo" ] && continue
+                [ "$(<"$i/addr_len")" = 6 ] || continue
+                [ "$(<"$i/carrier")" = 1 ] || continue
+
+                has_soft_rdma "$i" && break
+                [ "$operation" = "setup" ] && rdma link add "${i}_rxe" type rxe netdev "$i" && break
+            done
+        has_soft_rdma "$i" || return
+        get_ipv4_addr $i
+    )
+}
+
+operation=${1:-setup}
+
+if [ "$operation" == "setup" ] || [ "$operation" == "detect" ]; then
+    rdma_rxe_setup_detect
+elif [ "$operation" == "cleanup" ]; then
+    modprobe -r rdma_rxe
+else
+    echo "Usage: $0 [setup | cleanup | detect]"
+fi
diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
index ba273d10b9..8c72eb699b 100644
--- a/tests/qtest/migration/precopy-tests.c
+++ b/tests/qtest/migration/precopy-tests.c
@@ -99,6 +99,59 @@ static void test_precopy_unix_dirty_ring(void)
     test_precopy_common(&args);
 }
 
+#ifdef CONFIG_RDMA
+
+#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
+static int new_rdma_link(char *buffer) {
+    const char *argument = (geteuid() == 0) ? "setup" : "detect";
+    char command[1024];
+
+    snprintf(command, sizeof(command), "%s %s", RDMA_MIGRATION_HELPER, argument);
+
+    FILE *pipe = popen(command, "r");
+    if (pipe == NULL) {
+        perror("Failed to run script");
+        return -1;
+    }
+
+    int idx = 0;
+    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
+        idx += strlen(buffer);
+    }
+
+    int status = pclose(pipe);
+    if (status == -1) {
+        perror("Error reported by pclose()");
+        return -1;
+    } else if (WIFEXITED(status)) {
+        return WEXITSTATUS(status);
+    }
+
+    return -1;
+}
+
+static void test_precopy_rdma_plain(void)
+{
+    char buffer[128] = {};
+
+    if (new_rdma_link(buffer)) {
+        g_test_skip("There is no available rdma link in the host.\n"
+                    "Maybe you are not running with the root permission");
+        return;
+    }
+
+    /* FIXME: query a free port instead of hard code. */
+    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
+
+    MigrateCommon args = {
+        .listen_uri = uri,
+        .connect_uri = uri,
+    };
+
+    test_precopy_common(&args);
+}
+#endif
+
 static void test_precopy_tcp_plain(void)
 {
     MigrateCommon args = {
@@ -1124,6 +1177,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
                        test_multifd_tcp_uri_none);
     migration_test_add("/migration/multifd/tcp/plain/cancel",
                        test_multifd_tcp_cancel);
+#ifdef CONFIG_RDMA
+    migration_test_add("/migration/precopy/rdma/plain",
+                       test_precopy_rdma_plain);
+#endif
 }
 
 void migration_test_add_precopy(MigrationTestEnv *env)
-- 
2.41.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-20  9:40               ` Li Zhijian via
@ 2025-02-20 15:55                 ` Peter Xu
  2025-02-21  1:32                   ` Zhijian Li (Fujitsu) via
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Xu @ 2025-02-20 15:55 UTC (permalink / raw)
  To: Li Zhijian; +Cc: Fabiano Rosas, qemu-devel, Laurent Vivier, Paolo Bonzini

On Thu, Feb 20, 2025 at 05:40:38PM +0800, Li Zhijian wrote:
> On 19/02/2025 22:11, Peter Xu wrote:
> >>>>> then
> >>>>> in the test it tries to detect rdma link and fetch the ip only
> >>>> It should work without root permission if we just*detect*  and*fetch ip*.
> >>>>
> >>>> Do you also mean we can split new-rdma-link.sh to 2 separate scripts
> >>>> - add-rdma-link.sh # optionally, execute by user before the test (require root permission)
> >>>> - detect-fetch-rdma.sh # execute from the migration-test
> >>> Hmm indeed we still need a script to scan over all the ports..
> >>>
> >>> If having --rdma is a good idea, maybe we can further make it a parameter
> >>> to --rdma?
> >>>
> >>>    $ migration-test --rdma $RDMA_IP
> >>>
> >>> Or:
> >>>
> >>>    $ migration-test --rdma-ip $RDMA_IP
> >> I think --rdma only makes sense if it's going to do something
> >> special. The optmimal scenario is that it always runs the test when it
> >> can and sets up/tears down anything it needs.
> >>
> >> If it needs root, I'd prefer the test informs about this and does the
> >> work itself.
> >>
> >> It would also be good to have the add + detect separate so we have more
> >> flexibility, maybe we manage to enable this in CI even.
> >>
> >> So:
> >>
> >> ./add.sh
> >> migration-test
> >> (runs detect.sh + runs rdma test)
> >> (leaves stuff behind)
> >>
> >> migration-test
> >> (skips rdma test with message that it needs root)
> >>
> >> sudo migration-test
> >> (runs add.sh + detect.sh + runs rdma test)
> >> (cleans itself up)
> >>
> >> Does that make sense to you? I hope it's not too much work.
> > Looks good here.  We can also keep all the rdma stuff into one file, taking
> > parameters.
> > 
> > ./rdma-helper.sh setup
> > ./rdma-helper.sh detect-ip
> 
> Hi Peter and Fabiano
> 
> Many thanks for your kindly idea and suggestion.
> Please take another look at the changes below.
> - I don't copy script to the build dir, just execute the script like misc-tests.c
> - It will automatically create a new RXE if it doesn't exit when running in root

Thanks!  This is much better.  Comments below.

> 
> [PATCH] migration: Add qtest for migration over RDMA
> 
> This qtest requires there is RDMA(RoCE) link in the host.
> Introduce a scripts/rdma-migration-helper.sh to
> - setup a new RXE if it's root
> - detect existing RoCE link
> to make the qtest work smoothly.
> 
> Test will be skip if there is no available RoCE link.
>  # Start of rdma tests
>  # Running /x86_64/migration/precopy/rdma/plain
>  ok 1 /x86_64/migration/precopy/rdma/plain # SKIP There is no available rdma link in the host.
>  Maybe you are not running with the root permission
>  # End of rdma tests
> 
> Admin is able to remove the RXE by passing 'cleanup' to this script.
> 
> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> ---
>  scripts/rdma-migration-helper.sh      | 40 +++++++++++++++++++
>  tests/qtest/migration/precopy-tests.c | 57 +++++++++++++++++++++++++++
>  2 files changed, 97 insertions(+)
>  create mode 100755 scripts/rdma-migration-helper.sh
> 
> diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh
> new file mode 100755
> index 0000000000..4ef62baf0f
> --- /dev/null
> +++ b/scripts/rdma-migration-helper.sh
> @@ -0,0 +1,40 @@
> +#!/bin/bash
> +
> +# Copied from blktests
> +get_ipv4_addr() {
> +    ip -4 -o addr show dev "$1" |
> +        sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' |
> +        tr -d '\n'
> +}
> +
> +has_soft_rdma() {
> +    rdma link | grep -q " netdev $1[[:blank:]]*\$"
> +}
> +
> +rdma_rxe_setup_detect()
> +{
> +    (
> +        cd /sys/class/net &&
> +            for i in *; do
> +                [ -e "$i" ] || continue
> +                [ "$i" = "lo" ] && continue
> +                [ "$(<"$i/addr_len")" = 6 ] || continue
> +                [ "$(<"$i/carrier")" = 1 ] || continue
> +
> +                has_soft_rdma "$i" && break
> +                [ "$operation" = "setup" ] && rdma link add "${i}_rxe" type rxe netdev "$i" && break
> +            done
> +        has_soft_rdma "$i" || return
> +        get_ipv4_addr $i
> +    )
> +}
> +
> +operation=${1:-setup}
> +
> +if [ "$operation" == "setup" ] || [ "$operation" == "detect" ]; then
> +    rdma_rxe_setup_detect
> +elif [ "$operation" == "cleanup" ]; then
> +    modprobe -r rdma_rxe

Would this affects host when there's existing user of rdma_rxe (or fail
with -EBUSY)?  If there's no major side effect of leftover rdma link, we
could also drop the "cleanup" for now and start from simple.

> +else
> +    echo "Usage: $0 [setup | cleanup | detect]"
> +fi
> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
> index ba273d10b9..8c72eb699b 100644
> --- a/tests/qtest/migration/precopy-tests.c
> +++ b/tests/qtest/migration/precopy-tests.c
> @@ -99,6 +99,59 @@ static void test_precopy_unix_dirty_ring(void)
>      test_precopy_common(&args);
>  }
>  
> +#ifdef CONFIG_RDMA
> +
> +#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
> +static int new_rdma_link(char *buffer) {

Nit: newline before "{".

> +    const char *argument = (geteuid() == 0) ? "setup" : "detect";
> +    char command[1024];
> +
> +    snprintf(command, sizeof(command), "%s %s", RDMA_MIGRATION_HELPER, argument);
> +
> +    FILE *pipe = popen(command, "r");
> +    if (pipe == NULL) {
> +        perror("Failed to run script");
> +        return -1;
> +    }
> +
> +    int idx = 0;
> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
> +        idx += strlen(buffer);
> +    }
> +
> +    int status = pclose(pipe);
> +    if (status == -1) {
> +        perror("Error reported by pclose()");
> +        return -1;
> +    } else if (WIFEXITED(status)) {
> +        return WEXITSTATUS(status);
> +    }
> +
> +    return -1;
> +}
> +
> +static void test_precopy_rdma_plain(void)
> +{
> +    char buffer[128] = {};
> +
> +    if (new_rdma_link(buffer)) {
> +        g_test_skip("There is no available rdma link in the host.\n"
> +                    "Maybe you are not running with the root permission");

Nit: can be slightly more verbose?

           g_test_skip("\nThere is no available rdma link to run RDMA
                       migration test.  To enable the test:\n"
                       "(1) Run \'%s setup\' with root and rerun the test\n"
                       "(2) Run the test with root privilege\n",
                       RDMA_MIGRATION_HELPER);


> +        return;
> +    }
> +
> +    /* FIXME: query a free port instead of hard code. */
> +    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
> +
> +    MigrateCommon args = {
> +        .listen_uri = uri,
> +        .connect_uri = uri,
> +    };
> +
> +    test_precopy_common(&args);
> +}
> +#endif
> +
>  static void test_precopy_tcp_plain(void)
>  {
>      MigrateCommon args = {
> @@ -1124,6 +1177,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
>                         test_multifd_tcp_uri_none);
>      migration_test_add("/migration/multifd/tcp/plain/cancel",
>                         test_multifd_tcp_cancel);
> +#ifdef CONFIG_RDMA
> +    migration_test_add("/migration/precopy/rdma/plain",
> +                       test_precopy_rdma_plain);
> +#endif
>  }
>  
>  void migration_test_add_precopy(MigrationTestEnv *env)
> -- 
> 2.41.0
> 

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA
  2025-02-20 15:55                 ` Peter Xu
@ 2025-02-21  1:32                   ` Zhijian Li (Fujitsu) via
  0 siblings, 0 replies; 16+ messages in thread
From: Zhijian Li (Fujitsu) via @ 2025-02-21  1:32 UTC (permalink / raw)
  To: Peter Xu
  Cc: Fabiano Rosas, qemu-devel@nongnu.org, Laurent Vivier,
	Paolo Bonzini



On 20/02/2025 23:55, Peter Xu wrote:
> On Thu, Feb 20, 2025 at 05:40:38PM +0800, Li Zhijian wrote:
>> On 19/02/2025 22:11, Peter Xu wrote:
>>>>>>> then
>>>>>>> in the test it tries to detect rdma link and fetch the ip only
>>>>>> It should work without root permission if we just*detect*  and*fetch ip*.
>>>>>>
>>>>>> Do you also mean we can split new-rdma-link.sh to 2 separate scripts
>>>>>> - add-rdma-link.sh # optionally, execute by user before the test (require root permission)
>>>>>> - detect-fetch-rdma.sh # execute from the migration-test
>>>>> Hmm indeed we still need a script to scan over all the ports..
>>>>>
>>>>> If having --rdma is a good idea, maybe we can further make it a parameter
>>>>> to --rdma?
>>>>>
>>>>>     $ migration-test --rdma $RDMA_IP
>>>>>
>>>>> Or:
>>>>>
>>>>>     $ migration-test --rdma-ip $RDMA_IP
>>>> I think --rdma only makes sense if it's going to do something
>>>> special. The optmimal scenario is that it always runs the test when it
>>>> can and sets up/tears down anything it needs.
>>>>
>>>> If it needs root, I'd prefer the test informs about this and does the
>>>> work itself.
>>>>
>>>> It would also be good to have the add + detect separate so we have more
>>>> flexibility, maybe we manage to enable this in CI even.
>>>>
>>>> So:
>>>>
>>>> ./add.sh
>>>> migration-test
>>>> (runs detect.sh + runs rdma test)
>>>> (leaves stuff behind)
>>>>
>>>> migration-test
>>>> (skips rdma test with message that it needs root)
>>>>
>>>> sudo migration-test
>>>> (runs add.sh + detect.sh + runs rdma test)
>>>> (cleans itself up)
>>>>
>>>> Does that make sense to you? I hope it's not too much work.
>>> Looks good here.  We can also keep all the rdma stuff into one file, taking
>>> parameters.
>>>
>>> ./rdma-helper.sh setup
>>> ./rdma-helper.sh detect-ip
>>
>> Hi Peter and Fabiano
>>
>> Many thanks for your kindly idea and suggestion.
>> Please take another look at the changes below.
>> - I don't copy script to the build dir, just execute the script like misc-tests.c
>> - It will automatically create a new RXE if it doesn't exit when running in root
> 
> Thanks!  This is much better.  Comments below.
> 
>>
>> [PATCH] migration: Add qtest for migration over RDMA
>>
>> This qtest requires there is RDMA(RoCE) link in the host.
>> Introduce a scripts/rdma-migration-helper.sh to
>> - setup a new RXE if it's root
>> - detect existing RoCE link
>> to make the qtest work smoothly.
>>
>> Test will be skip if there is no available RoCE link.
>>   # Start of rdma tests
>>   # Running /x86_64/migration/precopy/rdma/plain
>>   ok 1 /x86_64/migration/precopy/rdma/plain # SKIP There is no available rdma link in the host.
>>   Maybe you are not running with the root permission
>>   # End of rdma tests
>>
>> Admin is able to remove the RXE by passing 'cleanup' to this script.
>>
>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>> ---
>>   scripts/rdma-migration-helper.sh      | 40 +++++++++++++++++++
>>   tests/qtest/migration/precopy-tests.c | 57 +++++++++++++++++++++++++++
>>   2 files changed, 97 insertions(+)
>>   create mode 100755 scripts/rdma-migration-helper.sh
>>
>> diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh
>> new file mode 100755
>> index 0000000000..4ef62baf0f
>> --- /dev/null
>> +++ b/scripts/rdma-migration-helper.sh
>> @@ -0,0 +1,40 @@
>> +#!/bin/bash
>> +
>> +# Copied from blktests
>> +get_ipv4_addr() {
>> +    ip -4 -o addr show dev "$1" |
>> +        sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' |
>> +        tr -d '\n'
>> +}
>> +
>> +has_soft_rdma() {
>> +    rdma link | grep -q " netdev $1[[:blank:]]*\$"
>> +}
>> +
>> +rdma_rxe_setup_detect()
>> +{
>> +    (
>> +        cd /sys/class/net &&
>> +            for i in *; do
>> +                [ -e "$i" ] || continue
>> +                [ "$i" = "lo" ] && continue
>> +                [ "$(<"$i/addr_len")" = 6 ] || continue
>> +                [ "$(<"$i/carrier")" = 1 ] || continue
>> +
>> +                has_soft_rdma "$i" && break
>> +                [ "$operation" = "setup" ] && rdma link add "${i}_rxe" type rxe netdev "$i" && break
>> +            done
>> +        has_soft_rdma "$i" || return
>> +        get_ipv4_addr $i
>> +    )
>> +}
>> +
>> +operation=${1:-setup}
>> +
>> +if [ "$operation" == "setup" ] || [ "$operation" == "detect" ]; then
>> +    rdma_rxe_setup_detect
>> +elif [ "$operation" == "cleanup" ]; then
>> +    modprobe -r rdma_rxe
> 
> Would this affects host when there's existing user of rdma_rxe (or fail
> with -EBUSY)?  

'modprobe -r ' will fail with EBUSY in this case.


> If there's no major side effect of leftover rdma link, we
> could also drop the "cleanup" for now and start from simple.

In my understanding, there is no any side effect, I'm fine to drop it in this time.


> 
>> +else
>> +    echo "Usage: $0 [setup | cleanup | detect]"
>> +fi
>> diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
>> index ba273d10b9..8c72eb699b 100644
>> --- a/tests/qtest/migration/precopy-tests.c
>> +++ b/tests/qtest/migration/precopy-tests.c
>> @@ -99,6 +99,59 @@ static void test_precopy_unix_dirty_ring(void)
>>       test_precopy_common(&args);
>>   }
>>   
>> +#ifdef CONFIG_RDMA
>> +
>> +#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
>> +static int new_rdma_link(char *buffer) {
> 
> Nit: newline before "{".

Good catch!


> 
>> +    const char *argument = (geteuid() == 0) ? "setup" : "detect";
>> +    char command[1024];
>> +
>> +    snprintf(command, sizeof(command), "%s %s", RDMA_MIGRATION_HELPER, argument);
>> +
>> +    FILE *pipe = popen(command, "r");
>> +    if (pipe == NULL) {
>> +        perror("Failed to run script");
>> +        return -1;
>> +    }
>> +
>> +    int idx = 0;
>> +    while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
>> +        idx += strlen(buffer);
>> +    }
>> +
>> +    int status = pclose(pipe);
>> +    if (status == -1) {
>> +        perror("Error reported by pclose()");
>> +        return -1;
>> +    } else if (WIFEXITED(status)) {
>> +        return WEXITSTATUS(status);
>> +    }
>> +
>> +    return -1;
>> +}
>> +
>> +static void test_precopy_rdma_plain(void)
>> +{
>> +    char buffer[128] = {};
>> +
>> +    if (new_rdma_link(buffer)) {
>> +        g_test_skip("There is no available rdma link in the host.\n"
>> +                    "Maybe you are not running with the root permission");
> 
> Nit: can be slightly more verbose?
> 
>             g_test_skip("\nThere is no available rdma link to run RDMA
>                         migration test.  To enable the test:\n"
>                         "(1) Run \'%s setup\' with root and rerun the test\n"
>                         "(2) Run the test with root privilege\n",
>                         RDMA_MIGRATION_HELPER);

Yes, it's much better.


Thanks
Zhijian


> 
> 
>> +        return;
>> +    }
>> +
>> +    /* FIXME: query a free port instead of hard code. */
>> +    g_autofree char *uri = g_strdup_printf("rdma:%s:7777", buffer);
>> +
>> +    MigrateCommon args = {
>> +        .listen_uri = uri,
>> +        .connect_uri = uri,
>> +    };
>> +
>> +    test_precopy_common(&args);
>> +}
>> +#endif
>> +
>>   static void test_precopy_tcp_plain(void)
>>   {
>>       MigrateCommon args = {
>> @@ -1124,6 +1177,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
>>                          test_multifd_tcp_uri_none);
>>       migration_test_add("/migration/multifd/tcp/plain/cancel",
>>                          test_multifd_tcp_cancel);
>> +#ifdef CONFIG_RDMA
>> +    migration_test_add("/migration/precopy/rdma/plain",
>> +                       test_precopy_rdma_plain);
>> +#endif
>>   }
>>   
>>   void migration_test_add_precopy(MigrationTestEnv *env)
>> -- 
>> 2.41.0
>>
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2025-02-21  1:33 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-18  7:43 [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page() Li Zhijian via
2025-02-18  7:43 ` [PATCH 2/2] [NOT-FOR-MERGE] Add qtest for migration over RDMA Li Zhijian via
2025-02-18 21:03   ` Fabiano Rosas
2025-02-18 22:40     ` Peter Xu
2025-02-19  5:33       ` Zhijian Li (Fujitsu) via
2025-02-19 12:47         ` Peter Xu
2025-02-19 13:20           ` Fabiano Rosas
2025-02-19 14:11             ` Peter Xu
2025-02-20  9:40               ` Li Zhijian via
2025-02-20 15:55                 ` Peter Xu
2025-02-21  1:32                   ` Zhijian Li (Fujitsu) via
2025-02-18 20:30 ` [PATCH 1/2] migration: Prioritize RDMA in ram_save_target_page() Fabiano Rosas
2025-02-18 22:03   ` Peter Xu
2025-02-19  9:39     ` Zhijian Li (Fujitsu) via
2025-02-19 13:23       ` Peter Xu
2025-02-20  1:21         ` Zhijian Li (Fujitsu) via

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).