From: Samuel Zhang <guoqing.zhang@amd.com>
To: <qemu-devel@nongnu.org>
Cc: <Emily.Deng@amd.com>, <PengJu.Zhou@amd.com>, <guoqing.zhang@amd.com>
Subject: [PATCH] migration/rdma: add x-rdma-chunk-shift parameter
Date: Mon, 16 Mar 2026 14:23:08 +0800 [thread overview]
Message-ID: <20260316062308.1240426-1-guoqing.zhang@amd.com> (raw)
The default 1MB RDMA chunk size causes slow live migration because
each chunk triggers a write_flush (ibv_post_send). For 8GB RAM,
1MB chunks produce ~15000 flushes vs ~3700 with 1GB chunks.
Add x-rdma-chunk-shift parameter to configure the RDMA chunk size
(2^N bytes) for faster migration.
Usage: -global migration.x-rdma-chunk-shift=30
Performance with RDMA live migration of 8GB RAM VM:
| x-rdma-chunk-shift | chunk size | time (s) | throughput (Mbps) |
|--------------------|------------|----------|-------------------|
| 20 (default) | 1 MB | 37.915 | 1,007 |
| 25 | 32 MB | 17.880 | 2,260 |
| 30 | 1 GB | 4.368 | 17,529 |
Signed-off-by: Samuel Zhang <guoqing.zhang@amd.com>
---
migration/options.c | 13 +++++++++++++
migration/options.h | 1 +
migration/rdma.c | 37 ++++++++++++++++++++++---------------
qapi/migration.json | 9 ++++++++-
4 files changed, 44 insertions(+), 16 deletions(-)
diff --git a/migration/options.c b/migration/options.c
index f33b297929..1503ae35a2 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -90,6 +90,7 @@ const PropertyInfo qdev_prop_StrOrNull;
#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 1000 /* milliseconds */
#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT 1 /* MB/s */
+#define DEFAULT_MIGRATE_X_RDMA_CHUNK_SHIFT 20 /* 1MB */
const Property migration_properties[] = {
DEFINE_PROP_BOOL("store-global-state", MigrationState,
@@ -183,6 +184,9 @@ const Property migration_properties[] = {
DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
ZERO_PAGE_DETECTION_MULTIFD),
+ DEFINE_PROP_UINT8("x-rdma-chunk-shift", MigrationState,
+ parameters.x_rdma_chunk_shift,
+ DEFAULT_MIGRATE_X_RDMA_CHUNK_SHIFT),
/* Migration capabilities */
DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -993,6 +997,15 @@ ZeroPageDetection migrate_zero_page_detection(void)
return s->parameters.zero_page_detection;
}
+uint8_t migrate_rdma_chunk_shift(void)
+{
+ MigrationState *s = migrate_get_current();
+ uint8_t chunk_shift = s->parameters.x_rdma_chunk_shift;
+
+ assert(20 <= chunk_shift && chunk_shift <= 30);
+ return chunk_shift;
+}
+
/* parameters helpers */
AnnounceParameters *migrate_announce_params(void)
diff --git a/migration/options.h b/migration/options.h
index b502871097..3f214465a3 100644
--- a/migration/options.h
+++ b/migration/options.h
@@ -87,6 +87,7 @@ const char *migrate_tls_creds(void);
const char *migrate_tls_hostname(void);
uint64_t migrate_xbzrle_cache_size(void);
ZeroPageDetection migrate_zero_page_detection(void);
+uint8_t migrate_rdma_chunk_shift(void);
/* parameters helpers */
diff --git a/migration/rdma.c b/migration/rdma.c
index 55ab85650a..d914a7cd3b 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -44,11 +44,18 @@
#define RDMA_RESOLVE_TIMEOUT_MS 10000
-/* Do not merge data if larger than this. */
-#define RDMA_MERGE_MAX (2 * 1024 * 1024)
-#define RDMA_SIGNALED_SEND_MAX (RDMA_MERGE_MAX / 4096)
+#define RDMA_SIGNALED_SEND_MAX 512
+
+static inline uint64_t rdma_chunk_size(void)
+{
+ return 1UL << migrate_rdma_chunk_shift();
+}
-#define RDMA_REG_CHUNK_SHIFT 20 /* 1 MB */
+/* Do not merge data if larger than this. */
+static inline uint64_t rdma_merge_max(void)
+{
+ return rdma_chunk_size() * 2;
+}
/*
* This is only for non-live state being migrated.
@@ -527,21 +534,21 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head,
static inline uint64_t ram_chunk_index(const uint8_t *start,
const uint8_t *host)
{
- return ((uintptr_t) host - (uintptr_t) start) >> RDMA_REG_CHUNK_SHIFT;
+ return ((uintptr_t) host - (uintptr_t) start) >> migrate_rdma_chunk_shift();
}
static inline uint8_t *ram_chunk_start(const RDMALocalBlock *rdma_ram_block,
uint64_t i)
{
return (uint8_t *)(uintptr_t)(rdma_ram_block->local_host_addr +
- (i << RDMA_REG_CHUNK_SHIFT));
+ (i << migrate_rdma_chunk_shift()));
}
static inline uint8_t *ram_chunk_end(const RDMALocalBlock *rdma_ram_block,
uint64_t i)
{
uint8_t *result = ram_chunk_start(rdma_ram_block, i) +
- (1UL << RDMA_REG_CHUNK_SHIFT);
+ rdma_chunk_size();
if (result > (rdma_ram_block->local_host_addr + rdma_ram_block->length)) {
result = rdma_ram_block->local_host_addr + rdma_ram_block->length;
@@ -1841,6 +1848,7 @@ static int qemu_rdma_write_one(RDMAContext *rdma,
struct ibv_send_wr *bad_wr;
int reg_result_idx, ret, count = 0;
uint64_t chunk, chunks;
+ uint64_t chunk_size = rdma_chunk_size();
uint8_t *chunk_start, *chunk_end;
RDMALocalBlock *block = &(rdma->local_ram_blocks.block[current_index]);
RDMARegister reg;
@@ -1861,22 +1869,21 @@ retry:
chunk_start = ram_chunk_start(block, chunk);
if (block->is_ram_block) {
- chunks = length / (1UL << RDMA_REG_CHUNK_SHIFT);
+ chunks = length / chunk_size;
- if (chunks && ((length % (1UL << RDMA_REG_CHUNK_SHIFT)) == 0)) {
+ if (chunks && ((length % chunk_size) == 0)) {
chunks--;
}
} else {
- chunks = block->length / (1UL << RDMA_REG_CHUNK_SHIFT);
+ chunks = block->length / chunk_size;
- if (chunks && ((block->length % (1UL << RDMA_REG_CHUNK_SHIFT)) == 0)) {
+ if (chunks && ((block->length % chunk_size) == 0)) {
chunks--;
}
}
trace_qemu_rdma_write_one_top(chunks + 1,
- (chunks + 1) *
- (1UL << RDMA_REG_CHUNK_SHIFT) / 1024 / 1024);
+ (chunks + 1) * chunk_size / 1024 / 1024);
chunk_end = ram_chunk_end(block, chunk + chunks);
@@ -2176,7 +2183,7 @@ static int qemu_rdma_write(RDMAContext *rdma,
rdma->current_length += len;
/* flush it if buffer is too large */
- if (rdma->current_length >= RDMA_MERGE_MAX) {
+ if (rdma->current_length >= rdma_merge_max()) {
return qemu_rdma_write_flush(rdma, errp);
}
@@ -3522,7 +3529,7 @@ int rdma_registration_handle(QEMUFile *f)
} else {
chunk = reg->key.chunk;
host_addr = block->local_host_addr +
- (reg->key.chunk * (1UL << RDMA_REG_CHUNK_SHIFT));
+ (reg->key.chunk * rdma_chunk_size());
/* Check for particularly bad chunk value */
if (host_addr < (void *)block->local_host_addr) {
error_report("rdma: bad chunk for block %s"
diff --git a/qapi/migration.json b/qapi/migration.json
index 7134d4ce47..0521bf3d69 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1007,9 +1007,14 @@
# is @cpr-exec. The first list element is the program's filename,
# the remainder its arguments. (Since 10.2)
#
+# @x-rdma-chunk-shift: RDMA memory registration chunk shift.
+# The chunk size is 2^N bytes where N is the value.
+# Defaults to 20 (1 MiB). Only takes effect for RDMA migration.
+# (Since 10.2)
+#
# Features:
#
-# @unstable: Members @x-checkpoint-delay and
+# @unstable: Members @x-rdma-chunk-shift, @x-checkpoint-delay and
# @x-vcpu-dirty-limit-period are experimental.
#
# Since: 2.4
@@ -1045,6 +1050,8 @@
'*vcpu-dirty-limit': 'uint64',
'*mode': 'MigMode',
'*zero-page-detection': 'ZeroPageDetection',
+ '*x-rdma-chunk-shift': { 'type': 'uint8',
+ 'features': [ 'unstable' ] },
'*direct-io': 'bool',
'*cpr-exec-command': [ 'str' ]} }
--
2.43.7
next reply other threads:[~2026-03-16 12:34 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-16 6:23 Samuel Zhang [this message]
2026-03-19 8:48 ` [PATCH] migration/rdma: add x-rdma-chunk-shift parameter Markus Armbruster
2026-03-20 3:39 ` Zhang, GuoQing (Sam)
2026-03-20 7:42 ` Markus Armbruster
2026-03-26 2:58 ` [PATCH v2] migration/rdma: add x-rdma-chunk-size parameter Samuel Zhang
2026-03-26 6:57 ` Markus Armbruster
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260316062308.1240426-1-guoqing.zhang@amd.com \
--to=guoqing.zhang@amd.com \
--cc=Emily.Deng@amd.com \
--cc=PengJu.Zhou@amd.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox