Subject: Re: [Qemu-devel] [PATCH v9 14/14] rdma: add pin-all accounting timestamp to QMP statistics
From: "Michael R. Hines"
Date: Tue, 18 Jun 2013 16:17:43 -0400
Message-ID: <51C0C067.9080603@linux.vnet.ibm.com>
In-Reply-To: <51BE05CC.8040706@hp.com>
To: Chegu Vinod
Cc: aliguori@us.ibm.com, quintela@redhat.com, qemu-devel@nongnu.org, owasserm@redhat.com, abali@us.ibm.com, mrhines@us.ibm.com, gokul@us.ibm.com, pbonzini@redhat.com, knoel@redhat.com

On 06/16/2013 02:37 PM, Chegu Vinod wrote:
> On 6/14/2013 1:35 PM, mrhines@linux.vnet.ibm.com wrote:
>> From: "Michael R. Hines"
>>
>> For very large virtual machines, pinning can take a long time.
>> While this does not affect the migration's *actual* time itself,
>> it is still important for the user to know what's going on and
>> to know what component of the total time is actually taken up by
>> pinning.
>>
>> For example, using a 14GB virtual machine, pinning can take as
>> long as 5 seconds, during which the user would not otherwise know
>> what was happening.
>>
>> Reviewed-by: Paolo Bonzini
>> Signed-off-by: Michael R. Hines
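
(For reference: with this patch applied, the new counter shows up in the
HMP monitor roughly as below. Illustrative output only -- the numbers are
made up, and the exact set of surrounding fields depends on the
migration setup:)

    (qemu) info migrate
    capabilities: x-rdma-pin-all: on
    Migration status: completed
    total time: 57748 milliseconds
    downtime: 64 milliseconds
    pin-all: 5024 milliseconds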
>> ---
>>  hmp.c                         |    4 +++
>>  include/migration/migration.h |    1 +
>>  migration-rdma.c              |   55 +++++++++++++++++++++++++++--------------
>>  migration.c                   |   13 +++++++++-
>>  qapi-schema.json              |    3 ++-
>>  5 files changed, 56 insertions(+), 20 deletions(-)
>>
>> diff --git a/hmp.c b/hmp.c
>> index 148a3fb..90c55f2 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -164,6 +164,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
>>              monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n",
>>                             info->downtime);
>>          }
>> +        if (info->has_pin_all_time) {
>> +            monitor_printf(mon, "pin-all: %" PRIu64 " milliseconds\n",
>> +                           info->pin_all_time);
>> +        }
>>      }
>>
>>      if (info->has_ram) {
>> diff --git a/include/migration/migration.h b/include/migration/migration.h
>> index b49e68b..d2ca75b 100644
>> --- a/include/migration/migration.h
>> +++ b/include/migration/migration.h
>> @@ -49,6 +49,7 @@ struct MigrationState
>>      bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
>>      int64_t xbzrle_cache_size;
>>      double mbps;
>> +    int64_t pin_all_time;
>>  };
>>
>>  void process_incoming_migration(QEMUFile *f);
>> diff --git a/migration-rdma.c b/migration-rdma.c
>> index 853de18..e407dce 100644
>> --- a/migration-rdma.c
>> +++ b/migration-rdma.c
>> @@ -699,11 +699,11 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
>>      return 0;
>>  }
>>
>> -static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma,
>> -                RDMALocalBlocks *rdma_local_ram_blocks)
>> +static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
>>  {
>>      int i;
>> -    uint64_t start = qemu_get_clock_ms(rt_clock);
>> +    int64_t start = qemu_get_clock_ms(host_clock);
>> +    RDMALocalBlocks *rdma_local_ram_blocks = &rdma->local_ram_blocks;
>>      (void)start;
>>
>>      for (i = 0; i < rdma_local_ram_blocks->num_blocks; i++) {
>> @@ -721,7 +721,8 @@ static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma,
>>          rdma->total_registrations++;
>>      }
>>
>> -    DPRINTF("lock time: %" PRIu64 "\n", qemu_get_clock_ms(rt_clock) - start);
>> +    DPRINTF("local lock time: %" PRId64 "\n",
>> +            qemu_get_clock_ms(host_clock) - start);
>>
>>      if (i >= rdma_local_ram_blocks->num_blocks) {
>>          return 0;
>> @@ -1262,7 +1263,8 @@ static void qemu_rdma_move_header(RDMAContext *rdma, int idx,
>>   */
>>  static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head,
>>                                     uint8_t *data, RDMAControlHeader *resp,
>> -                                   int *resp_idx)
>> +                                   int *resp_idx,
>> +                                   int (*callback)(RDMAContext *rdma))
>>  {
>>      int ret = 0;
>>      int idx = 0;
>> @@ -1315,6 +1317,14 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head,
>>       * If we're expecting a response, block and wait for it.
>>       */
>>      if (resp) {
>> +        if (callback) {
>> +            DPRINTF("Issuing callback before receiving response...\n");
>> +            ret = callback(rdma);
>> +            if (ret < 0) {
>> +                return ret;
>> +            }
>> +        }
>> +
>>          DDPRINTF("Waiting for response %s\n", control_desc[resp->type]);
>>          ret = qemu_rdma_exchange_get_response(rdma, resp, resp->type, idx + 1);
>>
>> @@ -1464,7 +1474,7 @@ static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma,
>>                  chunk, sge.length, current_index, offset);
>>
>>          ret = qemu_rdma_exchange_send(rdma, &head,
>> -                        (uint8_t *) &comp, NULL, NULL);
>> +                        (uint8_t *) &comp, NULL, NULL, NULL);
>>
>>          if (ret < 0) {
>>              return -EIO;
>> @@ -1487,7 +1497,7 @@ static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma,
>>                  chunk, sge.length, current_index, offset);
>>
>>          ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
>> -                                      &resp, &reg_result_idx);
>> +                                      &resp, &reg_result_idx, NULL);
>>          if (ret < 0) {
>>              return ret;
>>          }
>> @@ -2126,7 +2136,7 @@ static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
>>          head.len = r->len;
>>          head.type = RDMA_CONTROL_QEMU_FILE;
>>
>> -        ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL);
>> +        ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
>>
>>          if (ret < 0) {
>>              rdma->error_state = ret;
>> @@ -2482,7 +2492,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
>>              DPRINTF("Initial setup info requested.\n");
>>
>>              if (rdma->pin_all) {
>> -                ret = qemu_rdma_reg_whole_ram_blocks(rdma, &rdma->local_ram_blocks);
>> +                ret = qemu_rdma_reg_whole_ram_blocks(rdma);
>>                  if (ret) {
>>                      fprintf(stderr, "rdma migration: error dest "
>>                                      "registering ram blocks!\n");
>> @@ -2614,10 +2624,22 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
>>      }
>>
>>      if (flags == RAM_CONTROL_SETUP) {
>> +        int64_t start = qemu_get_clock_ms(host_clock);
>> +        MigrationState *s = migrate_get_current();
>> +
>>          head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
>>          DPRINTF("Sending registration setup for ram blocks...\n");
>>
>> -        ret = qemu_rdma_exchange_send(rdma, &head, NULL, &resp, &reg_result_idx);
>> +        /*
>> +         * Make sure that we parallelize the pinning on both sides.
>> +         * For very large guests, doing this serially takes a really
>> +         * long time, so we have to 'interleave' the pinning locally
>> +         * by performing it before we receive the response from the
>> +         * destination that the pinning has completed.
>> +         */
>> +        ret = qemu_rdma_exchange_send(rdma, &head, NULL, &resp,
>> +                                      &reg_result_idx, rdma->pin_all ?
>> +                                      qemu_rdma_reg_whole_ram_blocks : NULL);
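
(Aside, since this is the subtle part of the patch: the new callback
argument lets qemu_rdma_exchange_send() run local work after posting the
request but before blocking on the destination's reply, which is what
makes source-side and destination-side pinning overlap. Stripped of the
RDMA details, the control flow is roughly the standalone sketch below;
every name in it is made up for illustration:)

    #include <stdio.h>

    /* Hypothetical stand-ins for the RDMA control-channel primitives. */
    static int post_request(void)      { printf("request posted\n");    return 0; }
    static int wait_for_response(void) { printf("response received\n"); return 0; }
    static int pin_local_ram(void)     { printf("local RAM pinned\n");  return 0; }

    /*
     * The pattern from qemu_rdma_exchange_send(): run the optional
     * callback between posting the request and waiting for the reply,
     * so the source pins its RAM while the destination pins its own,
     * instead of the two phases running back-to-back.
     */
    static int exchange_send(int (*callback)(void))
    {
        int ret = post_request();
        if (ret < 0) {
            return ret;
        }
        if (callback) {
            ret = callback(); /* e.g. the whole-ram-blocks registration */
            if (ret < 0) {
                return ret;
            }
        }
        return wait_for_response();
    }

    int main(void)
    {
        return exchange_send(pin_local_ram);
    }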
>>          if (ret < 0) {
>>              ERROR(errp, "receiving remote info!\n");
>>              return ret;
>> @@ -2634,12 +2656,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
>>          }
>>
>>          if (rdma->pin_all) {
>> -            ret = qemu_rdma_reg_whole_ram_blocks(rdma, &rdma->local_ram_blocks);
>> -            if (ret) {
>> -                fprintf(stderr, "rdma migration: error source "
>> -                                "registering ram blocks!\n");
>> -                return ret;
>> -            }
>> +            s->pin_all_time = qemu_get_clock_ms(host_clock) - start;
>>          } else {
>>              int x = 0;
>>              for (x = 0; x < rdma->local_ram_blocks.num_blocks; x++) {
>> @@ -2653,7 +2670,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
>>      DDDPRINTF("Sending registration finish %" PRIu64 "...\n", flags);
>>
>>      head.type = RDMA_CONTROL_REGISTER_FINISHED;
>> -    ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL);
>> +    ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL);
>>
>>      if (ret < 0) {
>>          goto err;
>> @@ -2801,8 +2818,10 @@ void rdma_start_outgoing_migration(void *opaque,
>>
>>      DPRINTF("qemu_rdma_source_connect success\n");
>>
>> +    if (rdma->pin_all) {
>> +        s->pin_all_time = 0;
>> +    }
>>      s->file = qemu_fopen_rdma(rdma, "wb");
>> -    s->total_time = qemu_get_clock_ms(rt_clock);
>>      migrate_fd_connect(s);
>>      return;
>>  err:
>> diff --git a/migration.c b/migration.c
>> index 62c6b85..17531e9 100644
>> --- a/migration.c
>> +++ b/migration.c
>> @@ -67,6 +67,7 @@ MigrationState *migrate_get_current(void)
>>          .bandwidth_limit = MAX_THROTTLE,
>>          .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
>>          .mbps = -1,
>> +        .pin_all_time = -1,
>>      };
>>
>>      return &current_migration;
>> @@ -189,7 +190,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
>>          break;
>>      case MIG_STATE_ACTIVE:
>>          info->has_status = true;
>> -        info->status = g_strdup("active");
>> +        info->status = g_strdup(s->pin_all_time != 0 ? "active" : "pinning");
>
> Please check the logic for setting the migration status. With the
> current logic, which relies on "s->pin_all_time", a non-RDMA
> migration's status will remain "pinning" (instead of "active") until
> the migration completes...
>
> Thx
> Vinod

I just sent out a v10, which I have also validated with a separate patch
to libvirt. (I'm maintaining a sister patch adding RDMA support to
libvirt on github as well, in case you need to use libvirt too.)

- Michael
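
P.S. For the archives: one way to make the guard explicit, so that a
non-RDMA migration can never report "pinning", is sketched below. This
is a standalone illustration, not code from the series -- the struct
models only the relevant MigrationState fields, and the rdma_pin_all
flag is a hypothetical stand-in for the real capability check:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Minimal model of the relevant MigrationState fields. */
    typedef struct {
        bool rdma_pin_all;    /* hypothetical capability flag */
        int64_t pin_all_time; /* -1: never ran, 0: in progress, >0: done (ms) */
    } MigState;

    static const char *migration_status(const MigState *s)
    {
        /* Report "pinning" only while the pin-all phase is actually
         * running; a plain TCP migration always reports "active". */
        if (s->rdma_pin_all && s->pin_all_time == 0) {
            return "pinning";
        }
        return "active";
    }

    int main(void)
    {
        MigState tcp  = { .rdma_pin_all = false, .pin_all_time = 0 };
        MigState rdma = { .rdma_pin_all = true,  .pin_all_time = 0 };
        printf("tcp:  %s\n", migration_status(&tcp));  /* active */
        printf("rdma: %s\n", migration_status(&rdma)); /* pinning */
        return 0;
    }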