From mboxrd@z Thu Jan  1 00:00:00 1970
Message-ID: <52256DA4.4060607@linux.vnet.ibm.com>
Date: Tue, 03 Sep 2013 13:03:32 +0800
From: Lei Li
MIME-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
Subject: Re: [Qemu-devel] [Qemu-stable][PATCH] rdma: fix multiple VMs parallel migration
To: Frank Yang
Cc: qemu-devel@nongnu.org, pbonzini@redhat.com, qemu-stable@nongnu.org,
 Michael R Hines

Hi Frank,

I failed to apply this patch. Please make sure to send it with
git-send-email, otherwise it's a little hard to review. :)

On 08/30/2013 08:39 PM, Frank Yang wrote:
> When several VMs migrate with RDMA at the same time, the increased
> pressure causes probabilistic packet loss and makes the source and
> destination wait for each other. Some of the VMs may stay blocked
> during the migration.
>
> Fix the bug by using two completion queues, one for sending and one
> for receiving.
>
> From 0c4829495cdc89eea2e94b103ac42c3f6a4b32c2 Mon Sep 17 00:00:00 2001
> From: Frank Yang
> Date: Fri, 30 Aug 2013 17:53:34 +0800
> Subject: [PATCH] rdma: fix multiple VMs parallel migration

The commit message should be here, within the patch itself. You can use
'git commit --amend' to add it.

>
> Signed-off-by: Frank Yang
> ---
>  migration-rdma.c | 57 +++++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 37 insertions(+), 20 deletions(-)
>
> diff --git a/migration-rdma.c b/migration-rdma.c
> index 3d1266f..d0eacbb 100644
> --- a/migration-rdma.c
> +++ b/migration-rdma.c
> @@ -362,7 +362,8 @@ typedef struct RDMAContext {
>      struct ibv_qp *qp;                      /* queue pair */
>      struct ibv_comp_channel *comp_channel;  /* completion channel */
>      struct ibv_pd *pd;                      /* protection domain */
> -    struct ibv_cq *cq;                      /* completion queue */
> +    struct ibv_cq *send_cq;                 /* send completion queue */
> +    struct ibv_cq *recv_cq;                 /* receive completion queue */
>
>      /*
>       * If a previous write failed (perhaps because of a failed
> @@ -1006,9 +1007,12 @@ static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma)
>       * Completion queue can be filled by both read and write work requests,
>       * so must reflect the sum of both possible queue sizes.
>       */
> -    rdma->cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
> +    rdma->send_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 2),
>                               NULL, rdma->comp_channel, 0);
> -    if (!rdma->cq) {
> +    rdma->recv_cq = ibv_create_cq(rdma->verbs, RDMA_SIGNALED_SEND_MAX, NULL,
> +                                  rdma->comp_channel, 0);
> +
> +    if (!rdma->send_cq || !rdma->recv_cq) {
>          fprintf(stderr, "failed to allocate completion queue\n");
>          goto err_alloc_pd_cq;
>      }
> @@ -1040,8 +1044,8 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
>      attr.cap.max_recv_wr = 3;
>      attr.cap.max_send_sge = 1;
>      attr.cap.max_recv_sge = 1;
> -    attr.send_cq = rdma->cq;
> -    attr.recv_cq = rdma->cq;
> +    attr.send_cq = rdma->send_cq;
> +    attr.recv_cq = rdma->recv_cq;
>      attr.qp_type = IBV_QPT_RC;
>
>      ret = rdma_create_qp(rdma->cm_id, rdma->pd, &attr);
> @@ -1361,13 +1365,18 @@ static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index,
>   * Return the work request ID that completed.
>   */
>  static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out,
> -                               uint32_t *byte_len)
> +                               uint32_t *byte_len, int wrid_requested)
>  {
>      int ret;
>      struct ibv_wc wc;
>      uint64_t wr_id;
>
> -    ret = ibv_poll_cq(rdma->cq, 1, &wc);
> +    if (wrid_requested == RDMA_WRID_RDMA_WRITE ||
> +        wrid_requested == RDMA_WRID_SEND_CONTROL) {
> +        ret = ibv_poll_cq(rdma->send_cq, 1, &wc);
> +    } else if (wrid_requested >= RDMA_WRID_RECV_CONTROL) {
> +        ret = ibv_poll_cq(rdma->recv_cq, 1, &wc);
> +    }
>
>      if (!ret) {
>          *wr_id_out = RDMA_WRID_NONE;
> @@ -1460,12 +1469,9 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
>      void *cq_ctx;
>      uint64_t wr_id = RDMA_WRID_NONE, wr_id_in;
>
> -    if (ibv_req_notify_cq(rdma->cq, 0)) {
> -        return -1;
> -    }
>      /* poll cq first */
>      while (wr_id != wrid_requested) {
> -        ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len);
> +        ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len, wrid_requested);
>          if (ret < 0) {
>              return ret;
>          }
> @@ -1487,6 +1493,17 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
>      }
>
>      while (1) {
> +        if (wrid_requested == RDMA_WRID_RDMA_WRITE ||
> +            wrid_requested == RDMA_WRID_SEND_CONTROL) {
> +            if (ibv_req_notify_cq(rdma->send_cq, 0)) {
> +                return -1;
> +            }
> +        } else if (wrid_requested >= RDMA_WRID_RECV_CONTROL) {
> +            if (ibv_req_notify_cq(rdma->recv_cq, 0)) {
> +                return -1;
> +            }
> +        }
> +
>          /*
>           * Coroutine doesn't start until process_incoming_migration()
>           * so don't yield unless we know we're running inside of a coroutine.
> @@ -1502,12 +1519,8 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
>
>          num_cq_events++;
>
> -        if (ibv_req_notify_cq(cq, 0)) {
> -            goto err_block_for_wrid;
> -        }
> -
>          while (wr_id != wrid_requested) {
> -            ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len);
> +            ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len, wrid_requested);
>              if (ret < 0) {
>                  goto err_block_for_wrid;
>              }
> @@ -2236,9 +2249,13 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>          ibv_destroy_qp(rdma->qp);
>          rdma->qp = NULL;
>      }
> -    if (rdma->cq) {
> -        ibv_destroy_cq(rdma->cq);
> -        rdma->cq = NULL;
> +    if (rdma->send_cq) {
> +        ibv_destroy_cq(rdma->send_cq);
> +        rdma->send_cq = NULL;
> +    }
> +    if (rdma->recv_cq) {
> +        ibv_destroy_cq(rdma->recv_cq);
> +        rdma->recv_cq = NULL;
>      }
>      if (rdma->comp_channel) {
>          ibv_destroy_comp_channel(rdma->comp_channel);
> @@ -2770,7 +2787,7 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
>       */
>      while (1) {
>          uint64_t wr_id, wr_id_in;
> -        int ret = qemu_rdma_poll(rdma, &wr_id_in, NULL);
> +        int ret = qemu_rdma_poll(rdma, &wr_id_in, NULL, RDMA_WRID_RDMA_WRITE);
>          if (ret < 0) {
>              fprintf(stderr, "rdma migration: polling error! %d\n", ret);
>              goto err;
> --
> 1.8.3.msysgit.0
>

--
Lei
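For what it's worth, here is a rough standalone sketch of the send/recv
completion queue split the patch is going for, written against plain
libibverbs rather than QEMU's migration-rdma.c. It is only an illustration:
SEND_CQ_DEPTH and RECV_CQ_DEPTH are made-up sizes (QEMU derives its queue
depths from RDMA_SIGNALED_SEND_MAX), and there is no protection domain, QP
or actual traffic here.

/*
 * Sketch only: allocate one CQ for send/write completions and one for
 * receive completions, so a slow receiver cannot starve the sender's
 * completions. Build with: gcc two_cq.c -o two_cq -libverbs
 */
#include <infiniband/verbs.h>
#include <stdio.h>

#define SEND_CQ_DEPTH 128   /* illustrative only, not QEMU's sizing */
#define RECV_CQ_DEPTH 64    /* illustrative only, not QEMU's sizing */

int main(void)
{
    int num = 0;
    struct ibv_device **devs = ibv_get_device_list(&num);
    if (!devs || num == 0) {
        fprintf(stderr, "no RDMA devices found\n");
        return 1;
    }

    struct ibv_context *ctx = ibv_open_device(devs[0]);
    if (!ctx) {
        fprintf(stderr, "failed to open device\n");
        return 1;
    }

    struct ibv_comp_channel *ch = ibv_create_comp_channel(ctx);
    if (!ch) {
        fprintf(stderr, "failed to create completion channel\n");
        return 1;
    }

    /* Separate CQs for the send and receive sides. */
    struct ibv_cq *send_cq = ibv_create_cq(ctx, SEND_CQ_DEPTH, NULL, ch, 0);
    struct ibv_cq *recv_cq = ibv_create_cq(ctx, RECV_CQ_DEPTH, NULL, ch, 0);
    if (!send_cq || !recv_cq) {
        fprintf(stderr, "failed to allocate completion queues\n");
        return 1;
    }

    /*
     * A queue pair would then be created with attr.send_cq = send_cq and
     * attr.recv_cq = recv_cq, and the poller picks the CQ that matches
     * the kind of work request it is waiting for, e.g.:
     */
    struct ibv_wc wc;
    int n = ibv_poll_cq(send_cq, 1, &wc);   /* drain send-side completions */
    if (n > 0) {
        printf("completed wr_id %llu\n", (unsigned long long)wc.wr_id);
    }

    ibv_destroy_cq(send_cq);
    ibv_destroy_cq(recv_cq);
    ibv_destroy_comp_channel(ch);
    ibv_close_device(ctx);
    ibv_free_device_list(devs);
    return 0;
}

The point of the split is that a stalled receive side can no longer hide
the sender's completions behind its own, which matches the "source and
destination wait for each other" symptom described above.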