qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Yuval Shaia <yuval.shaia@oracle.com>
To: dgilbert@redhat.com, yuval.shaia@oracle.com,
	marcel.apfelbaum@gmail.com, armbru@redhat.com,
	qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 03/10] hw/rdma: Warn when too many consecutive poll CQ triggered on an empty CQ
Date: Thu, 31 Jan 2019 15:08:43 +0200	[thread overview]
Message-ID: <20190131130850.6850-4-yuval.shaia@oracle.com> (raw)
In-Reply-To: <20190131130850.6850-1-yuval.shaia@oracle.com>

Emit a warning when the guest performs an unneeded busy-waiting loop on
an empty CQ, to help protect against CPU over-utilization.

Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
---
 hw/rdma/rdma_backend.c      | 11 +++++++----
 hw/rdma/rdma_backend.h      |  2 +-
 hw/rdma/rdma_rm.c           |  1 +
 hw/rdma/rdma_rm_defs.h      |  6 +++++-
 hw/rdma/vmw/pvrdma_qp_ops.c | 24 +++++++++++++++++++++++-
 5 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index 2f6372f8f0..b7d6afb5da 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -64,9 +64,9 @@ static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err,
     comp_handler(ctx, &wc);
 }
 
-static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
+static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
 {
-    int i, ne;
+    int i, ne, total_ne = 0;
     BackendCtx *bctx;
     struct ibv_wc wc[2];
 
@@ -76,6 +76,7 @@ static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
         trace_rdma_poll_cq(ne, ibcq);
 
         for (i = 0; i < ne; i++) {
+            total_ne++;
             bctx = rdma_rm_get_cqe_ctx(rdma_dev_res, wc[i].wr_id);
             if (unlikely(!bctx)) {
                 rdma_error_report("No matching ctx for req %"PRId64,
@@ -93,6 +94,8 @@ static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
     if (ne < 0) {
         rdma_error_report("ibv_poll_cq fail, rc=%d, errno=%d", ne, errno);
     }
+
+    return total_ne;
 }
 
 static void *comp_handler_thread(void *arg)
@@ -267,9 +270,9 @@ int rdma_backend_query_port(RdmaBackendDev *backend_dev,
     return 0;
 }
 
-void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq)
+int rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq)
 {
-    rdma_poll_cq(rdma_dev_res, cq->ibcq);
+    return rdma_poll_cq(rdma_dev_res, cq->ibcq);
 }
 
 static GHashTable *ah_hash;
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 5114c90e67..36305cd148 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -85,7 +85,7 @@ void rdma_backend_destroy_mr(RdmaBackendMR *mr);
 int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq,
                            int cqe);
 void rdma_backend_destroy_cq(RdmaBackendCQ *cq);
-void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
+int rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
 
 int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
                            RdmaBackendPD *pd, RdmaBackendCQ *scq,
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index 64c6ea1a4e..1ba77ac42c 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -261,6 +261,7 @@ int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
     if (!cq) {
         return -ENOMEM;
     }
+    atomic_set(&cq->missing_cqe, 0);
 
     cq->opaque = opaque;
     cq->notify = CNT_CLEAR;
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index 0ba61d1838..08692e87d4 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -34,7 +34,9 @@
 #define MAX_QP_INIT_RD_ATOM   16
 #define MAX_AH                64
 
-#define MAX_RM_TBL_NAME 16
+#define MAX_RM_TBL_NAME             16
+#define MAX_CONSEQ_EMPTY_POLL_CQ    2048 /* considered as error above this */
+
 typedef struct RdmaRmResTbl {
     char name[MAX_RM_TBL_NAME];
     QemuMutex lock;
@@ -59,6 +61,8 @@ typedef struct RdmaRmCQ {
     RdmaBackendCQ backend_cq;
     void *opaque;
     CQNotificationType notify;
+    int missing_cqe;
+    int conseq_empty_poll;
 } RdmaRmCQ;
 
 /* MR (DMA region) */
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
index 16db726dac..5d650a4943 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.c
+++ b/hw/rdma/vmw/pvrdma_qp_ops.c
@@ -60,6 +60,8 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
         return -EINVAL;
     }
 
+    atomic_dec(&cq->missing_cqe);
+
     ring = (PvrdmaRing *)cq->opaque;
 
     /* Step #1: Put CQE on CQ ring */
@@ -141,12 +143,15 @@ void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
     PvrdmaRing *ring;
     int sgid_idx;
     union ibv_gid *sgid;
+    RdmaRmCQ *cq;
 
     qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
     if (unlikely(!qp)) {
         return;
     }
 
+    cq = rdma_rm_get_cq(&dev->rdma_dev_res, qp->send_cq_handle);
+
     ring = (PvrdmaRing *)qp->opaque;
 
     wqe = (struct PvrdmaSqWqe *)pvrdma_ring_next_elem_read(ring);
@@ -186,6 +191,7 @@ void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
             continue;
         }
 
+        atomic_inc(&cq->missing_cqe);
         rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
                                (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
                                sgid_idx, sgid,
@@ -204,12 +210,15 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
     RdmaRmQP *qp;
     PvrdmaRqWqe *wqe;
     PvrdmaRing *ring;
+    RdmaRmCQ *cq;
 
     qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
     if (unlikely(!qp)) {
         return;
     }
 
+    cq = rdma_rm_get_cq(&dev->rdma_dev_res, qp->recv_cq_handle);
+
     ring = &((PvrdmaRing *)qp->opaque)[1];
 
     wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
@@ -231,6 +240,7 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
             continue;
         }
 
+        atomic_inc(&cq->missing_cqe);
         rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res,
                                &qp->backend_qp, qp->qp_type,
                                (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
@@ -245,11 +255,23 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
 void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
 {
     RdmaRmCQ *cq;
+    int polled;
 
     cq = rdma_rm_get_cq(dev_res, cq_handle);
     if (!cq) {
         return;
     }
 
-    rdma_backend_poll_cq(dev_res, &cq->backend_cq);
+    polled = rdma_backend_poll_cq(dev_res, &cq->backend_cq);
+    if (!polled) {
+        if (cq->conseq_empty_poll == MAX_CONSEQ_EMPTY_POLL_CQ) {
+            rdma_warn_report("%d consequtive empty polls from CQ %d, missing cqe %d",
+                             cq->conseq_empty_poll, cq_handle,
+                             atomic_read(&cq->missing_cqe));
+            cq->conseq_empty_poll = 0;
+        }
+        cq->conseq_empty_poll++;
+    } else {
+        cq->conseq_empty_poll = 0;
+    }
 }
-- 
2.17.2

  parent reply	other threads:[~2019-01-31 13:09 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-31 13:08 [Qemu-devel] [PATCH 00/10] Misc fixes to pvrdma device Yuval Shaia
2019-01-31 13:08 ` [Qemu-devel] [PATCH 01/10] hw/rdma: Switch to generic error reporting way Yuval Shaia
2019-02-01 12:36   ` Dr. David Alan Gilbert
2019-02-03  7:32     ` Yuval Shaia
2019-01-31 13:08 ` [Qemu-devel] [PATCH 02/10] hw/rdma: Introduce locked qlist Yuval Shaia
2019-02-07  9:05   ` Marcel Apfelbaum
2019-02-07 10:28     ` Yuval Shaia
2019-01-31 13:08 ` Yuval Shaia [this message]
2019-02-06 10:14   ` [Qemu-devel] [PATCH 03/10] hw/rdma: Warn when too many consecutive poll CQ triggered on an empty CQ Marcel Apfelbaum
2019-02-06 14:59     ` Yuval Shaia
2019-02-06 15:02     ` Yuval Shaia
2019-01-31 13:08 ` [Qemu-devel] [PATCH 04/10] hw/rdma: Protect against concurrent execution of poll_cq Yuval Shaia
2019-02-05 20:14   ` Marcel Apfelbaum
2019-01-31 13:08 ` [Qemu-devel] [PATCH 05/10] hw/pvrdma: Add device statistics counters Yuval Shaia
2019-02-06 10:17   ` Marcel Apfelbaum
2019-02-06 14:44     ` Yuval Shaia
2019-01-31 13:08 ` [Qemu-devel] [PATCH 06/10] hw/pvrdma: Dump device statistics counters to file Yuval Shaia
2019-02-04 13:03   ` Markus Armbruster
2019-02-04 16:14     ` Yuval Shaia
2019-02-04 18:21       ` Markus Armbruster
2019-01-31 13:08 ` [Qemu-devel] [PATCH 07/10] monitor: Expose pvrdma device statistics counters Yuval Shaia
2019-01-31 13:17   ` Eric Blake
2019-01-31 20:08     ` Yuval Shaia
2019-01-31 20:52       ` Eric Blake
2019-02-01  7:33         ` Markus Armbruster
2019-02-01 11:42           ` Dr. David Alan Gilbert
2019-02-03  7:12             ` Yuval Shaia
2019-02-03  7:06           ` Yuval Shaia
2019-02-04  8:23             ` Markus Armbruster
2019-02-04 16:07               ` Yuval Shaia
2019-02-05  7:21                 ` Markus Armbruster
2019-02-04  8:00       ` Markus Armbruster
2019-01-31 13:08 ` [Qemu-devel] [PATCH 08/10] hw/rdma: Free all MAD receive buffers when device is closed Yuval Shaia
2019-02-06 10:19   ` Marcel Apfelbaum
2019-01-31 13:08 ` [Qemu-devel] [PATCH 09/10] hw/rdma: Free all receive buffers when QP is destroyed Yuval Shaia
2019-02-06 10:23   ` Marcel Apfelbaum
2019-02-06 15:55     ` Yuval Shaia
2019-01-31 13:08 ` [Qemu-devel] [PATCH 10/10] hw/pvrdma: Delete unneeded function argument Yuval Shaia
2019-02-05 20:16   ` Marcel Apfelbaum
2019-02-02 13:50 ` [Qemu-devel] [PATCH 00/10] Misc fixes to pvrdma device no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190131130850.6850-4-yuval.shaia@oracle.com \
    --to=yuval.shaia@oracle.com \
    --cc=armbru@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).