Netdev List
 help / color / mirror / Atom feed
From: Gerd Rausch <gerd.rausch@oracle.com>
To: Santosh Shilimkar <santosh.shilimkar@oracle.com>,
	netdev@vger.kernel.org, linux-rdma@vger.kernel.org,
	rds-devel@oss.oracle.com
Cc: David Miller <davem@davemloft.net>
Subject: [PATCH net v3 3/7] net/rds: Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition after posting IB_WR_LOCAL_INV
Date: Tue, 16 Jul 2019 15:29:02 -0700	[thread overview]
Message-ID: <59304e3c-e15f-6a3b-a8d2-63de370ed847@oracle.com> (raw)

In order to:
1) avoid a silly bouncing between "clean_list" and "drop_list"
   triggered by function "rds_ib_reg_frmr" as it is releases frmr
   regions whose state is not "FRMR_IS_FREE" right away.

2) prevent an invalid access error in a race from a pending
   "IB_WR_LOCAL_INV" operation with a teardown ("dma_unmap_sg", "put_page")
   and de-registration ("ib_dereg_mr") of the corresponding
   memory region.

Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
---
 net/rds/ib_frmr.c | 65 +++++++++++++++++++++++++++--------------------
 net/rds/ib_mr.h   |  2 ++
 2 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 6038138d6e38..708c553c3da5 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -76,6 +76,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
 
 	frmr->fr_state = FRMR_IS_FREE;
 	init_waitqueue_head(&frmr->fr_inv_done);
+	init_waitqueue_head(&frmr->fr_reg_done);
 	return ibmr;
 
 out_no_cigar:
@@ -124,6 +125,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
 	 */
 	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
 	frmr->fr_state = FRMR_IS_INUSE;
+	frmr->fr_reg = true;
 
 	memset(&reg_wr, 0, sizeof(reg_wr));
 	reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
@@ -144,7 +146,17 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
 		if (printk_ratelimit())
 			pr_warn("RDS/IB: %s returned error(%d)\n",
 				__func__, ret);
+		goto out;
 	}
+
+	/* Wait for the registration to complete in order to prevent an invalid
+	 * access error resulting from a race between the memory region already
+	 * being accessed while registration is still pending.
+	 */
+	wait_event(frmr->fr_reg_done, !frmr->fr_reg);
+
+out:
+
 	return ret;
 }
 
@@ -262,6 +274,19 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
 		pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
 		goto out;
 	}
+
+	/* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
+	 * 1) avoid a silly bouncing between "clean_list" and "drop_list"
+	 *    triggered by function "rds_ib_reg_frmr" as it is releases frmr
+	 *    regions whose state is not "FRMR_IS_FREE" right away.
+	 * 2) prevents an invalid access error in a race
+	 *    from a pending "IB_WR_LOCAL_INV" operation
+	 *    with a teardown ("dma_unmap_sg", "put_page")
+	 *    and de-registration ("ib_dereg_mr") of the corresponding
+	 *    memory region.
+	 */
+	wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
+
 out:
 	return ret;
 }
@@ -289,6 +314,11 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 		wake_up(&frmr->fr_inv_done);
 	}
 
+	if (frmr->fr_reg) {
+		frmr->fr_reg = false;
+		wake_up(&frmr->fr_reg_done);
+	}
+
 	atomic_inc(&ic->i_fastreg_wrs);
 }
 
@@ -297,14 +327,18 @@ void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
 {
 	struct rds_ib_mr *ibmr, *next;
 	struct rds_ib_frmr *frmr;
-	int ret = 0;
+	int ret = 0, ret2;
 	unsigned int freed = *nfreed;
 
 	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
 	list_for_each_entry(ibmr, list, unmap_list) {
-		if (ibmr->sg_dma_len)
-			ret |= rds_ib_post_inv(ibmr);
+		if (ibmr->sg_dma_len) {
+			ret2 = rds_ib_post_inv(ibmr);
+			if (ret2 && !ret)
+				ret = ret2;
+		}
 	}
+
 	if (ret)
 		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
 
@@ -347,31 +381,8 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
 	}
 
 	do {
-		if (ibmr) {
-			/* Memory regions make it onto the "clean_list" via
-			 * "rds_ib_flush_mr_pool", after the memory region has
-			 * been posted for invalidation via "rds_ib_post_inv".
-			 *
-			 * At that point in time, "fr_state" may still be
-			 * in state "FRMR_IS_INUSE", since the only place where
-			 * "fr_state" transitions to "FRMR_IS_FREE" is in
-			 * is in "rds_ib_mr_cqe_handler", which is
-			 * triggered by a tasklet.
-			 *
-			 * So we wait for "fr_inv_done" to trigger
-			 * and only put memory regions onto the drop_list
-			 * that failed (i.e. not marked "FRMR_IS_FREE").
-			 *
-			 * This avoids the problem of memory-regions bouncing
-			 * between "clean_list" and "drop_list" before they
-			 * even have a chance to be properly invalidated.
-			 */
-			frmr = &ibmr->u.frmr;
-			wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
-			if (frmr->fr_state == FRMR_IS_FREE)
-				break;
+		if (ibmr)
 			rds_ib_free_frmr(ibmr, true);
-		}
 		ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
 		if (IS_ERR(ibmr))
 			return ibmr;
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index ab26c20ed66f..9045a8c0edff 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -58,6 +58,8 @@ struct rds_ib_frmr {
 	enum rds_ib_fr_state	fr_state;
 	bool			fr_inv;
 	wait_queue_head_t	fr_inv_done;
+	bool			fr_reg;
+	wait_queue_head_t	fr_reg_done;
 	struct ib_send_wr	fr_wr;
 	unsigned int		dma_npages;
 	unsigned int		sg_byte_len;
-- 
2.22.0



             reply	other threads:[~2019-07-16 22:29 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-16 22:29 Gerd Rausch [this message]
2019-07-17  0:27 ` [PATCH net v3 3/7] net/rds: Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition after posting IB_WR_LOCAL_INV santosh.shilimkar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=59304e3c-e15f-6a3b-a8d2-63de370ed847@oracle.com \
    --to=gerd.rausch@oracle.com \
    --cc=davem@davemloft.net \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=rds-devel@oss.oracle.com \
    --cc=santosh.shilimkar@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox