linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Mike Marciniszyn
	<mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Subject: [PATCH 18/20] IB/hfi1: Eliminate synchronize_rcu() in mr delete
Date: Wed, 01 Mar 2017 10:24:09 -0800	[thread overview]
Message-ID: <20170301182404.29989.63441.stgit@scvm10.sc.intel.com> (raw)
In-Reply-To: <20170301181719.29989.31238.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>

From: Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

The synchronize_rcu() call can be eliminated to improve memory deregistration
performance.

There are two key fields involved:
- The rcu pointer itself
- the lkey_published field

To close the window between the rcu read of the mregion pointer and the
reference count the code should:

1. To lkey/rkey validation (reader)

Read the rcu pointer.  If the pointer is non-NULL, get a reference.

To the current validation tests use a READ_ONCE() on the lkey_published.

Upon any failure release the reference.

2. To the remove logic (delete)

Insure the published is zeroed prior to setting the pointer to NULL.
This requires using rcu_assign_pointer() to insure lkey_published
is written prior to the NULL.

3. To the insert logic (add)

Insure the published is set use an rcu_assign_pointer() to insure the
pointer is after all MR fields.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/sw/rdmavt/mr.c |   49 +++++++++++++++++++++++++------------
 1 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index ae30b68..7c86955 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -191,8 +191,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
 
 		tmr = rcu_access_pointer(dev->dma_mr);
 		if (!tmr) {
-			rcu_assign_pointer(dev->dma_mr, mr);
 			mr->lkey_published = 1;
+			/* Insure published written first */
+			rcu_assign_pointer(dev->dma_mr, mr);
 			rvt_get_mr(mr);
 		}
 		goto success;
@@ -224,8 +225,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
 		mr->lkey |= 1 << 8;
 		rkt->gen++;
 	}
-	rcu_assign_pointer(rkt->table[r], mr);
 	mr->lkey_published = 1;
+	/* Insure published written first */
+	rcu_assign_pointer(rkt->table[r], mr);
 success:
 	spin_unlock_irqrestore(&rkt->lock, flags);
 out:
@@ -253,23 +255,24 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
 	spin_lock_irqsave(&rkt->lock, flags);
 	if (!lkey) {
 		if (mr->lkey_published) {
-			RCU_INIT_POINTER(dev->dma_mr, NULL);
+			mr->lkey_published = 0;
+			/* insure published is written before pointer */
+			rcu_assign_pointer(dev->dma_mr, NULL);
 			rvt_put_mr(mr);
 		}
 	} else {
 		if (!mr->lkey_published)
 			goto out;
 		r = lkey >> (32 - dev->dparms.lkey_table_size);
-		RCU_INIT_POINTER(rkt->table[r], NULL);
+		mr->lkey_published = 0;
+		/* insure published is written before pointer */
+		rcu_assign_pointer(rkt->table[r], NULL);
 	}
-	mr->lkey_published = 0;
 	freed++;
 out:
 	spin_unlock_irqrestore(&rkt->lock, flags);
-	if (freed) {
-		synchronize_rcu();
+	if (freed)
 		percpu_ref_kill(&mr->refcount);
-	}
 }
 
 static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
@@ -822,16 +825,21 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
 		goto ok;
 	}
 	mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
-	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
-		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+	if (!mr)
 		goto bail;
+	rvt_get_mr(mr);
+	if (!READ_ONCE(mr->lkey_published))
+		goto bail_unref;
+
+	if (unlikely(atomic_read(&mr->lkey_invalid) ||
+		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+		goto bail_unref;
 
 	off = sge->addr - mr->user_base;
 	if (unlikely(sge->addr < mr->user_base ||
 		     off + sge->length > mr->length ||
 		     (mr->access_flags & acc) != acc))
-		goto bail;
-	rvt_get_mr(mr);
+		goto bail_unref;
 	rcu_read_unlock();
 
 	off += mr->offset;
@@ -867,6 +875,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
 	isge->n = n;
 ok:
 	return 1;
+bail_unref:
+	rvt_put_mr(mr);
 bail:
 	rcu_read_unlock();
 	return 0;
@@ -922,15 +932,20 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 	}
 
 	mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
-	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
-		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+	if (!mr)
 		goto bail;
+	rvt_get_mr(mr);
+	/* insure mr read is before test */
+	if (!READ_ONCE(mr->lkey_published))
+		goto bail_unref;
+	if (unlikely(atomic_read(&mr->lkey_invalid) ||
+		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+		goto bail_unref;
 
 	off = vaddr - mr->iova;
 	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
 		     (mr->access_flags & acc) == 0))
-		goto bail;
-	rvt_get_mr(mr);
+		goto bail_unref;
 	rcu_read_unlock();
 
 	off += mr->offset;
@@ -966,6 +981,8 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 	sge->n = n;
 ok:
 	return 1;
+bail_unref:
+	rvt_put_mr(mr);
 bail:
 	rcu_read_unlock();
 	return 0;

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2017-03-01 18:24 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-01 18:21 [PATCH 00/20] IB/hfi1, qib, rdmavt: Another round of patches for 4.11 Dennis Dalessandro
     [not found] ` <20170301181719.29989.31238.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-03-01 18:21   ` [PATCH 01/20] IB/hfi1: Force logical link down Dennis Dalessandro
2017-03-01 18:21   ` [PATCH 02/20] IB/hfi1: Race hazard avoidance in user SDMA driver Dennis Dalessandro
2017-03-01 18:21   ` [PATCH 03/20] IB/hfi1: Cache registers during state change Dennis Dalessandro
2017-03-01 18:21   ` [PATCH 04/20] IB/hfi1: NULL pointer dereference when freeing rhashtable Dennis Dalessandro
2017-03-01 18:22   ` [PATCH 05/20] IB/rdmavt, IB/hfi1, IB/qib: Make wc opcode translation driver dependent Dennis Dalessandro
2017-03-01 18:22   ` [PATCH 06/20] IB/rdmavt: Add additional fields to post send trace Dennis Dalessandro
2017-03-01 18:22   ` [PATCH 07/20] IB/rdmavt: Add tracing for cq entry and poll Dennis Dalessandro
2017-03-01 18:22   ` [PATCH 08/20] IB/rdmavt: Add swqe completion trace Dennis Dalessandro
2017-03-01 18:22   ` [PATCH 09/20] IB/hfi1: Check device id early during init Dennis Dalessandro
2017-03-01 18:22   ` [PATCH 10/20] IB/hfi1: Protect the global dev_cntr_names and port_cntr_names Dennis Dalessandro
2017-03-01 18:22   ` [PATCH 11/20] IB/hfi1: Check for QSFP presence before attempting reads Dennis Dalessandro
2017-03-01 18:23   ` [PATCH 12/20] IB/hfi1: Add a patch value to the firmware version string Dennis Dalessandro
2017-03-01 18:23   ` [PATCH 13/20] IB/rdmavt,IB/hfi1: Fix timer migration regressions Dennis Dalessandro
2017-03-01 18:23   ` [PATCH 14/20] IB/rdmavt: Avoid reseting wqe send_flags in unreserve Dennis Dalessandro
2017-03-01 18:23   ` [PATCH 15/20] IB/hfi1: Ensure VL index is within bounds Dennis Dalessandro
2017-03-01 18:23   ` [PATCH 16/20] IB/hfi1: Add receive fault injection feature Dennis Dalessandro
     [not found]     ` <20170301182344.29989.12032.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-03-02 17:19       ` Dennis Dalessandro
2017-03-01 18:23   ` [PATCH 17/20] IB/hfi1: Add transmit " Dennis Dalessandro
2017-03-01 18:24   ` Dennis Dalessandro [this message]
2017-03-01 18:24   ` [PATCH 19/20] IB/rdmavt, IB/qib, IB/hfi1: Make percpu refcount optional for user MRs Dennis Dalessandro
2017-03-01 18:24   ` [PATCH 20/20] IB/core: If the MGID/MLID pair is not on the list return an error Dennis Dalessandro
     [not found]     ` <20170301182426.29989.77369.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-03-02 14:00       ` Leon Romanovsky
     [not found]         ` <20170302140028.GE9525-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-03-02 15:01           ` Dennis Dalessandro
     [not found]             ` <4d49f7ab-5bc5-029c-4321-fd349c9dc0f8-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2017-03-02 15:13               ` Leon Romanovsky
2017-03-02 15:18   ` [PATCH v2 " Dennis Dalessandro
2017-03-02 18:26   ` [PATCH v2 16/20] IB/hfi1: Add receive fault injection feature Dennis Dalessandro
     [not found]     ` <20170302182610.28851.42687.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-03-03 16:15       ` Leon Romanovsky
     [not found]         ` <20170303161508.GF14379-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-03-03 17:02           ` Dennis Dalessandro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170301182404.29989.63441.stgit@scvm10.sc.intel.com \
    --to=dennis.dalessandro-ral2jqcrhueavxtiumwx3w@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).