netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v4] net/smc: Use percpu ref for wr tx reference
@ 2023-03-13  6:04 Kai
  2023-03-15  7:34 ` Jakub Kicinski
  0 siblings, 1 reply; 6+ messages in thread
From: Kai @ 2023-03-13  6:04 UTC (permalink / raw)
  To: kgraul, wenjia, jaka; +Cc: kuba, davem, netdev, linux-s390, linux-rdma

The refcount wr_tx_refcnt may cause cache thrashing problems among
cores and we can use percpu ref to mitigate this issue here. We
gain some performance improvement with percpu ref here on our
customized smc-r version. Applying cache alignment may also mitigate
this problem, but it seems more reasonable to use percpu ref here.
We can also replace wr_reg_refcnt with one percpu reference like
wr_tx_refcnt.

redis-benchmark on smc-r with atomic wr_tx_refcnt:
SET: 525707.06 requests per second, p50=0.087 msec
GET: 554877.38 requests per second, p50=0.087 msec

redis-benchmark on the percpu_ref version:
SET: 540482.06 requests per second, p50=0.087 msec
GET: 570711.12 requests per second, p50=0.079 msec

Cases are like "redis-benchmark -h x.x.x.x -q -t set,get -P 1 -n
5000000 -c 50 -d 10 --threads 4".

Signed-off-by: Kai <KaiShen@linux.alibaba.com>

v1->v2:
- Modify patch prefix

v2->v3:
- Make wr_reg_refcnt a percpu one as well
- Init percpu ref with 0 flag instead of ALLOW_REINIT flag

v3->v4:
- Update performance data, this data may differ from previous data
  as I ran cases on other machines
---
 net/smc/smc_core.h | 10 ++++++++--
 net/smc/smc_wr.c   | 35 ++++++++++++++++++++++++++++-------
 net/smc/smc_wr.h   |  5 ++---
 3 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 08b457c2d294..1645fba0d2d3 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -106,7 +106,10 @@ struct smc_link {
 	unsigned long		*wr_tx_mask;	/* bit mask of used indexes */
 	u32			wr_tx_cnt;	/* number of WR send buffers */
 	wait_queue_head_t	wr_tx_wait;	/* wait for free WR send buf */
-	atomic_t		wr_tx_refcnt;	/* tx refs to link */
+	struct {
+		struct percpu_ref	wr_tx_refs;
+	} ____cacheline_aligned_in_smp;
+	struct completion	tx_ref_comp;
 
 	struct smc_wr_buf	*wr_rx_bufs;	/* WR recv payload buffers */
 	struct ib_recv_wr	*wr_rx_ibs;	/* WR recv meta data */
@@ -122,7 +125,10 @@ struct smc_link {
 
 	struct ib_reg_wr	wr_reg;		/* WR register memory region */
 	wait_queue_head_t	wr_reg_wait;	/* wait for wr_reg result */
-	atomic_t		wr_reg_refcnt;	/* reg refs to link */
+	struct {
+		struct percpu_ref	wr_reg_refs;
+	} ____cacheline_aligned_in_smp;
+	struct completion	reg_ref_comp;
 	enum smc_wr_reg_state	wr_reg_state;	/* state of wr_reg request */
 
 	u8			gid[SMC_GID_SIZE];/* gid matching used vlan id*/
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index b0678a417e09..0021065a600a 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -377,12 +377,11 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
 	if (rc)
 		return rc;
 
-	atomic_inc(&link->wr_reg_refcnt);
+	percpu_ref_get(&link->wr_reg_refs);
 	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
 					      (link->wr_reg_state != POSTED),
 					      SMC_WR_REG_MR_WAIT_TIME);
-	if (atomic_dec_and_test(&link->wr_reg_refcnt))
-		wake_up_all(&link->wr_reg_wait);
+	percpu_ref_put(&link->wr_reg_refs);
 	if (!rc) {
 		/* timeout - terminate link */
 		smcr_link_down_cond_sched(link);
@@ -647,8 +646,10 @@ void smc_wr_free_link(struct smc_link *lnk)
 	smc_wr_wakeup_tx_wait(lnk);
 
 	smc_wr_tx_wait_no_pending_sends(lnk);
-	wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
-	wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
+	percpu_ref_kill(&lnk->wr_reg_refs);
+	wait_for_completion(&lnk->reg_ref_comp);
+	percpu_ref_kill(&lnk->wr_tx_refs);
+	wait_for_completion(&lnk->tx_ref_comp);
 
 	if (lnk->wr_rx_dma_addr) {
 		ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
@@ -847,6 +848,20 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev)
 	tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn);
 }
 
+static void smcr_wr_tx_refs_free(struct percpu_ref *ref)
+{
+	struct smc_link *lnk = container_of(ref, struct smc_link, wr_tx_refs);
+
+	complete(&lnk->tx_ref_comp);
+}
+
+static void smcr_wr_reg_refs_free(struct percpu_ref *ref)
+{
+	struct smc_link *lnk = container_of(ref, struct smc_link, wr_reg_refs);
+
+	complete(&lnk->reg_ref_comp);
+}
+
 int smc_wr_create_link(struct smc_link *lnk)
 {
 	struct ib_device *ibdev = lnk->smcibdev->ibdev;
@@ -890,9 +905,15 @@ int smc_wr_create_link(struct smc_link *lnk)
 	smc_wr_init_sge(lnk);
 	bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT);
 	init_waitqueue_head(&lnk->wr_tx_wait);
-	atomic_set(&lnk->wr_tx_refcnt, 0);
+	rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL);
+	if (rc)
+		goto dma_unmap;
+	init_completion(&lnk->tx_ref_comp);
 	init_waitqueue_head(&lnk->wr_reg_wait);
-	atomic_set(&lnk->wr_reg_refcnt, 0);
+	rc = percpu_ref_init(&lnk->wr_reg_refs, smcr_wr_reg_refs_free, 0, GFP_KERNEL);
+	if (rc)
+		goto dma_unmap;
+	init_completion(&lnk->reg_ref_comp);
 	init_waitqueue_head(&lnk->wr_rx_empty_wait);
 	return rc;
 
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 45e9b894d3f8..f3008dda222a 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -63,14 +63,13 @@ static inline bool smc_wr_tx_link_hold(struct smc_link *link)
 {
 	if (!smc_link_sendable(link))
 		return false;
-	atomic_inc(&link->wr_tx_refcnt);
+	percpu_ref_get(&link->wr_tx_refs);
 	return true;
 }
 
 static inline void smc_wr_tx_link_put(struct smc_link *link)
 {
-	if (atomic_dec_and_test(&link->wr_tx_refcnt))
-		wake_up_all(&link->wr_tx_wait);
+	percpu_ref_put(&link->wr_tx_refs);
 }
 
 static inline void smc_wr_drain_cq(struct smc_link *lnk)
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v4] net/smc: Use percpu ref for wr tx reference
  2023-03-13  6:04 [PATCH net-next v4] net/smc: Use percpu ref for wr tx reference Kai
@ 2023-03-15  7:34 ` Jakub Kicinski
  2023-03-15  8:27   ` Tony Lu
  2023-03-17  2:44   ` Kai
  0 siblings, 2 replies; 6+ messages in thread
From: Jakub Kicinski @ 2023-03-15  7:34 UTC (permalink / raw)
  To: Kai; +Cc: kgraul, wenjia, jaka, davem, netdev, linux-s390, linux-rdma

On Mon, 13 Mar 2023 06:04:25 +0000 Kai wrote:
> Signed-off-by: Kai <KaiShen@linux.alibaba.com>

Kai Shen ?

> 

You're missing a --- separator here, try to apply this patch with 
git am :/

> v1->v2:
> - Modify patch prefix
> 
> v2->v3:
> - Make wr_reg_refcnt a percpu one as well
> - Init percpu ref with 0 flag instead of ALLOW_REINIT flag
> 
> v3->v4:
> - Update performance data, this data may differ from previous data
>   as I ran cases on other machines
> ---

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v4] net/smc: Use percpu ref for wr tx reference
  2023-03-15  7:34 ` Jakub Kicinski
@ 2023-03-15  8:27   ` Tony Lu
  2023-03-15 20:40     ` Jakub Kicinski
  2023-03-17  2:44   ` Kai
  1 sibling, 1 reply; 6+ messages in thread
From: Tony Lu @ 2023-03-15  8:27 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Kai, kgraul, wenjia, jaka, davem, netdev, linux-s390, linux-rdma

On Wed, Mar 15, 2023 at 12:34:40AM -0700, Jakub Kicinski wrote:
> On Mon, 13 Mar 2023 06:04:25 +0000 Kai wrote:
> > Signed-off-by: Kai <KaiShen@linux.alibaba.com>
> 
> Kai Shen ?
> 
> > 
> 
> You're missing a --- separator here, try to apply this patch with 
> git am :/

There is another commit ce7ca794712f ("net/smc: fix fallback failed
while sendmsg with fastopen") that has been merged that also has this
problem. Maybe we can add some scripts to check this?

> 
> > v1->v2:
> > - Modify patch prefix
> > 
> > v2->v3:
> > - Make wr_reg_refcnt a percpu one as well
> > - Init percpu ref with 0 flag instead of ALLOW_REINIT flag
> > 
> > v3->v4:
> > - Update performance data, this data may differ from previous data
> >   as I ran cases on other machines
> > ---

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v4] net/smc: Use percpu ref for wr tx reference
  2023-03-15  8:27   ` Tony Lu
@ 2023-03-15 20:40     ` Jakub Kicinski
  2023-03-16  6:37       ` Tony Lu
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Kicinski @ 2023-03-15 20:40 UTC (permalink / raw)
  To: Tony Lu; +Cc: Kai, kgraul, wenjia, jaka, davem, netdev, linux-s390, linux-rdma

On Wed, 15 Mar 2023 16:27:05 +0800 Tony Lu wrote:
> > You're missing a --- separator here, try to apply this patch with 
> > git am :/  
> 
> There is another commit ce7ca794712f ("net/smc: fix fallback failed
> while sendmsg with fastopen") that has been merged that also has this
> problem. Maybe we can add some scripts to check this?

Good idea, checkpatch is probably the right place to complain?
A check along the lines of "if Signed-off-by: has been seen, no
empty lines are allowed until ---"?
Would you be willing to try to code that up and send it to the
checkpatch maintainer? If they refuse we can create a local
check just for networking in our own scripts.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v4] net/smc: Use percpu ref for wr tx reference
  2023-03-15 20:40     ` Jakub Kicinski
@ 2023-03-16  6:37       ` Tony Lu
  0 siblings, 0 replies; 6+ messages in thread
From: Tony Lu @ 2023-03-16  6:37 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Kai, kgraul, wenjia, jaka, davem, netdev, linux-s390, linux-rdma

On Wed, Mar 15, 2023 at 01:40:45PM -0700, Jakub Kicinski wrote:
> On Wed, 15 Mar 2023 16:27:05 +0800 Tony Lu wrote:
> > > You're missing a --- separator here, try to apply this patch with 
> > > git am :/  
> > 
> > There is another commit ce7ca794712f ("net/smc: fix fallback failed
> > while sendmsg with fastopen") that has been merged that also has this
> > problem. Maybe we can add some scripts to check this?
> 
> Good idea, checkpatch is probably the right place to complain?

Agree with you.

> A check along the lines of "if Signed-off-by: has been seen, no
> empty lines are allowed until ---"?

Yes.

> Would you be willing to try to code that up and send it to the
> checkpatch maintainer? If they refuse we can create a local
> check just for networking in our own scripts.

Sure, I will do it.

Thanks,
Tony Lu

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v4] net/smc: Use percpu ref for wr tx reference
  2023-03-15  7:34 ` Jakub Kicinski
  2023-03-15  8:27   ` Tony Lu
@ 2023-03-17  2:44   ` Kai
  1 sibling, 0 replies; 6+ messages in thread
From: Kai @ 2023-03-17  2:44 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: kgraul, wenjia, jaka, davem, netdev, linux-s390, linux-rdma



On 3/15/23 3:34 PM, Jakub Kicinski wrote:
> On Mon, 13 Mar 2023 06:04:25 +0000 Kai wrote:
>> Signed-off-by: Kai <KaiShen@linux.alibaba.com>
> 
> Kai Shen ?
> 
>>
> 
> You're missing a --- separator here, try to apply this patch with
> git am :/
> 
>> v1->v2:
>> - Modify patch prefix
>>
>> v2->v3:
>> - Make wr_reg_refcnt a percpu one as well
>> - Init percpu ref with 0 flag instead of ALLOW_REINIT flag
>>
>> v3->v4:
>> - Update performance data, this data may differ from previous data
>>    as I ran cases on other machines
>> ---
Will fix, thanks

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-03-17  2:45 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-13  6:04 [PATCH net-next v4] net/smc: Use percpu ref for wr tx reference Kai
2023-03-15  7:34 ` Jakub Kicinski
2023-03-15  8:27   ` Tony Lu
2023-03-15 20:40     ` Jakub Kicinski
2023-03-16  6:37       ` Tony Lu
2023-03-17  2:44   ` Kai

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).