netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
	Maxim Mikityanskiy <maximmi@nvidia.com>
Subject: [PATCH net-next 04/15] net/mlx5e: xsk: Improve need_wakeup logic
Date: Sat,  1 Oct 2022 21:56:21 -0700	[thread overview]
Message-ID: <20221002045632.291612-5-saeed@kernel.org> (raw)
In-Reply-To: <20221002045632.291612-1-saeed@kernel.org>

From: Maxim Mikityanskiy <maximmi@nvidia.com>

XSK need_wakeup mechanism allows the driver to stop busy waiting for
buffers when the fill ring is empty, yield to the application and signal
it that the driver needs to be waken up after the application refills
the fill ring.

Add protection against the race condition on the RX (refill) side: if
the application refills buffers after xskrq->post_wqes is called, but
before mlx5e_xsk_update_rx_wakeup, NAPI will exit, skipping taking these
buffers to the hardware WQ, and the application won't wake it up again.

Optimize the whole need_wakeup logic, removing unneeded flows, to
compensate for this new check.

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/xsk/rx.h   | 14 --------
 .../ethernet/mellanox/mlx5/core/en/xsk/tx.h   | 12 -------
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   |  1 +
 .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 33 ++++++++++++-------
 4 files changed, 23 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index 84a496a8d72f..087c943bd8e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -5,7 +5,6 @@
 #define __MLX5_EN_XSK_RX_H__
 
 #include "en.h"
-#include <net/xdp_sock_drv.h>
 
 /* RX data path */
 
@@ -21,17 +20,4 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 					      struct mlx5e_wqe_frag_info *wi,
 					      u32 cqe_bcnt);
 
-static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
-{
-	if (!xsk_uses_need_wakeup(rq->xsk_pool))
-		return alloc_err;
-
-	if (unlikely(alloc_err))
-		xsk_set_rx_need_wakeup(rq->xsk_pool);
-	else
-		xsk_clear_rx_need_wakeup(rq->xsk_pool);
-
-	return false;
-}
-
 #endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index a05085035f23..9c505158b975 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -5,7 +5,6 @@
 #define __MLX5_EN_XSK_TX_H__
 
 #include "en.h"
-#include <net/xdp_sock_drv.h>
 
 /* TX data path */
 
@@ -13,15 +12,4 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
 
 bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
 
-static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq)
-{
-	if (!xsk_uses_need_wakeup(sq->xsk_pool))
-		return;
-
-	if (sq->pc != sq->cc)
-		xsk_clear_tx_need_wakeup(sq->xsk_pool);
-	else
-		xsk_set_tx_need_wakeup(sq->xsk_pool);
-}
-
 #endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5835d86be8d8..b61604d87701 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -41,6 +41,7 @@
 #include <net/gro.h>
 #include <net/udp.h>
 #include <net/tcp.h>
+#include <net/xdp_sock_drv.h>
 #include "en.h"
 #include "en/txrx.h"
 #include "en_tc.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 833be29170a1..9a458a5d9853 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/irq.h>
+#include <net/xdp_sock_drv.h>
 #include "en.h"
 #include "en/txrx.h"
 #include "en/xdp.h"
@@ -86,26 +87,36 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
 
 static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq)
 {
+	bool need_wakeup = xsk_uses_need_wakeup(xskrq->xsk_pool);
 	bool busy_xsk = false, xsk_rx_alloc_err;
 
-	/* Handle the race between the application querying need_wakeup and the
-	 * driver setting it:
-	 * 1. Update need_wakeup both before and after the TX. If it goes to
-	 * "yes", it can only happen with the first update.
-	 * 2. If the application queried need_wakeup before we set it, the
-	 * packets will be transmitted anyway, even w/o a wakeup.
-	 * 3. Give a chance to clear need_wakeup after new packets were queued
-	 * for TX.
+	/* If SQ is empty, there are no TX completions to trigger NAPI, so set
+	 * need_wakeup. Do it before queuing packets for TX to avoid race
+	 * condition with userspace.
 	 */
-	mlx5e_xsk_update_tx_wakeup(xsksq);
+	if (need_wakeup && xsksq->pc == xsksq->cc)
+		xsk_set_tx_need_wakeup(xsksq->xsk_pool);
 	busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
-	mlx5e_xsk_update_tx_wakeup(xsksq);
+	/* If we queued some packets for TX, no need for wakeup anymore. */
+	if (need_wakeup && xsksq->pc != xsksq->cc)
+		xsk_clear_tx_need_wakeup(xsksq->xsk_pool);
 
+	/* If WQ is empty, RX won't trigger NAPI, so set need_wakeup. Do it
+	 * before refilling to avoid race condition with userspace.
+	 */
+	if (need_wakeup && !mlx5e_rqwq_get_cur_sz(xskrq))
+		xsk_set_rx_need_wakeup(xskrq->xsk_pool);
 	xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes,
 					   mlx5e_post_rx_mpwqes,
 					   mlx5e_post_rx_wqes,
 					   xskrq);
-	busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err);
+	/* Ask for wakeup if WQ is not full after refill. */
+	if (!need_wakeup)
+		busy_xsk |= xsk_rx_alloc_err;
+	else if (xsk_rx_alloc_err)
+		xsk_set_rx_need_wakeup(xskrq->xsk_pool);
+	else
+		xsk_clear_rx_need_wakeup(xskrq->xsk_pool);
 
 	return busy_xsk;
 }
-- 
2.37.3


  parent reply	other threads:[~2022-10-02  5:14 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-02  4:56 [PATCH net-next 00/15] ] mlx5 xsk updates part4 and more Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 01/15] net/mlx5e: xsk: Flush RQ on XSK activation to save memory Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 02/15] net/mlx5e: xsk: Set napi_id to support busy polling Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 03/15] net/mlx5e: xsk: Include XSK skb_from_cqe callbacks in INDIRECT_CALL Saeed Mahameed
2022-10-02  4:56 ` Saeed Mahameed [this message]
2022-10-02  4:56 ` [PATCH net-next 05/15] net/mlx5e: xsk: Use umr_mode to calculate striding RQ parameters Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 06/15] net/mlx5e: Improve MTT/KSM alignment Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 07/15] net/mlx5e: xsk: Use KLM to protect frame overrun in unaligned mode Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 08/15] net/mlx5e: xsk: Print a warning in slow configurations Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 09/15] net/mlx5e: xsk: Optimize for unaligned mode with 3072-byte frames Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 10/15] net/mlx5e: Expose rx_oversize_pkts_buffer counter Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 11/15] net/mlx5: Start health poll at earlier stage of driver load Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 12/15] net/mlx5: Set default grace period based on function type Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 13/15] net/mlx5: E-Switch, Allow offloading fwd dest flow table with vport Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 14/15] net/mlx5: E-switch, Don't update group if qos is not enabled Saeed Mahameed
2022-10-02  4:56 ` [PATCH net-next 15/15] net/mlx5: E-Switch, Return EBUSY if can't get mode lock Saeed Mahameed
2022-10-04  0:10 ` [PATCH net-next 00/15] ] mlx5 xsk updates part4 and more patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221002045632.291612-5-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=maximmi@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).