All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com,
	michael.chan@broadcom.com, Jakub Kicinski <kuba@kernel.org>
Subject: [PATCH net-next v2 3/3] eth: bnxt: handle invalid Tx completions more gracefully
Date: Wed, 19 Jul 2023 18:04:40 -0700	[thread overview]
Message-ID: <20230720010440.1967136-4-kuba@kernel.org> (raw)
In-Reply-To: <20230720010440.1967136-1-kuba@kernel.org>

Invalid Tx completions should never happen (tm) but when they do
they crash the host, because driver blindly trusts that there is
a valid skb pointer on the ring.

The completions I've seen appear to be some form of FW / HW
miscalculation or staleness, they have typical (small) values
(<100), but they are most often higher than number of queued
descriptors. They usually happen after boot.

Instead of crashing, print a warning and schedule a reset.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
--
v2:
 - factor out the reset scheduling to a helper
 - add the check in XDP as well
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 25 ++++++++++++++++++-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  3 +++
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c |  4 +++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 7b545d2a98b4..a3bbd13c070f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -331,6 +331,22 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
 	rxr->rx_next_cons = 0xffff;
 }
 
+void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+			  int idx)
+{
+	struct bnxt_napi *bnapi = txr->bnapi;
+
+	if (bnapi->tx_fault)
+		return;
+
+	netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)",
+		   txr->txq_index, bnapi->tx_pkts,
+		   txr->tx_cons, txr->tx_prod, idx);
+	WARN_ON_ONCE(1);
+	bnapi->tx_fault = 1;
+	bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
+}
+
 const u16 bnxt_lhint_arr[] = {
 	TX_BD_FLAGS_LHINT_512_AND_SMALLER,
 	TX_BD_FLAGS_LHINT_512_TO_1023,
@@ -690,6 +706,11 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 		skb = tx_buf->skb;
 		tx_buf->skb = NULL;
 
+		if (unlikely(!skb)) {
+			bnxt_sched_reset_txr(bp, txr, i);
+			return;
+		}
+
 		tx_bytes += skb->len;
 
 		if (tx_buf->is_push) {
@@ -2576,7 +2597,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 
 static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
 {
-	if (bnapi->tx_pkts) {
+	if (bnapi->tx_pkts && !bnapi->tx_fault) {
 		bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
 		bnapi->tx_pkts = 0;
 	}
@@ -9429,6 +9450,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr;
 
+		bnapi->tx_fault = 0;
+
 		cpr = &bnapi->cp_ring;
 		if (bnapi->in_reset)
 			cpr->sw_stats.rx.rx_resets++;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 080e73496066..9d16757e27fe 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1008,6 +1008,7 @@ struct bnxt_napi {
 					  int);
 	int			tx_pkts;
 	u8			events;
+	u8			tx_fault:1;
 
 	u32			flags;
 #define BNXT_NAPI_FLAG_XDP	0x1
@@ -2329,6 +2330,8 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num);
 int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init);
 void bnxt_tx_disable(struct bnxt *bp);
 void bnxt_tx_enable(struct bnxt *bp);
+void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+			  int idx);
 void bnxt_report_link(struct bnxt *bp);
 int bnxt_update_link(struct bnxt *bp, bool chng_link_state);
 int bnxt_hwrm_set_pause(struct bnxt *);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 4efa5fe6972b..5b6fbdc4dc40 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -149,6 +149,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 			tx_buf->action = 0;
 			tx_buf->xdpf = NULL;
 		} else if (tx_buf->action == XDP_TX) {
+			tx_buf->action = 0;
 			rx_doorbell_needed = true;
 			last_tx_cons = tx_cons;
 
@@ -158,6 +159,9 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 				tx_buf = &txr->tx_buf_ring[tx_cons];
 				page_pool_recycle_direct(rxr->page_pool, tx_buf->page);
 			}
+		} else {
+			bnxt_sched_reset_txr(bp, txr, i);
+			return;
 		}
 		tx_cons = NEXT_TX(tx_cons);
 	}
-- 
2.41.0


  parent reply	other threads:[~2023-07-20  1:05 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-20  1:04 [PATCH net-next v2 0/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
2023-07-20  1:04 ` [PATCH net-next v2 1/3] eth: bnxt: move and rename reset helpers Jakub Kicinski
2023-07-20  1:04 ` [PATCH net-next v2 2/3] eth: bnxt: take the bit to set as argument of bnxt_queue_sp_work() Jakub Kicinski
2023-07-20  1:04 ` Jakub Kicinski [this message]
2023-07-20  6:59 ` [PATCH net-next v2 0/3] eth: bnxt: handle invalid Tx completions more gracefully Michael Chan
2023-07-21  3:30 ` patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230720010440.1967136-4-kuba@kernel.org \
    --to=kuba@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=michael.chan@broadcom.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.