public inbox for dev@dpdk.org
 help / color / mirror / Atom feed
* [PATCH v1 0/4] improve NBL memory safety and mailbox reliability
@ 2026-01-23  3:16 Dimon Zhao
  2026-01-23  3:16 ` [PATCH v1 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
                   ` (7 more replies)
  0 siblings, 8 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-23  3:16 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao

Includes fixes for mbuf/memzone leaks and mailbox state handling.

Dimon Zhao (4):
  net/nbl: fix memzone leak in queue release
  net/nbl: optimize mbuf headroom usage in packet transmission
  net/nbl: fix mbuf double-free in queue cleanup
  net/nbl: improve exception handling for the mailbox

 drivers/net/nbl/nbl_dev/nbl_dev.c     |   2 +-
 drivers/net/nbl/nbl_hw/nbl_channel.c  |  94 ++++++++++++++++----
 drivers/net/nbl/nbl_hw/nbl_channel.h  |  11 ++-
 drivers/net/nbl/nbl_hw/nbl_resource.h |   2 +
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 122 +++++++++++++++++++-------
 drivers/net/nbl/nbl_hw/nbl_txrx_ops.h |   2 +-
 6 files changed, 179 insertions(+), 54 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v1 1/4] net/nbl: fix memzone leak in queue release
  2026-01-23  3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
@ 2026-01-23  3:16 ` Dimon Zhao
  2026-01-23  3:16 ` [PATCH v1 2/4] net/nbl: optimize mbuf headroom usage in packet transmission Dimon Zhao
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-23  3:16 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Add memzone free calls in tx_queue_release() and rx_queue_release()
to fix memory leaks. The memzones allocated in tx_queue_setup() and
rx_queue_setup() were not being freed when queues were released.

Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_resource.h |  2 ++
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 44 ++++++++++++++++-----------
 2 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_resource.h b/drivers/net/nbl/nbl_hw/nbl_resource.h
index 1f6515f64d..469c3f5827 100644
--- a/drivers/net/nbl/nbl_hw/nbl_resource.h
+++ b/drivers/net/nbl/nbl_hw/nbl_resource.h
@@ -137,6 +137,7 @@ struct nbl_res_tx_ring {
 	volatile struct nbl_packed_desc *desc;
 	struct nbl_tx_entry *tx_entry;
 	const struct rte_memzone *net_hdr_mz;
+	const struct rte_memzone *desc_mz;
 	volatile uint8_t *notify;
 	const struct rte_eth_dev *eth_dev;
 	struct nbl_common_info *common;
@@ -178,6 +179,7 @@ struct nbl_res_tx_ring {
 struct nbl_res_rx_ring {
 	volatile struct nbl_packed_desc *desc;
 	struct nbl_rx_entry *rx_entry;
+	const struct rte_memzone *desc_mz;
 	struct rte_mempool *mempool;
 	volatile uint8_t *notify;
 	const struct rte_eth_dev *eth_dev;
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 650790b4fc..103f56c3a7 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -71,7 +71,12 @@ static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
 	struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
 	if (!tx_ring)
 		return;
-	rte_free(tx_ring->tx_entry);
+	if (tx_ring->tx_entry)
+		rte_free(tx_ring->tx_entry);
+	if (tx_ring->net_hdr_mz)
+		rte_memzone_free(tx_ring->net_hdr_mz);
+	if (tx_ring->desc_mz)
+		rte_memzone_free(tx_ring->desc_mz);
 	rte_free(tx_ring);
 	txrx_mgt->tx_rings[queue_idx] = NULL;
 }
@@ -104,7 +109,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
 
 	if (eth_dev->data->tx_queues[param->queue_idx] != NULL) {
 		NBL_LOG(WARNING, "re-setup an already allocated tx queue");
-		nbl_res_txrx_stop_tx_ring(priv, param->queue_idx);
+		nbl_res_txrx_release_tx_ring(priv, param->queue_idx);
 		eth_dev->data->tx_queues[param->queue_idx] = NULL;
 	}
 
@@ -173,6 +178,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
 	tx_ring->next_to_use = 0;
 	tx_ring->desc = (struct nbl_packed_desc *)memzone->addr;
 	tx_ring->net_hdr_mz = net_hdr_mz;
+	tx_ring->desc_mz = memzone;
 	tx_ring->eth_dev = eth_dev;
 	tx_ring->dma_set_msb = common->dma_set_msb;
 	tx_ring->dma_limit_msb = common->dma_limit_msb;
@@ -226,6 +232,23 @@ static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
 	rx_ring->next_to_use = 0;
 }
 
+static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
+{
+	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
+	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
+	struct nbl_res_rx_ring *rx_ring =
+			NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+	if (!rx_ring)
+		return;
+
+	if (rx_ring->rx_entry)
+		rte_free(rx_ring->rx_entry);
+	if (rx_ring->desc_mz)
+		rte_memzone_free(rx_ring->desc_mz);
+	rte_free(rx_ring);
+	txrx_mgt->rx_rings[queue_idx] = NULL;
+}
+
 static int nbl_res_txrx_start_rx_ring(void *priv,
 				      struct nbl_start_rx_ring_param *param,
 				      u64 *dma_addr)
@@ -244,7 +267,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
 
 	if (eth_dev->data->rx_queues[param->queue_idx] != NULL) {
 		NBL_LOG(WARNING, "re-setup an already allocated rx queue");
-		nbl_res_txrx_stop_rx_ring(priv, param->queue_idx);
+		nbl_res_txrx_release_rx_ring(priv, param->queue_idx);
 		eth_dev->data->rx_queues[param->queue_idx] = NULL;
 	}
 
@@ -275,6 +298,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
 
 	rx_ring->product = param->product;
 	rx_ring->mempool = param->mempool;
+	rx_ring->desc_mz = memzone;
 	rx_ring->nb_desc = param->nb_desc;
 	rx_ring->queue_id = param->queue_idx;
 	rx_ring->notify_qid =
@@ -376,20 +400,6 @@ static int nbl_res_alloc_rx_bufs(void *priv, u16 queue_idx)
 	return 0;
 }
 
-static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
-{
-	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
-	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
-	struct nbl_res_rx_ring *rx_ring =
-			NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
-	if (!rx_ring)
-		return;
-
-	rte_free(rx_ring->rx_entry);
-	rte_free(rx_ring);
-	txrx_mgt->rx_rings[queue_idx] = NULL;
-}
-
 static void nbl_res_txrx_update_rx_ring(void *priv, u16 index)
 {
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v1 2/4] net/nbl: optimize mbuf headroom usage in packet transmission
  2026-01-23  3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
  2026-01-23  3:16 ` [PATCH v1 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
@ 2026-01-23  3:16 ` Dimon Zhao
  2026-01-23  3:16 ` [PATCH v1 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-23  3:16 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Only use mbuf headroom when mbuf->refcnt == 1,
indicating exclusive ownership. For shared mbufs (refcnt > 1),
use next desc instead to avoid data corruption.

This prevents modifying shared buffers that may be used by other
processing contexts.

Implementation details:
- Check mbuf->refcnt before using headroom
- For refcnt == 1: use existing headroom (performance optimal)
- For refcnt > 1: use next desc

Fixes: e5fc1f78c78c ("net/nbl: support Tx and Rx burst")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_txrx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 103f56c3a7..e906f0707e 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -514,7 +514,10 @@ nbl_res_txrx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts, u
 			tx_extend_len = required_headroom + sizeof(struct rte_ether_hdr);
 		}
 
-		if (rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
+		if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
+		    RTE_MBUF_DIRECT(tx_pkt) &&
+		    tx_pkt->nb_segs == 1 &&
+		    rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
 			can_push = 1;
 			u = rte_pktmbuf_mtod_offset(tx_pkt, union nbl_tx_extend_head *,
 						    -required_headroom);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v1 3/4] net/nbl: fix mbuf double-free in queue cleanup
  2026-01-23  3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
  2026-01-23  3:16 ` [PATCH v1 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
  2026-01-23  3:16 ` [PATCH v1 2/4] net/nbl: optimize mbuf headroom usage in packet transmission Dimon Zhao
@ 2026-01-23  3:16 ` Dimon Zhao
  2026-01-23  3:16 ` [PATCH v1 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-23  3:16 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Only free hardware-owned mbufs to avoid double-free
corruption of application-owned buffers.

TX ring: free only transmitted mbufs (next_to_clean to next_to_use).
RX ring: free only allocated receive buffers
	 (next_to_clean to next_to_use),
	 mbufs already delivered to the application (before next_to_clean)
	 must not be touched.

Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_dev/nbl_dev.c     |  2 +-
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 73 ++++++++++++++++++++++-----
 drivers/net/nbl/nbl_hw/nbl_txrx_ops.h |  2 +-
 3 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/drivers/net/nbl/nbl_dev/nbl_dev.c b/drivers/net/nbl/nbl_dev/nbl_dev.c
index edff61e52e..2b0413fb7c 100644
--- a/drivers/net/nbl/nbl_dev/nbl_dev.c
+++ b/drivers/net/nbl/nbl_dev/nbl_dev.c
@@ -328,8 +328,8 @@ int nbl_dev_port_stop(struct rte_eth_dev *eth_dev)
 	rte_delay_ms(NBL_SAFE_THREADS_WAIT_TIME);
 
 	nbl_dev_hw_stats_stop(eth_dev);
-	nbl_clear_queues(eth_dev);
 	nbl_dev_txrx_stop(eth_dev);
+	nbl_clear_queues(eth_dev);
 	nbl_userdev_port_config(adapter, NBL_KERNEL_NETWORK);
 	return 0;
 }
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index e906f0707e..a90b0debc5 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -44,19 +44,43 @@ static void nbl_res_txrx_stop_tx_ring(void *priv, u16 queue_idx)
 {
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
 	struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
-	int i;
+	struct nbl_tx_entry *tx_entry;
+	u16 i, free_cnt, free_mbuf_cnt;
 
 	if (!tx_ring)
 		return;
 
-	for (i = 0; i < tx_ring->nb_desc; i++) {
-		if (tx_ring->tx_entry[i].mbuf != NULL) {
-			rte_pktmbuf_free_seg(tx_ring->tx_entry[i].mbuf);
-			memset(&tx_ring->tx_entry[i], 0, sizeof(*tx_ring->tx_entry));
+	i = tx_ring->next_to_clean + 1 - NBL_TX_RS_THRESH;
+	free_cnt = tx_ring->vq_free_cnt;
+	tx_entry = &tx_ring->tx_entry[i];
+	free_mbuf_cnt = 0;
+
+	while (free_cnt < tx_ring->nb_desc) {
+		if (tx_entry->mbuf) {
+			free_mbuf_cnt++;
+			rte_pktmbuf_free_seg(tx_entry->mbuf);
+		}
+
+		i++;
+		tx_entry++;
+		free_cnt++;
+		if (i == tx_ring->nb_desc) {
+			i = 0;
+			tx_entry = &tx_ring->tx_entry[i];
 		}
+	}
+
+	NBL_LOG(DEBUG, "stop tx ring ntc %u, ntu %u, vq_free_cnt %u, mbuf free_cnt %u",
+		tx_ring->next_to_clean, tx_ring->next_to_use, tx_ring->vq_free_cnt, free_mbuf_cnt);
+
+	for (i = 0; i < tx_ring->nb_desc; i++) {
+		tx_ring->desc[i].addr = 0;
+		tx_ring->desc[i].len = 0;
+		tx_ring->desc[i].id = 0;
 		tx_ring->desc[i].flags = 0;
 	}
 
+	memset(tx_ring->tx_entry, 0, sizeof(*tx_entry) * tx_ring->nb_desc);
 	tx_ring->avail_used_flags = NBL_PACKED_DESC_F_AVAIL_BIT;
 	tx_ring->used_wrap_counter = 1;
 	tx_ring->next_to_clean = NBL_TX_RS_THRESH - 1;
@@ -209,25 +233,46 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
 static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
 {
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
-	struct nbl_res_rx_ring *rx_ring =
-			NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
-	u16 i;
+	struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+	struct nbl_rx_entry *rx_buf;
+	u16 i, free_cnt, free_mbuf_cnt;
 
 	if (!rx_ring)
 		return;
+
+	i = rx_ring->next_to_clean;
+	free_cnt = rx_ring->vq_free_cnt;
+	free_mbuf_cnt = 0;
+
 	if (rx_ring->rx_entry != NULL) {
-		for (i = 0; i < rx_ring->nb_desc; i++) {
-			if (rx_ring->rx_entry[i].mbuf != NULL) {
-				rte_pktmbuf_free_seg(rx_ring->rx_entry[i].mbuf);
-				rx_ring->rx_entry[i].mbuf = NULL;
+		rx_buf = &rx_ring->rx_entry[i];
+		while (free_cnt < rx_ring->nb_desc) {
+			if (rx_buf->mbuf) {
+				free_mbuf_cnt++;
+				rte_pktmbuf_free_seg(rx_buf->mbuf);
+			}
+
+			i++;
+			rx_buf++;
+			free_cnt++;
+			if (i == rx_ring->nb_desc) {
+				i = 0;
+				rx_buf = &rx_ring->rx_entry[i];
 			}
-			rx_ring->desc[i].flags = 0;
 		}
 
-		for (i = rx_ring->nb_desc; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++)
+		memset(rx_ring->rx_entry, 0, sizeof(struct nbl_rx_entry) * rx_ring->nb_desc);
+
+		for (i = 0; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++) {
+			rx_ring->desc[i].addr = 0;
+			rx_ring->desc[i].len = 0;
+			rx_ring->desc[i].id = 0;
 			rx_ring->desc[i].flags = 0;
+		}
 	}
 
+	NBL_LOG(DEBUG, "stop rx ring ntc %u, ntu %u, vq_free_cnt %u, free_mbuf_cnt %u",
+		rx_ring->next_to_clean, rx_ring->next_to_use, rx_ring->vq_free_cnt, free_mbuf_cnt);
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 }
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
index 2ab4b09683..dc0bab2b7b 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
@@ -43,7 +43,7 @@ nbl_tx_free_bufs(struct nbl_res_tx_ring *txq)
 	if (!desc_is_used(&txq->desc[next_to_clean], txq->used_wrap_counter))
 		return 0;
 
-	n = 32;
+	n = NBL_TX_RS_THRESH;
 
 	 /* first buffer to free from S/W ring is at index
 	  * tx_next_dd - (tx_rs_thresh-1)
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v1 4/4] net/nbl: improve exception handling for the mailbox
  2026-01-23  3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
                   ` (2 preceding siblings ...)
  2026-01-23  3:16 ` [PATCH v1 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
@ 2026-01-23  3:16 ` Dimon Zhao
  2026-01-24 18:31   ` [REVIEW] " Stephen Hemminger
  2026-01-23  4:36 ` [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Dimon Zhao @ 2026-01-23  3:16 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Key changes:
1. Replace simple 'acked' flag with a state machine to improve mailbox
   reliability. New states: IDLE, WAITING, ACKING, ACKED, TIMEOUT.
2. Validate message ID and type before processing acknowledgments to
   prevent incorrect ACK matching.
3. Handle timeout scenarios: discard ACKs received after timeout to
   prevent use of already released buffers.
4. Check mailbox status before sending: if the send mailbox descriptor
   is found to be in use or waiting for an ACK during transmission,
   the message will not be sent.

Fixes: a1c5ffa13b2c ("net/nbl: add channel layer")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++++++++------
 drivers/net/nbl/nbl_hw/nbl_channel.h | 11 +++-
 2 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.c b/drivers/net/nbl/nbl_hw/nbl_channel.c
index f81c4c8591..75e4354e18 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.c
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.c
@@ -22,6 +22,7 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
 {
 	struct nbl_chan_ring *txq = &chan_info->mailbox.txq;
 	size_t size = chan_info->mailbox.num_txq_entries * sizeof(struct nbl_chan_tx_desc);
+	int i;
 
 	txq->desc = nbl_alloc_dma_mem(&txq->desc_mem, size);
 	if (!txq->desc) {
@@ -36,6 +37,10 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
 		goto req_wait_queue_failed;
 	}
 
+	for (i = 0; i < chan_info->mailbox.num_txq_entries; i++)
+		rte_atomic_store_explicit(&chan_info->mailbox.wait[i].status, NBL_MBX_STATUS_IDLE,
+					  rte_memory_order_relaxed);
+
 	size = (u64)chan_info->mailbox.num_txq_entries * (u64)chan_info->mailbox.txq_buf_size;
 	txq->buf = nbl_alloc_dma_mem(&txq->buf_mem, size);
 	if (!txq->buf) {
@@ -253,24 +258,55 @@ static void nbl_chan_recv_ack_msg(void *priv, uint16_t srcid, uint16_t msgid,
 	uint32_t ack_msgid;
 	uint32_t ack_msgtype;
 	uint32_t copy_len;
+	int status = NBL_MBX_STATUS_WAITING;
 
 	chan_info = NBL_CHAN_MGT_TO_CHAN_INFO(chan_mgt);
 	ack_msgtype = *payload;
 	ack_msgid = *(payload + 1);
+
+	if (ack_msgid >= chan_mgt->chan_info->mailbox.num_txq_entries) {
+		NBL_LOG(ERR, "Invalid msg id %u, msg type %u", ack_msgid, ack_msgtype);
+		return;
+	}
+
 	wait_head = &chan_info->mailbox.wait[ack_msgid];
+	if (wait_head->msg_type != ack_msgtype) {
+		NBL_LOG(ERR, "Invalid msg id %u, msg type %u, wait msg type %u",
+			ack_msgid, ack_msgtype, wait_head->msg_type);
+		return;
+	}
+
+	if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
+							 NBL_MBX_STATUS_ACKING,
+							 rte_memory_order_acq_rel,
+							 rte_memory_order_relaxed)) {
+		NBL_LOG(ERR, "Invalid wait status %d msg id %u, msg type %u",
+			wait_head->status, ack_msgid, ack_msgtype);
+		return;
+	}
+
 	wait_head->ack_err = *(payload + 2);
 
 	NBL_LOG(DEBUG, "recv ack, srcid:%u, msgtype:%u, msgid:%u, ack_msgid:%u,"
-		" data_len:%u, ack_data_len:%u",
-		srcid, ack_msgtype, msgid, ack_msgid, data_len, wait_head->ack_data_len);
+		" data_len:%u, ack_data_len:%u, ack_err:%d",
+		srcid, ack_msgtype, msgid, ack_msgid, data_len,
+		wait_head->ack_data_len, wait_head->ack_err);
 
 	if (wait_head->ack_err >= 0 && (data_len > 3 * sizeof(uint32_t))) {
 		/* the mailbox msg parameter structure may change */
+		if (data_len - 3 * sizeof(uint32_t) != wait_head->ack_data_len)
+			NBL_LOG(INFO, "payload_len do not match ack_len!,"
+				" srcid:%u, msgtype:%u, msgid:%u, ack_msgid %u,"
+				" data_len:%u, ack_data_len:%u",
+				srcid, ack_msgtype, msgid,
+				ack_msgid, data_len, wait_head->ack_data_len);
 		copy_len = RTE_MIN(wait_head->ack_data_len, data_len - 3 * sizeof(uint32_t));
-		memcpy(wait_head->ack_data, payload + 3, copy_len);
+		if (copy_len)
+			memcpy(wait_head->ack_data, payload + 3, copy_len);
 	}
 
-	rte_atomic_store_explicit(&wait_head->acked, 1, rte_memory_order_release);
+	rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_ACKED,
+				  rte_memory_order_release);
 }
 
 static void nbl_chan_recv_msg(struct nbl_channel_mgt *chan_mgt, void *data)
@@ -371,12 +407,27 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
 {
 	struct nbl_chan_ring *txq;
 	struct nbl_chan_tx_desc *tx_desc;
+	struct nbl_chan_waitqueue_head *wait_head;
 	uint64_t pa;
 	void *va;
 	uint16_t next_to_use;
+	int status;
+
+	if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
+		NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
+		return 0xFFFF;
+	}
 
 	txq = &chan_info->mailbox.txq;
 	next_to_use = txq->next_to_use;
+
+	wait_head = &chan_info->mailbox.wait[next_to_use];
+	status = rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire);
+	if (status != NBL_MBX_STATUS_IDLE && status != NBL_MBX_STATUS_TIMEOUT) {
+		NBL_LOG(ERR, "too much inflight mailbox msg, msg id %u", next_to_use);
+		return 0xFFFF;
+	}
+
 	va = (u8 *)txq->buf + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
 	pa = txq->buf_mem.pa + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
 	tx_desc = NBL_CHAN_TX_DESC(txq, next_to_use);
@@ -384,10 +435,6 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
 	tx_desc->dstid = dstid;
 	tx_desc->msg_type = msg_type;
 	tx_desc->msgid = next_to_use;
-	if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
-		NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
-		return -1;
-	}
 
 	if (arg_len > NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN) {
 		memcpy(va, arg, arg_len);
@@ -418,6 +465,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
 	uint16_t msgid;
 	int ret;
 	int retry_time = 0;
+	int status = NBL_MBX_STATUS_WAITING;
 
 	if (chan_mgt->state)
 		return -EIO;
@@ -431,8 +479,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
 
 	if (msgid == 0xFFFF) {
 		rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
-		NBL_LOG(ERR, "chan tx queue full, send msgtype:%u"
-			" to dstid:%u failed",
+		NBL_LOG(ERR, "chan tx queue full, send msgtype:%u to dstid:%u failed",
 			chan_send->msg_type, chan_send->dstid);
 		return -ECOMM;
 	}
@@ -446,23 +493,34 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
 	wait_head = &chan_info->mailbox.wait[msgid];
 	wait_head->ack_data = chan_send->resp;
 	wait_head->ack_data_len = chan_send->resp_len;
-	rte_atomic_store_explicit(&wait_head->acked, 0, rte_memory_order_relaxed);
 	wait_head->msg_type = chan_send->msg_type;
-	rte_wmb();
+	rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_WAITING,
+				  rte_memory_order_release);
 	nbl_chan_kick_tx_ring(chan_mgt, chan_info);
 	rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
 
 	while (1) {
-		if (rte_atomic_load_explicit(&wait_head->acked, rte_memory_order_acquire))
-			return wait_head->ack_err;
+		if (rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire) ==
+		    NBL_MBX_STATUS_ACKED) {
+			ret = wait_head->ack_err;
+			rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_IDLE,
+						  rte_memory_order_release);
+			return ret;
+		}
 
 		rte_delay_us(50);
 		retry_time++;
-		if (retry_time > NBL_CHAN_RETRY_TIMES)
-			return -EIO;
+		if (retry_time > NBL_CHAN_RETRY_TIMES &&
+		    rte_atomic_compare_exchange_strong_explicit(&wait_head->status,
+								&status, NBL_MBX_STATUS_TIMEOUT,
+								rte_memory_order_acq_rel,
+								rte_memory_order_relaxed)) {
+			NBL_LOG(ERR, "send msgtype:%u msgid %u to dstid:%u timeout",
+				chan_send->msg_type, msgid, chan_send->dstid);
+			break;
+		}
 	}
-
-	return 0;
+	return -EIO;
 }
 
 static int nbl_chan_send_ack(void *priv, struct nbl_chan_ack_info *chan_ack)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.h b/drivers/net/nbl/nbl_hw/nbl_channel.h
index f8fa89af51..21fce7478d 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.h
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.h
@@ -41,6 +41,14 @@ enum {
 	NBL_MB_TX_QID = 1,
 };
 
+enum {
+	NBL_MBX_STATUS_IDLE = 0,
+	NBL_MBX_STATUS_WAITING,
+	NBL_MBX_STATUS_ACKING,
+	NBL_MBX_STATUS_ACKED,
+	NBL_MBX_STATUS_TIMEOUT,
+};
+
 struct __rte_packed_begin nbl_chan_tx_desc {
 	uint16_t flags;
 	uint16_t srcid;
@@ -74,8 +82,7 @@ struct nbl_chan_ring {
 
 struct nbl_chan_waitqueue_head {
 	char *ack_data;
-	RTE_ATOMIC(int)
-		acked;
+	RTE_ATOMIC(int) status;
 	int ack_err;
 	uint16_t ack_data_len;
 	uint16_t msg_type;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH v1 0/4] improve NBL memory safety and mailbox reliability
  2026-01-23  3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
                   ` (3 preceding siblings ...)
  2026-01-23  3:16 ` [PATCH v1 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
@ 2026-01-23  4:36 ` Stephen Hemminger
  2026-01-24 19:08 ` Stephen Hemminger
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-23  4:36 UTC (permalink / raw)
  To: Dimon Zhao; +Cc: dev

On Thu, 22 Jan 2026 19:16:23 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:

> Includes fixes for mbuf/memzone leaks and mailbox state handling.
> 
> Dimon Zhao (4):
>   net/nbl: fix memzone leak in queue release
>   net/nbl: optimize mbuf headroom usage in packet transmission
>   net/nbl: fix mbuf double-free in queue cleanup
>   net/nbl: improve exception handling for the mailbox
> 
>  drivers/net/nbl/nbl_dev/nbl_dev.c     |   2 +-
>  drivers/net/nbl/nbl_hw/nbl_channel.c  |  94 ++++++++++++++++----
>  drivers/net/nbl/nbl_hw/nbl_channel.h  |  11 ++-
>  drivers/net/nbl/nbl_hw/nbl_resource.h |   2 +
>  drivers/net/nbl/nbl_hw/nbl_txrx.c     | 122 +++++++++++++++++++-------
>  drivers/net/nbl/nbl_hw/nbl_txrx_ops.h |   2 +-
>  6 files changed, 179 insertions(+), 54 deletions(-)
> 

This is the result of AI code review of this series.
Note: AI can and does flag things that are not a problem, do not blindly follow

# Code Review: DPDK Patch Bundle for net/nbl Driver

## Overview
This patch series contains 4 patches fixing various issues in the net/nbl driver: memzone leaks, mbuf headroom usage, double-free issues, and mailbox exception handling.

---

## Patch 1/4: fix memzone leak in queue release

### Errors

1. **Unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`**
   - File: `drivers/net/nbl/nbl_hw/nbl_txrx.c`, lines 74-79, 245-248
   - `rte_free()` and `rte_memzone_free()` safely handle NULL pointers
   ```c
   // Current (unnecessary checks):
   if (tx_ring->tx_entry)
       rte_free(tx_ring->tx_entry);
   if (tx_ring->net_hdr_mz)
       rte_memzone_free(tx_ring->net_hdr_mz);
   
   // Should be:
   rte_free(tx_ring->tx_entry);
   rte_memzone_free(tx_ring->net_hdr_mz);
   rte_memzone_free(tx_ring->desc_mz);
   ```

### Warnings

1. **Missing blank line between declarations and statements**
   - File: `drivers/net/nbl/nbl_hw/nbl_txrx.c`, function `nbl_res_txrx_release_rx_ring()`
   ```c
   struct nbl_res_rx_ring *rx_ring =
           NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
   if (!rx_ring)  // Missing blank line before this
       return;
   ```

### Info

1. The fix correctly addresses the memzone leak by storing memzone pointers and freeing them on release.

---

## Patch 2/4: optimize mbuf headroom usage in packet transmission

### Errors

None.

### Warnings

1. **Subject line could be clearer about the fix nature**
   - The subject says "optimize" but this is actually a **bug fix** that prevents data corruption
   - Consider: `net/nbl: fix data corruption in shared mbuf transmission`

2. **Commit body mentions "Fixes" tag but issue is more of a safety improvement**
   - The `Fixes:` tag is appropriate since this prevents potential data corruption

### Info

1. The logic change is correct - checking `refcnt == 1`, `RTE_MBUF_DIRECT()`, and `nb_segs == 1` before modifying headroom ensures exclusive ownership.

---

## Patch 3/4: fix mbuf double-free in queue cleanup

### Errors

1. **Missing blank line between declaration and code**
   - File: `drivers/net/nbl/nbl_hw/nbl_txrx.c`, function `nbl_res_txrx_stop_tx_ring()`
   ```c
   struct nbl_tx_entry *tx_entry;
   u16 i, free_cnt, free_mbuf_cnt;

   if (!tx_ring)  // Need blank line before this
       return;
   ```

2. **Inconsistent use of types**
   - Mix of `u16` and `uint16_t` - DPDK prefers standard C types (`uint16_t`)
   - File: `drivers/net/nbl/nbl_hw/nbl_txrx.c`
   ```c
   u16 i, free_cnt, free_mbuf_cnt;  // Should use uint16_t
   ```

### Warnings

1. **Subject line formatting**
   - "queue cleanup" is vague; consider more specific wording
   - Suggest: `net/nbl: fix mbuf double-free on queue stop`

2. **Commit body formatting issue - tab in description**
   - Line: `(next_to_clean to next_to_use),` followed by tab-indented text
   - Should use consistent formatting in commit body

3. **Magic numbers in code**
   - The change from `n = 32` to `n = NBL_TX_RS_THRESH` is good, but ensure the macro is defined and documented

### Info

1. The reordering of `nbl_clear_queues()` and `nbl_dev_txrx_stop()` in `nbl_dev_port_stop()` makes sense for proper cleanup ordering.

---

## Patch 4/4: improve exception handling for the mailbox

### Errors

1. **Line length exceeds 100 characters**
   - File: `drivers/net/nbl/nbl_hw/nbl_channel.c`, line 299-302:
   ```c
   NBL_LOG(INFO, "payload_len do not match ack_len!,"
           " srcid:%u, msgtype:%u, msgid:%u, ack_msgid %u,"
           " data_len:%u, ack_data_len:%u",
   ```
   - Some of these concatenated strings may exceed 100 chars when combined with indentation

2. **Grammar issue in log message**
   - `"payload_len do not match ack_len!"` should be `"payload_len does not match ack_len"`

3. **Missing blank line after declarations**
   - File: `drivers/net/nbl/nbl_hw/nbl_channel.c`, function `nbl_chan_init_tx_queue()`:
   ```c
   struct nbl_chan_ring *txq = &chan_info->mailbox.txq;
   size_t size = chan_info->mailbox.num_txq_entries * sizeof(struct nbl_chan_tx_desc);
   int i;

   txq->desc = nbl_alloc_dma_mem(...);  // OK - blank line present
   ```
   - But later addition lacks proper spacing context

### Warnings

1. **Subject line could be more specific**
   - "improve exception handling for the mailbox" is vague
   - Consider: `net/nbl: add mailbox state machine for reliability`

2. **Use of `int` for status variable that's compared against enum**
   - File: `drivers/net/nbl/nbl_hw/nbl_channel.c`, line 260, 412:
   ```c
   int status = NBL_MBX_STATUS_WAITING;
   ```
   - Consider using a proper enum type or at least adding a comment

3. **Inconsistent initialization location**
   - `int status = NBL_MBX_STATUS_WAITING` initialized at declaration in one place, but used with different initial values in different contexts

4. **Dead code path concern**
   - In `nbl_chan_send_msg()`, if `rte_atomic_compare_exchange_strong_explicit` fails after timeout, the loop continues but `retry_time` keeps increasing, leading to repeated timeout checks

### Info

1. The state machine approach (`IDLE`, `WAITING`, `ACKING`, `ACKED`, `TIMEOUT`) is a significant improvement over the simple `acked` flag.

2. The validation of message ID and type before processing ACKs is good defensive programming.

---

## General Issues Across All Patches

### Warnings

1. **Inconsistent type usage**
   - Mix of `u8`, `u16`, `u64` (kernel-style) and `uint8_t`, `uint16_t`, `uint64_t` (C99 standard)
   - DPDK generally prefers C99 standard types

2. **Missing documentation updates**
   - No updates to release notes for bug fixes that may affect users
   - Consider adding entries to `doc/guides/rel_notes/` for these fixes

### Info

1. All patches have proper `Fixes:` tags with 12-character SHAs
2. All patches have `Cc: stable@dpdk.org` appropriately
3. All patches have valid `Signed-off-by:` lines
4. Subject lines are within the 60-character limit
5. SPDX headers are not modified (existing files)

---

## Summary

| Severity | Count |
|----------|-------|
| Error | 5 |
| Warning | 11 |
| Info | 6 |

**Recommended actions:**
1. Remove unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`
2. Add missing blank lines between declarations and statements
3. Use consistent C99 types (`uint16_t` instead of `u16`)
4. Fix grammar in log messages
5. Consider more specific subject lines for patches 2, 3, and 4
6. Verify line lengths don't exceed 100 characters in long log statements

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [REVIEW] net/nbl: improve exception handling for the mailbox
  2026-01-23  3:16 ` [PATCH v1 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
@ 2026-01-24 18:31   ` Stephen Hemminger
  0 siblings, 0 replies; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-24 18:31 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

AI-generated review of bundle-1687-nbl-mbox.mbox
Reviewed using Claude (claude-opus-4-5-20251101)

This is an automated review. Please verify all suggestions.

---

# DPDK Patch Review: net/nbl Mailbox and Queue Fixes

## Overview
This patch series contains 4 patches for the nbl network driver, addressing memory leaks, mbuf handling issues, and mailbox reliability improvements.

---

## Patch 1/4: net/nbl: fix memzone leak in queue release

### Commit Message
**Info**: Subject line is 48 characters - good length.
**Pass**: Proper `Fixes:` tag format with 12-character SHA.
**Pass**: `Cc: stable@dpdk.org` correctly placed after Fixes tag.
**Pass**: Signed-off-by present.
**Pass**: Tag order is correct.

### Code Review

**Warning: Unnecessary NULL checks before rte_free()**
```c
if (tx_ring->tx_entry)
    rte_free(tx_ring->tx_entry);
if (tx_ring->net_hdr_mz)
    rte_memzone_free(tx_ring->net_hdr_mz);
if (tx_ring->desc_mz)
    rte_memzone_free(tx_ring->desc_mz);
```
Per AGENTS.md, `rte_free()` and `rte_memzone_free()` handle NULL pointers safely. These NULL checks are unnecessary.

**Warning: Missing blank line after variable declarations**
```c
struct nbl_res_rx_ring *rx_ring =
        NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
if (!rx_ring)
    return;
```
Should have a blank line between the declaration and the `if` statement.

**Info**: Good fix - storing `desc_mz` pointer for proper cleanup is the right approach.

---

## Patch 2/4: net/nbl: optimize mbuf headroom usage in packet transmission

### Commit Message
**Error**: Subject line is 65 characters - exceeds 60 character limit.
Suggested: `net/nbl: optimize mbuf headroom usage in Tx`

**Warning**: The word "transmission" should be abbreviated as "Tx" per DPDK conventions (words-case.txt).

**Pass**: Proper `Fixes:` tag and `Cc: stable@dpdk.org`.
**Pass**: Signed-off-by present.

### Code Review

**Pass**: The logic to check `rte_mbuf_refcnt_read()`, `RTE_MBUF_DIRECT()`, and `nb_segs` before using headroom is correct for avoiding corruption of shared mbufs.

**Info**: Good defensive programming to ensure exclusive ownership before modifying the mbuf headroom.

---

## Patch 3/4: net/nbl: fix mbuf double-free in queue cleanup

### Commit Message
**Pass**: Subject line is 47 characters - good.
**Pass**: Proper `Fixes:` tag and `Cc: stable@dpdk.org`.
**Pass**: Signed-off-by present.

### Code Review

**Warning: Missing blank line after variable declarations**
```c
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
struct nbl_tx_entry *tx_entry;
u16 i, free_cnt, free_mbuf_cnt;

if (!tx_ring)
```
There should be a blank line between declarations and the first statement.

**Warning: Implicit boolean comparison**
```c
if (tx_entry->mbuf) {
```
Should be:
```c
if (tx_entry->mbuf != NULL) {
```

Same issue in rx_ring cleanup:
```c
if (rx_buf->mbuf) {
```

**Info**: The logic change to only free mbufs in the range `[next_to_clean, next_to_use)` is correct for avoiding double-free of application-owned buffers.

**Info**: Moving `nbl_clear_queues()` after `nbl_dev_txrx_stop()` in `nbl_dev_port_stop()` ensures proper ordering.

---

## Patch 4/4: net/nbl: improve exception handling for the mailbox

### Commit Message
**Pass**: Subject line is 53 characters - good.
**Pass**: Proper `Fixes:` tag and `Cc: stable@dpdk.org`.
**Pass**: Signed-off-by present.

### Code Review

**Warning: Missing blank line after variable declarations**
Multiple instances throughout the code:
```c
struct nbl_chan_waitqueue_head *wait_head;
int status = NBL_MBX_STATUS_WAITING;
if (ack_msgid >= chan_mgt->chan_info->mailbox.num_txq_entries) {
```

**Warning: Variable initialization may hide bugs**
```c
int status = NBL_MBX_STATUS_WAITING;
```
This is used with `rte_atomic_compare_exchange_strong_explicit()`, which requires the expected value to be passed by reference. In this specific case, the initialization is intentional and required. However, consider adding a comment to clarify this is not a default value but the expected state for the compare-exchange operation.

**Error: Line exceeds 100 characters**
```c
		if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
								 NBL_MBX_STATUS_ACKING,
```
The continuation lines appear properly indented, but verify the first line stays within 100 characters.

**Warning: Log message formatting**
```c
NBL_LOG(INFO, "payload_len do not match ack_len!,"
```
Grammar: "do not" should be "does not" (payload_len is singular).

**Info**: The state machine approach (IDLE, WAITING, ACKING, ACKED, TIMEOUT) is a good improvement over the simple boolean flag for mailbox reliability.

**Info**: Good addition of message ID and type validation before processing ACKs.

**Info**: Using `rte_atomic_compare_exchange_strong_explicit()` for state transitions is correct for thread-safe mailbox handling.

---

## General Issues Across All Patches

### Type Usage
**Warning**: The patches use `u16` and `u64` types, which appear to be driver-specific typedefs. This is acceptable in driver code, but ensure consistency with existing driver conventions.

### Documentation
**Info**: Consider updating release notes if these fixes address user-visible issues, as they are bug fixes being backported to stable.

---

## Summary

| Patch | Errors | Warnings | Info |
|-------|--------|----------|------|
| 1/4 | 0 | 2 | 1 |
| 2/4 | 1 | 1 | 1 |
| 3/4 | 0 | 3 | 2 |
| 4/4 | 1 | 4 | 3 |

### Must Fix (Errors)
1. **Patch 2/4**: Subject line exceeds 60 characters
2. **Patch 4/4**: Verify line length compliance (100 char limit)

### Should Fix (Warnings)
1. Remove unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`
2. Add blank lines between variable declarations and statements
3. Use explicit NULL comparisons (`!= NULL`) instead of implicit boolean tests
4. Fix grammar in log message ("does not" instead of "do not")
5. Use "Tx" instead of "transmission" in commit subject

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v1 0/4] improve NBL memory safety and mailbox reliability
  2026-01-23  3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
                   ` (4 preceding siblings ...)
  2026-01-23  4:36 ` [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
@ 2026-01-24 19:08 ` Stephen Hemminger
  2026-01-26  1:37   ` 回复:[PATCH " Dimon
  2026-01-26  7:58 ` [PATCH v2 " Dimon Zhao
  2026-01-27  2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
  7 siblings, 1 reply; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-24 19:08 UTC (permalink / raw)
  To: Dimon Zhao; +Cc: dev

On Thu, 22 Jan 2026 19:16:23 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:

> Includes fixes for mbuf/memzone leaks and mailbox state handling.
> 
> Dimon Zhao (4):
>   net/nbl: fix memzone leak in queue release
>   net/nbl: optimize mbuf headroom usage in packet transmission
>   net/nbl: fix mbuf double-free in queue cleanup
>   net/nbl: improve exception handling for the mailbox
> 
>  drivers/net/nbl/nbl_dev/nbl_dev.c     |   2 +-
>  drivers/net/nbl/nbl_hw/nbl_channel.c  |  94 ++++++++++++++++----
>  drivers/net/nbl/nbl_hw/nbl_channel.h  |  11 ++-
>  drivers/net/nbl/nbl_hw/nbl_resource.h |   2 +
>  drivers/net/nbl/nbl_hw/nbl_txrx.c     | 122 +++++++++++++++++++-------
>  drivers/net/nbl/nbl_hw/nbl_txrx_ops.h |   2 +-
>  6 files changed, 179 insertions(+), 54 deletions(-)
> 

Some of the automated review comments are valid, some are not.
Please resubmit, or I can fix these during merge.

Fix:
1. Remove unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`
4. Fix grammar in log message ("does not" instead of "do not")

Ok as is, but would be good to change:
2. Add blank lines between variable declarations and statements
5. Use "Tx" instead of "transmission" in commit subject

Ignore:
3. Use explicit NULL comparisons (`!= NULL`) instead of implicit boolean tests

^ permalink raw reply	[flat|nested] 24+ messages in thread

* 回复:[PATCH v1 0/4] improve NBL memory safety and mailbox reliability
  2026-01-24 19:08 ` Stephen Hemminger
@ 2026-01-26  1:37   ` Dimon
  0 siblings, 0 replies; 24+ messages in thread
From: Dimon @ 2026-01-26  1:37 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

[-- Attachment #1: Type: text/plain, Size: 1672 bytes --]

Hi Stephen,
 Thanks for the review. I'll resubmit the patch shortly.
Thank you.
------------------------------------------------------------------
发件人:Stephen Hemminger <stephen@networkplumber.org>
发送时间:2026年1月25日(周日) 03:08
收件人:Dimon<dimon.zhao@nebula-matrix.com>
抄 送:dev<dev@dpdk.org>
主 题:Re: [PATCH v1 0/4] improve NBL memory safety and mailbox reliability
On Thu, 22 Jan 2026 19:16:23 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:
> Includes fixes for mbuf/memzone leaks and mailbox state handling.
> 
> Dimon Zhao (4):
> net/nbl: fix memzone leak in queue release
> net/nbl: optimize mbuf headroom usage in packet transmission
> net/nbl: fix mbuf double-free in queue cleanup
> net/nbl: improve exception handling for the mailbox
> 
> drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
> drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++----
> drivers/net/nbl/nbl_hw/nbl_channel.h | 11 ++-
> drivers/net/nbl/nbl_hw/nbl_resource.h | 2 +
> drivers/net/nbl/nbl_hw/nbl_txrx.c | 122 +++++++++++++++++++-------
> drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
> 6 files changed, 179 insertions(+), 54 deletions(-)
> 
Some of the automated review comments are valid, some are not.
Please resubmit, or I can fix during merge
Fix:
1. Remove unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`
4. Fix grammar in log message ("does not" instead of "do not")
Ok as is, but would be good to change:
2. Add blank lines between variable declarations and statements
5. Use "Tx" instead of "transmission" in commit subject
Ignore:
3. Use explicit NULL comparisons (`!= NULL`) instead of implicit boolean tests

[-- Attachment #2: Type: text/html, Size: 11029 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v2 0/4] improve NBL memory safety and mailbox reliability
  2026-01-23  3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
                   ` (5 preceding siblings ...)
  2026-01-24 19:08 ` Stephen Hemminger
@ 2026-01-26  7:58 ` Dimon Zhao
  2026-01-26  7:58   ` [PATCH v2 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
                     ` (3 more replies)
  2026-01-27  2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
  7 siblings, 4 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26  7:58 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao

Includes fixes for mbuf/memzone leaks and mailbox state handling.

Dimon Zhao (4):
  net/nbl: fix memzone leak in queue release
  net/nbl: optimize mbuf headroom usage in packet Tx
  net/nbl: fix mbuf double-free in queue cleanup
  net/nbl: improve exception handling for the mailbox

 drivers/net/nbl/nbl_dev/nbl_dev.c     |   2 +-
 drivers/net/nbl/nbl_hw/nbl_channel.c  |  94 +++++++++++++++++----
 drivers/net/nbl/nbl_hw/nbl_channel.h  |  11 ++-
 drivers/net/nbl/nbl_hw/nbl_resource.h |   2 +
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 115 ++++++++++++++++++--------
 drivers/net/nbl/nbl_hw/nbl_txrx_ops.h |   2 +-
 6 files changed, 171 insertions(+), 55 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v2 1/4] net/nbl: fix memzone leak in queue release
  2026-01-26  7:58 ` [PATCH v2 " Dimon Zhao
@ 2026-01-26  7:58   ` Dimon Zhao
  2026-01-27  1:02     ` Stephen Hemminger
  2026-01-26  7:58   ` [PATCH v2 2/4] net/nbl: optimize mbuf headroom usage in packet Tx Dimon Zhao
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26  7:58 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Add memzone free calls in tx_queue_release() and rx_queue_release()
to fix memory leaks. The memzones allocated in tx_queue_setup() and
rx_queue_setup() were not being freed when queues were released.

Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_resource.h |  2 ++
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 37 ++++++++++++++-------------
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_resource.h b/drivers/net/nbl/nbl_hw/nbl_resource.h
index 1f6515f64d..469c3f5827 100644
--- a/drivers/net/nbl/nbl_hw/nbl_resource.h
+++ b/drivers/net/nbl/nbl_hw/nbl_resource.h
@@ -137,6 +137,7 @@ struct nbl_res_tx_ring {
 	volatile struct nbl_packed_desc *desc;
 	struct nbl_tx_entry *tx_entry;
 	const struct rte_memzone *net_hdr_mz;
+	const struct rte_memzone *desc_mz;
 	volatile uint8_t *notify;
 	const struct rte_eth_dev *eth_dev;
 	struct nbl_common_info *common;
@@ -178,6 +179,7 @@ struct nbl_res_tx_ring {
 struct nbl_res_rx_ring {
 	volatile struct nbl_packed_desc *desc;
 	struct nbl_rx_entry *rx_entry;
+	const struct rte_memzone *desc_mz;
 	struct rte_mempool *mempool;
 	volatile uint8_t *notify;
 	const struct rte_eth_dev *eth_dev;
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 650790b4fc..563f011cd3 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -69,9 +69,10 @@ static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
 	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
 	struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
-	if (!tx_ring)
-		return;
+
 	rte_free(tx_ring->tx_entry);
+	rte_memzone_free(tx_ring->net_hdr_mz);
+	rte_memzone_free(tx_ring->desc_mz);
 	rte_free(tx_ring);
 	txrx_mgt->tx_rings[queue_idx] = NULL;
 }
@@ -104,7 +105,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
 
 	if (eth_dev->data->tx_queues[param->queue_idx] != NULL) {
 		NBL_LOG(WARNING, "re-setup an already allocated tx queue");
-		nbl_res_txrx_stop_tx_ring(priv, param->queue_idx);
+		nbl_res_txrx_release_tx_ring(priv, param->queue_idx);
 		eth_dev->data->tx_queues[param->queue_idx] = NULL;
 	}
 
@@ -173,6 +174,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
 	tx_ring->next_to_use = 0;
 	tx_ring->desc = (struct nbl_packed_desc *)memzone->addr;
 	tx_ring->net_hdr_mz = net_hdr_mz;
+	tx_ring->desc_mz = memzone;
 	tx_ring->eth_dev = eth_dev;
 	tx_ring->dma_set_msb = common->dma_set_msb;
 	tx_ring->dma_limit_msb = common->dma_limit_msb;
@@ -226,6 +228,18 @@ static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
 	rx_ring->next_to_use = 0;
 }
 
+static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
+{
+	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
+	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
+	struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+
+	rte_free(rx_ring->rx_entry);
+	rte_memzone_free(rx_ring->desc_mz);
+	rte_free(rx_ring);
+	txrx_mgt->rx_rings[queue_idx] = NULL;
+}
+
 static int nbl_res_txrx_start_rx_ring(void *priv,
 				      struct nbl_start_rx_ring_param *param,
 				      u64 *dma_addr)
@@ -244,7 +258,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
 
 	if (eth_dev->data->rx_queues[param->queue_idx] != NULL) {
 		NBL_LOG(WARNING, "re-setup an already allocated rx queue");
-		nbl_res_txrx_stop_rx_ring(priv, param->queue_idx);
+		nbl_res_txrx_release_rx_ring(priv, param->queue_idx);
 		eth_dev->data->rx_queues[param->queue_idx] = NULL;
 	}
 
@@ -275,6 +289,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
 
 	rx_ring->product = param->product;
 	rx_ring->mempool = param->mempool;
+	rx_ring->desc_mz = memzone;
 	rx_ring->nb_desc = param->nb_desc;
 	rx_ring->queue_id = param->queue_idx;
 	rx_ring->notify_qid =
@@ -376,20 +391,6 @@ static int nbl_res_alloc_rx_bufs(void *priv, u16 queue_idx)
 	return 0;
 }
 
-static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
-{
-	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
-	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
-	struct nbl_res_rx_ring *rx_ring =
-			NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
-	if (!rx_ring)
-		return;
-
-	rte_free(rx_ring->rx_entry);
-	rte_free(rx_ring);
-	txrx_mgt->rx_rings[queue_idx] = NULL;
-}
-
 static void nbl_res_txrx_update_rx_ring(void *priv, u16 index)
 {
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 2/4] net/nbl: optimize mbuf headroom usage in packet Tx
  2026-01-26  7:58 ` [PATCH v2 " Dimon Zhao
  2026-01-26  7:58   ` [PATCH v2 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
@ 2026-01-26  7:58   ` Dimon Zhao
  2026-01-26  7:58   ` [PATCH v2 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
  2026-01-26  7:58   ` [PATCH v2 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
  3 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26  7:58 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Only use mbuf headroom when mbuf->refcnt == 1,
indicating exclusive ownership. For shared mbufs (refcnt > 1),
use next desc instead to avoid data corruption.

This prevents modifying shared buffers that may be used by other
processing contexts.

Implementation details:
- Check mbuf->refcnt before using headroom
- For refcnt == 1: use existing headroom (performance optimal)
- For refcnt > 1: use next desc

Fixes: e5fc1f78c78c ("net/nbl: support Tx and Rx burst")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_txrx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 563f011cd3..2a6acdcac1 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -505,7 +505,10 @@ nbl_res_txrx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts, u
 			tx_extend_len = required_headroom + sizeof(struct rte_ether_hdr);
 		}
 
-		if (rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
+		if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
+		    RTE_MBUF_DIRECT(tx_pkt) &&
+		    tx_pkt->nb_segs == 1 &&
+		    rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
 			can_push = 1;
 			u = rte_pktmbuf_mtod_offset(tx_pkt, union nbl_tx_extend_head *,
 						    -required_headroom);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 3/4] net/nbl: fix mbuf double-free in queue cleanup
  2026-01-26  7:58 ` [PATCH v2 " Dimon Zhao
  2026-01-26  7:58   ` [PATCH v2 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
  2026-01-26  7:58   ` [PATCH v2 2/4] net/nbl: optimize mbuf headroom usage in packet Tx Dimon Zhao
@ 2026-01-26  7:58   ` Dimon Zhao
  2026-01-26  7:58   ` [PATCH v2 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
  3 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26  7:58 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Only free hardware-owned mbufs to avoid double-free
corruption of application-owned buffers.

TX ring: free only transmitted mbufs (next_to_clean to next_to_use).
RX ring: free only allocated receive buffers
	 (next_to_clean to next_to_use),
	 mbufs already delivered to the application (before next_to_clean)
	 must not be touched.

Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_dev/nbl_dev.c     |  2 +-
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 73 ++++++++++++++++++++++-----
 drivers/net/nbl/nbl_hw/nbl_txrx_ops.h |  2 +-
 3 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/drivers/net/nbl/nbl_dev/nbl_dev.c b/drivers/net/nbl/nbl_dev/nbl_dev.c
index edff61e52e..2b0413fb7c 100644
--- a/drivers/net/nbl/nbl_dev/nbl_dev.c
+++ b/drivers/net/nbl/nbl_dev/nbl_dev.c
@@ -328,8 +328,8 @@ int nbl_dev_port_stop(struct rte_eth_dev *eth_dev)
 	rte_delay_ms(NBL_SAFE_THREADS_WAIT_TIME);
 
 	nbl_dev_hw_stats_stop(eth_dev);
-	nbl_clear_queues(eth_dev);
 	nbl_dev_txrx_stop(eth_dev);
+	nbl_clear_queues(eth_dev);
 	nbl_userdev_port_config(adapter, NBL_KERNEL_NETWORK);
 	return 0;
 }
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 2a6acdcac1..3fa0b22e42 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -44,19 +44,43 @@ static void nbl_res_txrx_stop_tx_ring(void *priv, u16 queue_idx)
 {
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
 	struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
-	int i;
+	struct nbl_tx_entry *tx_entry;
+	u16 i, free_cnt, free_mbuf_cnt;
 
 	if (!tx_ring)
 		return;
 
-	for (i = 0; i < tx_ring->nb_desc; i++) {
-		if (tx_ring->tx_entry[i].mbuf != NULL) {
-			rte_pktmbuf_free_seg(tx_ring->tx_entry[i].mbuf);
-			memset(&tx_ring->tx_entry[i], 0, sizeof(*tx_ring->tx_entry));
+	i = tx_ring->next_to_clean + 1 - NBL_TX_RS_THRESH;
+	free_cnt = tx_ring->vq_free_cnt;
+	tx_entry = &tx_ring->tx_entry[i];
+	free_mbuf_cnt = 0;
+
+	while (free_cnt < tx_ring->nb_desc) {
+		if (tx_entry->mbuf) {
+			free_mbuf_cnt++;
+			rte_pktmbuf_free_seg(tx_entry->mbuf);
+		}
+
+		i++;
+		tx_entry++;
+		free_cnt++;
+		if (i == tx_ring->nb_desc) {
+			i = 0;
+			tx_entry = &tx_ring->tx_entry[i];
 		}
+	}
+
+	NBL_LOG(DEBUG, "stop tx ring ntc %u, ntu %u, vq_free_cnt %u, mbuf free_cnt %u",
+		tx_ring->next_to_clean, tx_ring->next_to_use, tx_ring->vq_free_cnt, free_mbuf_cnt);
+
+	for (i = 0; i < tx_ring->nb_desc; i++) {
+		tx_ring->desc[i].addr = 0;
+		tx_ring->desc[i].len = 0;
+		tx_ring->desc[i].id = 0;
 		tx_ring->desc[i].flags = 0;
 	}
 
+	memset(tx_ring->tx_entry, 0, sizeof(*tx_entry) * tx_ring->nb_desc);
 	tx_ring->avail_used_flags = NBL_PACKED_DESC_F_AVAIL_BIT;
 	tx_ring->used_wrap_counter = 1;
 	tx_ring->next_to_clean = NBL_TX_RS_THRESH - 1;
@@ -205,25 +229,46 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
 static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
 {
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
-	struct nbl_res_rx_ring *rx_ring =
-			NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
-	u16 i;
+	struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+	struct nbl_rx_entry *rx_buf;
+	u16 i, free_cnt, free_mbuf_cnt;
 
 	if (!rx_ring)
 		return;
+
+	i = rx_ring->next_to_clean;
+	free_cnt = rx_ring->vq_free_cnt;
+	free_mbuf_cnt = 0;
+
 	if (rx_ring->rx_entry != NULL) {
-		for (i = 0; i < rx_ring->nb_desc; i++) {
-			if (rx_ring->rx_entry[i].mbuf != NULL) {
-				rte_pktmbuf_free_seg(rx_ring->rx_entry[i].mbuf);
-				rx_ring->rx_entry[i].mbuf = NULL;
+		rx_buf = &rx_ring->rx_entry[i];
+		while (free_cnt < rx_ring->nb_desc) {
+			if (rx_buf->mbuf) {
+				free_mbuf_cnt++;
+				rte_pktmbuf_free_seg(rx_buf->mbuf);
+			}
+
+			i++;
+			rx_buf++;
+			free_cnt++;
+			if (i == rx_ring->nb_desc) {
+				i = 0;
+				rx_buf = &rx_ring->rx_entry[i];
 			}
-			rx_ring->desc[i].flags = 0;
 		}
 
-		for (i = rx_ring->nb_desc; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++)
+		memset(rx_ring->rx_entry, 0, sizeof(struct nbl_rx_entry) * rx_ring->nb_desc);
+
+		for (i = 0; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++) {
+			rx_ring->desc[i].addr = 0;
+			rx_ring->desc[i].len = 0;
+			rx_ring->desc[i].id = 0;
 			rx_ring->desc[i].flags = 0;
+		}
 	}
 
+	NBL_LOG(DEBUG, "stop rx ring ntc %u, ntu %u, vq_free_cnt %u, free_mbuf_cnt %u",
+		rx_ring->next_to_clean, rx_ring->next_to_use, rx_ring->vq_free_cnt, free_mbuf_cnt);
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 }
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
index 2ab4b09683..dc0bab2b7b 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
@@ -43,7 +43,7 @@ nbl_tx_free_bufs(struct nbl_res_tx_ring *txq)
 	if (!desc_is_used(&txq->desc[next_to_clean], txq->used_wrap_counter))
 		return 0;
 
-	n = 32;
+	n = NBL_TX_RS_THRESH;
 
 	 /* first buffer to free from S/W ring is at index
 	  * tx_next_dd - (tx_rs_thresh-1)
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 4/4] net/nbl: improve exception handling for the mailbox
  2026-01-26  7:58 ` [PATCH v2 " Dimon Zhao
                     ` (2 preceding siblings ...)
  2026-01-26  7:58   ` [PATCH v2 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
@ 2026-01-26  7:58   ` Dimon Zhao
  2026-01-26 18:18     ` [REVIEW] " Stephen Hemminger
  3 siblings, 1 reply; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26  7:58 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Key changes:
1. Replace simple 'acked' flag with a state machine to improve mailbox
   reliability. New states: IDLE, WAITING, ACKING, ACKED, TIMEOUT.
2. Validate message ID and type before processing acknowledgments to
   prevent incorrect ACK matching.
3. Handle timeout scenarios: discard ACKs received after timeout to
   prevent use of already released buffers.
4. Check mailbox status before sending: if the send mailbox descriptor
   is found to be in use or waiting for an ACK during Tx,
   the message will not be sent.

Fixes: a1c5ffa13b2c ("net/nbl: add channel layer")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++++++++------
 drivers/net/nbl/nbl_hw/nbl_channel.h | 11 +++-
 2 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.c b/drivers/net/nbl/nbl_hw/nbl_channel.c
index f81c4c8591..52ad71c833 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.c
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.c
@@ -22,6 +22,7 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
 {
 	struct nbl_chan_ring *txq = &chan_info->mailbox.txq;
 	size_t size = chan_info->mailbox.num_txq_entries * sizeof(struct nbl_chan_tx_desc);
+	int i;
 
 	txq->desc = nbl_alloc_dma_mem(&txq->desc_mem, size);
 	if (!txq->desc) {
@@ -36,6 +37,10 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
 		goto req_wait_queue_failed;
 	}
 
+	for (i = 0; i < chan_info->mailbox.num_txq_entries; i++)
+		rte_atomic_store_explicit(&chan_info->mailbox.wait[i].status, NBL_MBX_STATUS_IDLE,
+					  rte_memory_order_relaxed);
+
 	size = (u64)chan_info->mailbox.num_txq_entries * (u64)chan_info->mailbox.txq_buf_size;
 	txq->buf = nbl_alloc_dma_mem(&txq->buf_mem, size);
 	if (!txq->buf) {
@@ -253,24 +258,55 @@ static void nbl_chan_recv_ack_msg(void *priv, uint16_t srcid, uint16_t msgid,
 	uint32_t ack_msgid;
 	uint32_t ack_msgtype;
 	uint32_t copy_len;
+	int status = NBL_MBX_STATUS_WAITING;
 
 	chan_info = NBL_CHAN_MGT_TO_CHAN_INFO(chan_mgt);
 	ack_msgtype = *payload;
 	ack_msgid = *(payload + 1);
+
+	if (ack_msgid >= chan_mgt->chan_info->mailbox.num_txq_entries) {
+		NBL_LOG(ERR, "Invalid msg id %u, msg type %u", ack_msgid, ack_msgtype);
+		return;
+	}
+
 	wait_head = &chan_info->mailbox.wait[ack_msgid];
+	if (wait_head->msg_type != ack_msgtype) {
+		NBL_LOG(ERR, "Invalid msg id %u, msg type %u, wait msg type %u",
+			ack_msgid, ack_msgtype, wait_head->msg_type);
+		return;
+	}
+
+	if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
+							 NBL_MBX_STATUS_ACKING,
+							 rte_memory_order_acq_rel,
+							 rte_memory_order_relaxed)) {
+		NBL_LOG(ERR, "Invalid wait status %d msg id %u, msg type %u",
+			wait_head->status, ack_msgid, ack_msgtype);
+		return;
+	}
+
 	wait_head->ack_err = *(payload + 2);
 
 	NBL_LOG(DEBUG, "recv ack, srcid:%u, msgtype:%u, msgid:%u, ack_msgid:%u,"
-		" data_len:%u, ack_data_len:%u",
-		srcid, ack_msgtype, msgid, ack_msgid, data_len, wait_head->ack_data_len);
+		" data_len:%u, ack_data_len:%u, ack_err:%d",
+		srcid, ack_msgtype, msgid, ack_msgid, data_len,
+		wait_head->ack_data_len, wait_head->ack_err);
 
 	if (wait_head->ack_err >= 0 && (data_len > 3 * sizeof(uint32_t))) {
 		/* the mailbox msg parameter structure may change */
+		if (data_len - 3 * sizeof(uint32_t) != wait_head->ack_data_len)
+			NBL_LOG(INFO, "payload_len does not match ack_len!,"
+				" srcid:%u, msgtype:%u, msgid:%u, ack_msgid %u,"
+				" data_len:%u, ack_data_len:%u",
+				srcid, ack_msgtype, msgid,
+				ack_msgid, data_len, wait_head->ack_data_len);
 		copy_len = RTE_MIN(wait_head->ack_data_len, data_len - 3 * sizeof(uint32_t));
-		memcpy(wait_head->ack_data, payload + 3, copy_len);
+		if (copy_len)
+			memcpy(wait_head->ack_data, payload + 3, copy_len);
 	}
 
-	rte_atomic_store_explicit(&wait_head->acked, 1, rte_memory_order_release);
+	rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_ACKED,
+				  rte_memory_order_release);
 }
 
 static void nbl_chan_recv_msg(struct nbl_channel_mgt *chan_mgt, void *data)
@@ -371,12 +407,27 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
 {
 	struct nbl_chan_ring *txq;
 	struct nbl_chan_tx_desc *tx_desc;
+	struct nbl_chan_waitqueue_head *wait_head;
 	uint64_t pa;
 	void *va;
 	uint16_t next_to_use;
+	int status;
+
+	if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
+		NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
+		return 0xFFFF;
+	}
 
 	txq = &chan_info->mailbox.txq;
 	next_to_use = txq->next_to_use;
+
+	wait_head = &chan_info->mailbox.wait[next_to_use];
+	status = rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire);
+	if (status != NBL_MBX_STATUS_IDLE && status != NBL_MBX_STATUS_TIMEOUT) {
+		NBL_LOG(ERR, "too much inflight mailbox msg, msg id %u", next_to_use);
+		return 0xFFFF;
+	}
+
 	va = (u8 *)txq->buf + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
 	pa = txq->buf_mem.pa + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
 	tx_desc = NBL_CHAN_TX_DESC(txq, next_to_use);
@@ -384,10 +435,6 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
 	tx_desc->dstid = dstid;
 	tx_desc->msg_type = msg_type;
 	tx_desc->msgid = next_to_use;
-	if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
-		NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
-		return -1;
-	}
 
 	if (arg_len > NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN) {
 		memcpy(va, arg, arg_len);
@@ -418,6 +465,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
 	uint16_t msgid;
 	int ret;
 	int retry_time = 0;
+	int status = NBL_MBX_STATUS_WAITING;
 
 	if (chan_mgt->state)
 		return -EIO;
@@ -431,8 +479,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
 
 	if (msgid == 0xFFFF) {
 		rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
-		NBL_LOG(ERR, "chan tx queue full, send msgtype:%u"
-			" to dstid:%u failed",
+		NBL_LOG(ERR, "chan tx queue full, send msgtype:%u to dstid:%u failed",
 			chan_send->msg_type, chan_send->dstid);
 		return -ECOMM;
 	}
@@ -446,23 +493,34 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
 	wait_head = &chan_info->mailbox.wait[msgid];
 	wait_head->ack_data = chan_send->resp;
 	wait_head->ack_data_len = chan_send->resp_len;
-	rte_atomic_store_explicit(&wait_head->acked, 0, rte_memory_order_relaxed);
 	wait_head->msg_type = chan_send->msg_type;
-	rte_wmb();
+	rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_WAITING,
+				  rte_memory_order_release);
 	nbl_chan_kick_tx_ring(chan_mgt, chan_info);
 	rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
 
 	while (1) {
-		if (rte_atomic_load_explicit(&wait_head->acked, rte_memory_order_acquire))
-			return wait_head->ack_err;
+		if (rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire) ==
+		    NBL_MBX_STATUS_ACKED) {
+			ret = wait_head->ack_err;
+			rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_IDLE,
+						  rte_memory_order_release);
+			return ret;
+		}
 
 		rte_delay_us(50);
 		retry_time++;
-		if (retry_time > NBL_CHAN_RETRY_TIMES)
-			return -EIO;
+		if (retry_time > NBL_CHAN_RETRY_TIMES &&
+		    rte_atomic_compare_exchange_strong_explicit(&wait_head->status,
+								&status, NBL_MBX_STATUS_TIMEOUT,
+								rte_memory_order_acq_rel,
+								rte_memory_order_relaxed)) {
+			NBL_LOG(ERR, "send msgtype:%u msgid %u to dstid:%u timeout",
+				chan_send->msg_type, msgid, chan_send->dstid);
+			break;
+		}
 	}
-
-	return 0;
+	return -EIO;
 }
 
 static int nbl_chan_send_ack(void *priv, struct nbl_chan_ack_info *chan_ack)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.h b/drivers/net/nbl/nbl_hw/nbl_channel.h
index f8fa89af51..21fce7478d 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.h
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.h
@@ -41,6 +41,14 @@ enum {
 	NBL_MB_TX_QID = 1,
 };
 
+enum {
+	NBL_MBX_STATUS_IDLE = 0,
+	NBL_MBX_STATUS_WAITING,
+	NBL_MBX_STATUS_ACKING,
+	NBL_MBX_STATUS_ACKED,
+	NBL_MBX_STATUS_TIMEOUT,
+};
+
 struct __rte_packed_begin nbl_chan_tx_desc {
 	uint16_t flags;
 	uint16_t srcid;
@@ -74,8 +82,7 @@ struct nbl_chan_ring {
 
 struct nbl_chan_waitqueue_head {
 	char *ack_data;
-	RTE_ATOMIC(int)
-		acked;
+	RTE_ATOMIC(int) status;
 	int ack_err;
 	uint16_t ack_data_len;
 	uint16_t msg_type;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [REVIEW] net/nbl: improve exception handling for the mailbox
  2026-01-26  7:58   ` [PATCH v2 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
@ 2026-01-26 18:18     ` Stephen Hemminger
  2026-01-26 18:28       ` Stephen Hemminger
  0 siblings, 1 reply; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-26 18:18 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

AI-generated review of bundle-1692-nbl-v2.mbox
Reviewed using Claude (claude-sonnet-4-5-20250929)

This is an automated review. Please verify all suggestions.

---

# DPDK Patch Review: bundle-1692-nbl-v2.mbox

## PATCH 1/4: net/nbl: fix memzone leak in queue release

### Commit Message
**Error**: Subject line exceeds 60 characters (61 characters: "net/nbl: fix memzone leak in queue release")
- Current: 61 characters
- Maximum: 60 characters
- Suggested: "net/nbl: fix memzone leak on queue release"

**Warning**: Body does not describe the issue being fixed
- The body jumps directly to the solution without explaining why memzones were leaking
- Should explain: "Memzones allocated during queue setup were not freed during queue release, causing memory leaks when queues are reconfigured or device is stopped."

**Info**: Good use of Fixes tag with 12-char SHA and Cc: stable

### Code Review
**Warning**: Missing NULL checks before rte_memzone_free()
```c
rte_free(tx_ring->tx_entry);
rte_memzone_free(tx_ring->net_hdr_mz);  // Could be NULL if setup failed
rte_memzone_free(tx_ring->desc_mz);     // Could be NULL if setup failed
```
While rte_memzone_free() handles NULL, this is not documented behavior. Better to check explicitly or document the assumption.

**Warning**: Logic change mixed with fix
- The patch changes from calling `nbl_res_txrx_stop_tx_ring()` to `nbl_res_txrx_release_tx_ring()` in queue setup path
- This is a separate behavioral change that should be in its own patch or explained in commit message

**Info**: Function reordering (nbl_res_txrx_release_rx_ring moved) makes diff harder to review
- Consider pure addition without moving existing code

---

## PATCH 2/4: net/nbl: optimize mbuf headroom usage in packet Tx

### Commit Message
**Error**: Subject line exceeds 60 characters (62 characters)
- Current: "net/nbl: optimize mbuf headroom usage in packet Tx"
- Suggested: "net/nbl: fix shared mbuf handling in Tx"

**Warning**: Subject line misleading - this is a correctness fix, not optimization
- "optimize" suggests performance improvement, but this fixes data corruption
- Should use "fix" as this prevents writing to shared buffers

**Warning**: Body structure issue
- "Implementation details:" section is too verbose for commit message
- Should focus on what problem is fixed and why

**Info**: Good Fixes tag and stable CC

### Code Review
**Warning**: Unnecessary defensive checks
```c
if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
    RTE_MBUF_DIRECT(tx_pkt) &&           // Unnecessary
    tx_pkt->nb_segs == 1 &&              // Unnecessary
    rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
```
- `RTE_MBUF_DIRECT(tx_pkt)`: Already checked by headroom availability (indirect mbufs have no headroom)
- `nb_segs == 1`: Multi-segment handling should be checked elsewhere; this conflates concerns

**Error**: Incomplete fix - missing boundary check
- The code modifies `tx_pkt` at offset `-required_headroom` but doesn't verify the mbuf wasn't cloned with a non-zero data offset
- Need: `tx_pkt->data_off >= required_headroom`

**Suggested fix**:
```c
if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
    rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
```

---

## PATCH 3/4: net/nbl: fix mbuf double-free in queue cleanup

### Commit Message
**Error**: Subject line exceeds 60 characters (61 characters)
- Suggested: "net/nbl: fix double-free in queue cleanup"

**Warning**: Body formatting issues
- Inconsistent indentation in the description
- "TX ring:" and "RX ring:" descriptions use tabs instead of standard formatting

**Info**: Good explanation of the fix scope

### Code Review
**Error**: Code order violation in nbl_dev.c
```c
nbl_dev_hw_stats_stop(eth_dev);
nbl_clear_queues(eth_dev);  // Moved after txrx_stop
nbl_dev_txrx_stop(eth_dev);
```
This changes the order of operations but the commit message doesn't explain why this reordering is necessary. This should be in a separate patch or thoroughly explained.

**Warning**: Loop iteration complexity
```c
i = tx_ring->next_to_clean + 1 - NBL_TX_RS_THRESH;
```
This arithmetic is fragile and could wrap around. Needs comment explaining why `+1 - NBL_TX_RS_THRESH`.

**Warning**: Unnecessary variable initialization
```c
u16 i, free_cnt, free_mbuf_cnt;
...
free_mbuf_cnt = 0;  // Only used for logging
```
`free_mbuf_cnt` is only for debug logging; consider removing if not essential.

**Error**: Missing wrap-around handling documentation
The loops handle ring wrap-around but don't document what happens at boundary:
```c
if (i == tx_ring->nb_desc) {
    i = 0;
    tx_entry = &tx_ring->tx_entry[i];
}
```

**Info**: Good use of NBL_LOG(DEBUG) for diagnostics

---

## PATCH 4/4: net/nbl: improve exception handling for the mailbox

### Commit Message
**Error**: Subject line exceeds 60 characters (64 characters)
- Suggested: "net/nbl: improve mailbox exception handling"

**Warning**: Body uses numbered list format which is unusual for DPDK
- Should use prose paragraphs instead of bullet points
- Current style is inconsistent with DPDK conventions

**Warning**: Too much implementation detail in commit message
- "New states: IDLE, WAITING, ACKING, ACKED, TIMEOUT" - this is code-level detail
- Focus on what problems are solved, not how

### Code Review
**Warning**: Missing atomic operation explanation
```c
if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
                         NBL_MBX_STATUS_ACKING,
                         rte_memory_order_acq_rel,
                         rte_memory_order_relaxed)) {
```
This is a critical section but no comment explains the race being prevented.

**Error**: Inconsistent state transition handling
```c
status = rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire);
if (status != NBL_MBX_STATUS_IDLE && status != NBL_MBX_STATUS_TIMEOUT) {
    NBL_LOG(ERR, "too much inflight mailbox msg, msg id %u", next_to_use);
    return 0xFFFF;
}
```
Returns 0xFFFF on error but caller checks `msgid == 0xFFFF`. Should use a defined constant like `NBL_CHAN_INVALID_MSGID`.

**Warning**: Magic number for retry times
```c
if (retry_time > NBL_CHAN_RETRY_TIMES &&
```
`NBL_CHAN_RETRY_TIMES` is used but the timeout calculation (`50us * NBL_CHAN_RETRY_TIMES`) is not documented.

**Warning**: Log level inconsistency
```c
NBL_LOG(INFO, "payload_len does not match ack_len!,...)
```
Should this be WARNING or ERROR? A mismatch could indicate protocol violation.

**Error**: Race condition in timeout handling
```c
if (retry_time > NBL_CHAN_RETRY_TIMES &&
    rte_atomic_compare_exchange_strong_explicit(&wait_head->status,
                        &status, NBL_MBX_STATUS_TIMEOUT,
```
Between checking `retry_time` and CAS, an ACK could arrive. Need to verify the window is acceptable or tighten the logic.

---

## General Issues Across All Patches

### License and Copyright
**Info**: No license headers visible in the patches, assuming they exist in original files

### Code Style
**Warning**: Inconsistent spacing in conditional expressions across patches
**Info**: General code style appears compliant with DPDK standards

### Testing
**Warning**: No mention of testing performed for these critical bug fixes
- These are data corruption and memory leak fixes that need validation
- Should document test scenarios in cover letter or commit messages

### Documentation
**Warning**: No updates to release notes for these bug fixes
- Significant fixes affecting data integrity should be in release notes
- At minimum, should update doc/guides/rel_notes/release_XX_XX.rst

---

## Summary

**Must Fix (Errors)**:
1. All subject lines exceed 60 characters - shorten them
2. Patch 2: Missing mbuf data_off boundary check (security issue)
3. Patch 3: Unexplained functional change (operation reordering)
4. Patch 4: Magic return value 0xFFFF needs named constant

**Should Fix (Warnings)**:
1. Patch 1: Add NULL checks or document assumptions for memzone pointers
2. Patch 2: Subject says "optimize" but fixes data corruption - use "fix"
3. Patch 2: Remove unnecessary defensive checks (RTE_MBUF_DIRECT, nb_segs)
4. Patch 3: Document wrap-around arithmetic logic
5. Patch 4: Document atomic operation races being prevented
6. All patches: Add testing information
7. All patches: Update release notes for fixes

**Consider (Info)**:
1. Patch 1: Avoid function reordering in diffs for easier review
2. Patch 3: Consider removing debug-only counters
3. Commit message formatting consistency across series

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [REVIEW] net/nbl: improve exception handling for the mailbox
  2026-01-26 18:18     ` [REVIEW] " Stephen Hemminger
@ 2026-01-26 18:28       ` Stephen Hemminger
  0 siblings, 0 replies; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-26 18:28 UTC (permalink / raw)
  To: dev

On Mon, 26 Jan 2026 10:18:38 -0800
Stephen Hemminger <stephen@networkplumber.org> wrote:

> ### Code Review
> **Warning**: Missing NULL checks before rte_memzone_free()
> ```c
> rte_free(tx_ring->tx_entry);
> rte_memzone_free(tx_ring->net_hdr_mz);  // Could be NULL if setup failed
> rte_memzone_free(tx_ring->desc_mz);     // Could be NULL if setup failed
> ```
> While rte_memzone_free() handles NULL, this is not documented behavior. Better to check explicitly or document assumption.

Ignore this feedback. It points at missing documentation in rte_memzone, not a problem in your patch.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 1/4] net/nbl: fix memzone leak in queue release
  2026-01-26  7:58   ` [PATCH v2 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
@ 2026-01-27  1:02     ` Stephen Hemminger
  0 siblings, 0 replies; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-27  1:02 UTC (permalink / raw)
  To: Dimon Zhao; +Cc: dev, stable, Kyo Liu, Leon Yu, Sam Chen

On Sun, 25 Jan 2026 23:58:12 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:

> diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
> index 650790b4fc..563f011cd3 100644
> --- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
> +++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
> @@ -69,9 +69,10 @@ static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
>  	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
>  	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
>  	struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
> -	if (!tx_ring)
> -		return;
> +

Automated patch review flagged this removal as a possible problem.

Code Review:

ERROR: NULL pointer dereference introduced

The NULL check for tx_ring was removed but the code immediately dereferences it:
c

static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
{
	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
	struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);

	rte_free(tx_ring->tx_entry);  /* <-- crash if tx_ring is NULL */

The old code had:
c

	if (!tx_ring)
		return;

This guard must be restored, or the callers must guarantee tx_ring is never NULL.

Similarly for nbl_res_txrx_release_rx_ring — no NULL check before dereferencing rx_ring->rx_entry and rx_ring->desc_mz.



Further AI-led analysis shows scenarios where it could be a problem.

Complete Call Chain Analysis for Patch 1/4
Call Flow

DPDK Application
    ↓
rte_eth_dev_tx_queue_release() or eth_dev_ops->tx_queue_release
    ↓
nbl_tx_queues_release() [nbl_dev.c:420]
    ↓
disp_ops->release_tx_ring() → nbl_disp_release_tx_ring() [nbl_dispatch.c:163]
    ↓
res_ops->release_tx_ring() → nbl_res_txrx_release_tx_ring() [nbl_txrx.c]

Where tx_ring Can Be NULL

The macro definition:
c

#define NBL_RES_MGT_TO_TX_RING(res_mgt, index)	\
    (NBL_RES_MGT_TO_TXRX_MGT(res_mgt)->tx_rings[(index)])

Where tx_rings is:
c

struct nbl_res_tx_ring **tx_rings;  // Array of pointers

Scenario 1: Normal close without all queues set up

In nbl_dev_port_close() → nbl_release_queues() (line 337-349):
c

for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
    disp_ops->release_tx_ring(NBL_DEV_MGT_TO_DISP_PRIV(dev_mgt), i);

If the application configured 4 queues but only called tx_queue_setup() on 2 of them, tx_rings[2] and tx_rings[3] will be NULL.

Scenario 2: DPDK ethdev layer calling release on never-setup queue

DPDK's rte_eth_dev_close() can call tx_queue_release for all configured queues without validating they were set up.

Scenario 3: Re-setup path mismatch

In the patched nbl_res_txrx_start_tx_ring():
c

if (eth_dev->data->tx_queues[param->queue_idx] != NULL) {
    NBL_LOG(WARNING, "re-setup an already allocated tx queue");
    nbl_res_txrx_release_tx_ring(priv, param->queue_idx);  // <-- Called

The check is on eth_dev->data->tx_queues[] (ethdev layer), but tx_rings[] (resource layer) could differ if a previous setup partially failed.
Verdict: Confirmed Bug in Patch 1/4

The removal of NULL checks causes NULL pointer dereference in both nbl_res_txrx_release_tx_ring() and nbl_res_txrx_release_rx_ring().
Required Fix
c

static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
{
    struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
    struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
    struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);

+   if (!tx_ring)
+       return;
+
    rte_free(tx_ring->tx_entry);
    rte_memzone_free(tx_ring->net_hdr_mz);
    rte_memzone_free(tx_ring->desc_mz);
    rte_free(tx_ring);
    txrx_mgt->tx_rings[queue_idx] = NULL;
}

static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
{
    struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
    struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
    struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);

+   if (!rx_ring)
+       return;
+
    rte_free(rx_ring->rx_entry);
    rte_memzone_free(rx_ring->desc_mz);
    rte_free(rx_ring);
    txrx_mgt->rx_rings[queue_idx] = NULL;
}

The memzone leak fix itself is correct and needed — but the defensive NULL checks must be preserved.

Conclusion: put the NULL checks back into the Rx and Tx release functions, then retest and resubmit a new version.


Full chat is here if you are interested: https://claude.ai/share/d543bddd-7406-4f00-b5cb-4a92e8718476


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v3 0/4] improve NBL memory safety and mailbox reliability
  2026-01-23  3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
                   ` (6 preceding siblings ...)
  2026-01-26  7:58 ` [PATCH v2 " Dimon Zhao
@ 2026-01-27  2:52 ` Dimon Zhao
  2026-01-27  2:52   ` [PATCH v3 1/4] net/nbl: fix memzone leak on queue release Dimon Zhao
                     ` (4 more replies)
  7 siblings, 5 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27  2:52 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao

Includes fixes for mbuf/memzone leaks and mailbox state handling.

Dimon Zhao (4):
  net/nbl: fix memzone leak on queue release
  net/nbl: fix mbuf headroom usage in packet Tx
  net/nbl: fix mbuf double-free in queue cleanup
  net/nbl: improve mailbox exception handling

 drivers/net/nbl/nbl_dev/nbl_dev.c     |   2 +-
 drivers/net/nbl/nbl_hw/nbl_channel.c  |  94 +++++++++++++++++----
 drivers/net/nbl/nbl_hw/nbl_channel.h  |  11 ++-
 drivers/net/nbl/nbl_hw/nbl_resource.h |   2 +
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 117 +++++++++++++++++++-------
 drivers/net/nbl/nbl_hw/nbl_txrx_ops.h |   2 +-
 6 files changed, 175 insertions(+), 53 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v3 1/4] net/nbl: fix memzone leak on queue release
  2026-01-27  2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
@ 2026-01-27  2:52   ` Dimon Zhao
  2026-01-27  2:52   ` [PATCH v3 2/4] net/nbl: fix mbuf headroom usage in packet Tx Dimon Zhao
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27  2:52 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Add memzone free calls in tx_queue_release() and rx_queue_release()
to fix memory leaks. The memzones allocated in tx_queue_setup() and
rx_queue_setup() were not being freed when queues were released.

Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_resource.h |  2 ++
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 39 ++++++++++++++++-----------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_resource.h b/drivers/net/nbl/nbl_hw/nbl_resource.h
index 1f6515f64d..469c3f5827 100644
--- a/drivers/net/nbl/nbl_hw/nbl_resource.h
+++ b/drivers/net/nbl/nbl_hw/nbl_resource.h
@@ -137,6 +137,7 @@ struct nbl_res_tx_ring {
 	volatile struct nbl_packed_desc *desc;
 	struct nbl_tx_entry *tx_entry;
 	const struct rte_memzone *net_hdr_mz;
+	const struct rte_memzone *desc_mz;
 	volatile uint8_t *notify;
 	const struct rte_eth_dev *eth_dev;
 	struct nbl_common_info *common;
@@ -178,6 +179,7 @@ struct nbl_res_tx_ring {
 struct nbl_res_rx_ring {
 	volatile struct nbl_packed_desc *desc;
 	struct nbl_rx_entry *rx_entry;
+	const struct rte_memzone *desc_mz;
 	struct rte_mempool *mempool;
 	volatile uint8_t *notify;
 	const struct rte_eth_dev *eth_dev;
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 650790b4fc..9d92ab9992 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -69,9 +69,13 @@ static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
 	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
 	struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
+
 	if (!tx_ring)
 		return;
+
 	rte_free(tx_ring->tx_entry);
+	rte_memzone_free(tx_ring->net_hdr_mz);
+	rte_memzone_free(tx_ring->desc_mz);
 	rte_free(tx_ring);
 	txrx_mgt->tx_rings[queue_idx] = NULL;
 }
@@ -104,7 +108,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
 
 	if (eth_dev->data->tx_queues[param->queue_idx] != NULL) {
 		NBL_LOG(WARNING, "re-setup an already allocated tx queue");
-		nbl_res_txrx_stop_tx_ring(priv, param->queue_idx);
+		nbl_res_txrx_release_tx_ring(priv, param->queue_idx);
 		eth_dev->data->tx_queues[param->queue_idx] = NULL;
 	}
 
@@ -173,6 +177,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
 	tx_ring->next_to_use = 0;
 	tx_ring->desc = (struct nbl_packed_desc *)memzone->addr;
 	tx_ring->net_hdr_mz = net_hdr_mz;
+	tx_ring->desc_mz = memzone;
 	tx_ring->eth_dev = eth_dev;
 	tx_ring->dma_set_msb = common->dma_set_msb;
 	tx_ring->dma_limit_msb = common->dma_limit_msb;
@@ -226,6 +231,21 @@ static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
 	rx_ring->next_to_use = 0;
 }
 
+static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
+{
+	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
+	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
+	struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+
+	if (!rx_ring)
+		return;
+
+	rte_free(rx_ring->rx_entry);
+	rte_memzone_free(rx_ring->desc_mz);
+	rte_free(rx_ring);
+	txrx_mgt->rx_rings[queue_idx] = NULL;
+}
+
 static int nbl_res_txrx_start_rx_ring(void *priv,
 				      struct nbl_start_rx_ring_param *param,
 				      u64 *dma_addr)
@@ -244,7 +264,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
 
 	if (eth_dev->data->rx_queues[param->queue_idx] != NULL) {
 		NBL_LOG(WARNING, "re-setup an already allocated rx queue");
-		nbl_res_txrx_stop_rx_ring(priv, param->queue_idx);
+		nbl_res_txrx_release_rx_ring(priv, param->queue_idx);
 		eth_dev->data->rx_queues[param->queue_idx] = NULL;
 	}
 
@@ -275,6 +295,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
 
 	rx_ring->product = param->product;
 	rx_ring->mempool = param->mempool;
+	rx_ring->desc_mz = memzone;
 	rx_ring->nb_desc = param->nb_desc;
 	rx_ring->queue_id = param->queue_idx;
 	rx_ring->notify_qid =
@@ -376,20 +397,6 @@ static int nbl_res_alloc_rx_bufs(void *priv, u16 queue_idx)
 	return 0;
 }
 
-static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
-{
-	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
-	struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
-	struct nbl_res_rx_ring *rx_ring =
-			NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
-	if (!rx_ring)
-		return;
-
-	rte_free(rx_ring->rx_entry);
-	rte_free(rx_ring);
-	txrx_mgt->rx_rings[queue_idx] = NULL;
-}
-
 static void nbl_res_txrx_update_rx_ring(void *priv, u16 index)
 {
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v3 2/4] net/nbl: fix mbuf headroom usage in packet Tx
  2026-01-27  2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
  2026-01-27  2:52   ` [PATCH v3 1/4] net/nbl: fix memzone leak on queue release Dimon Zhao
@ 2026-01-27  2:52   ` Dimon Zhao
  2026-01-27  2:52   ` [PATCH v3 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27  2:52 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Only use mbuf headroom when mbuf->refcnt == 1,
indicating exclusive ownership. For shared mbufs (refcnt > 1),
use next desc instead to avoid data corruption.

This prevents modifying shared buffers that may be used by other
processing contexts.

Implementation details:
- Check mbuf->refcnt before using headroom
- For refcnt == 1: use existing headroom (performance optimal)
- For refcnt > 1: use next desc

Fixes: e5fc1f78c78c ("net/nbl: support Tx and Rx burst")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_txrx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 9d92ab9992..93d9f0f7de 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -511,7 +511,10 @@ nbl_res_txrx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts, u
 			tx_extend_len = required_headroom + sizeof(struct rte_ether_hdr);
 		}
 
-		if (rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
+		if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
+		    RTE_MBUF_DIRECT(tx_pkt) &&
+		    tx_pkt->nb_segs == 1 &&
+		    rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
 			can_push = 1;
 			u = rte_pktmbuf_mtod_offset(tx_pkt, union nbl_tx_extend_head *,
 						    -required_headroom);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v3 3/4] net/nbl: fix mbuf double-free in queue cleanup
  2026-01-27  2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
  2026-01-27  2:52   ` [PATCH v3 1/4] net/nbl: fix memzone leak on queue release Dimon Zhao
  2026-01-27  2:52   ` [PATCH v3 2/4] net/nbl: fix mbuf headroom usage in packet Tx Dimon Zhao
@ 2026-01-27  2:52   ` Dimon Zhao
  2026-01-27  2:52   ` [PATCH v3 4/4] net/nbl: improve mailbox exception handling Dimon Zhao
  2026-01-27 15:10   ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
  4 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27  2:52 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Only free hardware-owned mbufs to avoid double-free
corruption of application-owned buffers.

TX ring: free only transmitted mbufs (next_to_clean to next_to_use).
RX ring: free only allocated receive buffers
	 (next_to_clean to next_to_use),
	 mbufs already delivered to the application (before next_to_clean)
	 must not be touched.

Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_dev/nbl_dev.c     |  2 +-
 drivers/net/nbl/nbl_hw/nbl_txrx.c     | 73 ++++++++++++++++++++++-----
 drivers/net/nbl/nbl_hw/nbl_txrx_ops.h |  2 +-
 3 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/drivers/net/nbl/nbl_dev/nbl_dev.c b/drivers/net/nbl/nbl_dev/nbl_dev.c
index edff61e52e..2b0413fb7c 100644
--- a/drivers/net/nbl/nbl_dev/nbl_dev.c
+++ b/drivers/net/nbl/nbl_dev/nbl_dev.c
@@ -328,8 +328,8 @@ int nbl_dev_port_stop(struct rte_eth_dev *eth_dev)
 	rte_delay_ms(NBL_SAFE_THREADS_WAIT_TIME);
 
 	nbl_dev_hw_stats_stop(eth_dev);
-	nbl_clear_queues(eth_dev);
 	nbl_dev_txrx_stop(eth_dev);
+	nbl_clear_queues(eth_dev);
 	nbl_userdev_port_config(adapter, NBL_KERNEL_NETWORK);
 	return 0;
 }
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 93d9f0f7de..48f20e4f4c 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -44,19 +44,43 @@ static void nbl_res_txrx_stop_tx_ring(void *priv, u16 queue_idx)
 {
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
 	struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
-	int i;
+	struct nbl_tx_entry *tx_entry;
+	u16 i, free_cnt, free_mbuf_cnt;
 
 	if (!tx_ring)
 		return;
 
-	for (i = 0; i < tx_ring->nb_desc; i++) {
-		if (tx_ring->tx_entry[i].mbuf != NULL) {
-			rte_pktmbuf_free_seg(tx_ring->tx_entry[i].mbuf);
-			memset(&tx_ring->tx_entry[i], 0, sizeof(*tx_ring->tx_entry));
+	i = tx_ring->next_to_clean + 1 - NBL_TX_RS_THRESH;
+	free_cnt = tx_ring->vq_free_cnt;
+	tx_entry = &tx_ring->tx_entry[i];
+	free_mbuf_cnt = 0;
+
+	while (free_cnt < tx_ring->nb_desc) {
+		if (tx_entry->mbuf) {
+			free_mbuf_cnt++;
+			rte_pktmbuf_free_seg(tx_entry->mbuf);
+		}
+
+		i++;
+		tx_entry++;
+		free_cnt++;
+		if (i == tx_ring->nb_desc) {
+			i = 0;
+			tx_entry = &tx_ring->tx_entry[i];
 		}
+	}
+
+	NBL_LOG(DEBUG, "stop tx ring ntc %u, ntu %u, vq_free_cnt %u, mbuf free_cnt %u",
+		tx_ring->next_to_clean, tx_ring->next_to_use, tx_ring->vq_free_cnt, free_mbuf_cnt);
+
+	for (i = 0; i < tx_ring->nb_desc; i++) {
+		tx_ring->desc[i].addr = 0;
+		tx_ring->desc[i].len = 0;
+		tx_ring->desc[i].id = 0;
 		tx_ring->desc[i].flags = 0;
 	}
 
+	memset(tx_ring->tx_entry, 0, sizeof(*tx_entry) * tx_ring->nb_desc);
 	tx_ring->avail_used_flags = NBL_PACKED_DESC_F_AVAIL_BIT;
 	tx_ring->used_wrap_counter = 1;
 	tx_ring->next_to_clean = NBL_TX_RS_THRESH - 1;
@@ -208,25 +232,46 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
 static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
 {
 	struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
-	struct nbl_res_rx_ring *rx_ring =
-			NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
-	u16 i;
+	struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+	struct nbl_rx_entry *rx_buf;
+	u16 i, free_cnt, free_mbuf_cnt;
 
 	if (!rx_ring)
 		return;
+
+	i = rx_ring->next_to_clean;
+	free_cnt = rx_ring->vq_free_cnt;
+	free_mbuf_cnt = 0;
+
 	if (rx_ring->rx_entry != NULL) {
-		for (i = 0; i < rx_ring->nb_desc; i++) {
-			if (rx_ring->rx_entry[i].mbuf != NULL) {
-				rte_pktmbuf_free_seg(rx_ring->rx_entry[i].mbuf);
-				rx_ring->rx_entry[i].mbuf = NULL;
+		rx_buf = &rx_ring->rx_entry[i];
+		while (free_cnt < rx_ring->nb_desc) {
+			if (rx_buf->mbuf) {
+				free_mbuf_cnt++;
+				rte_pktmbuf_free_seg(rx_buf->mbuf);
+			}
+
+			i++;
+			rx_buf++;
+			free_cnt++;
+			if (i == rx_ring->nb_desc) {
+				i = 0;
+				rx_buf = &rx_ring->rx_entry[i];
 			}
-			rx_ring->desc[i].flags = 0;
 		}
 
-		for (i = rx_ring->nb_desc; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++)
+		memset(rx_ring->rx_entry, 0, sizeof(struct nbl_rx_entry) * rx_ring->nb_desc);
+
+		for (i = 0; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++) {
+			rx_ring->desc[i].addr = 0;
+			rx_ring->desc[i].len = 0;
+			rx_ring->desc[i].id = 0;
 			rx_ring->desc[i].flags = 0;
+		}
 	}
 
+	NBL_LOG(DEBUG, "stop rx ring ntc %u, ntu %u, vq_free_cnt %u, free_mbuf_cnt %u",
+		rx_ring->next_to_clean, rx_ring->next_to_use, rx_ring->vq_free_cnt, free_mbuf_cnt);
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 }
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
index 2ab4b09683..dc0bab2b7b 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
@@ -43,7 +43,7 @@ nbl_tx_free_bufs(struct nbl_res_tx_ring *txq)
 	if (!desc_is_used(&txq->desc[next_to_clean], txq->used_wrap_counter))
 		return 0;
 
-	n = 32;
+	n = NBL_TX_RS_THRESH;
 
 	 /* first buffer to free from S/W ring is at index
 	  * tx_next_dd - (tx_rs_thresh-1)
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v3 4/4] net/nbl: improve mailbox exception handling
  2026-01-27  2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
                     ` (2 preceding siblings ...)
  2026-01-27  2:52   ` [PATCH v3 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
@ 2026-01-27  2:52   ` Dimon Zhao
  2026-01-27 15:10   ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
  4 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27  2:52 UTC (permalink / raw)
  To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen

Key changes:
1. Replace simple 'acked' flag with a state machine to improve mailbox
   reliability. New states: IDLE, WAITING, ACKING, ACKED, TIMEOUT.
2. Validate message ID and type before processing acknowledgments to
   prevent incorrect ACK matching.
3. Handle timeout scenarios: discard ACKs received after timeout to
   prevent use of already released buffers.
4. Check mailbox status before sending: if the send mailbox descriptor
   is found to be in use or waiting for an ACK during Tx,
   the message will not be sent.

Fixes: a1c5ffa13b2c ("net/nbl: add channel layer")
Cc: stable@dpdk.org

Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
 drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++++++++------
 drivers/net/nbl/nbl_hw/nbl_channel.h | 11 +++-
 2 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.c b/drivers/net/nbl/nbl_hw/nbl_channel.c
index f81c4c8591..52ad71c833 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.c
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.c
@@ -22,6 +22,7 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
 {
 	struct nbl_chan_ring *txq = &chan_info->mailbox.txq;
 	size_t size = chan_info->mailbox.num_txq_entries * sizeof(struct nbl_chan_tx_desc);
+	int i;
 
 	txq->desc = nbl_alloc_dma_mem(&txq->desc_mem, size);
 	if (!txq->desc) {
@@ -36,6 +37,10 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
 		goto req_wait_queue_failed;
 	}
 
+	for (i = 0; i < chan_info->mailbox.num_txq_entries; i++)
+		rte_atomic_store_explicit(&chan_info->mailbox.wait[i].status, NBL_MBX_STATUS_IDLE,
+					  rte_memory_order_relaxed);
+
 	size = (u64)chan_info->mailbox.num_txq_entries * (u64)chan_info->mailbox.txq_buf_size;
 	txq->buf = nbl_alloc_dma_mem(&txq->buf_mem, size);
 	if (!txq->buf) {
@@ -253,24 +258,55 @@ static void nbl_chan_recv_ack_msg(void *priv, uint16_t srcid, uint16_t msgid,
 	uint32_t ack_msgid;
 	uint32_t ack_msgtype;
 	uint32_t copy_len;
+	int status = NBL_MBX_STATUS_WAITING;
 
 	chan_info = NBL_CHAN_MGT_TO_CHAN_INFO(chan_mgt);
 	ack_msgtype = *payload;
 	ack_msgid = *(payload + 1);
+
+	if (ack_msgid >= chan_mgt->chan_info->mailbox.num_txq_entries) {
+		NBL_LOG(ERR, "Invalid msg id %u, msg type %u", ack_msgid, ack_msgtype);
+		return;
+	}
+
 	wait_head = &chan_info->mailbox.wait[ack_msgid];
+	if (wait_head->msg_type != ack_msgtype) {
+		NBL_LOG(ERR, "Invalid msg id %u, msg type %u, wait msg type %u",
+			ack_msgid, ack_msgtype, wait_head->msg_type);
+		return;
+	}
+
+	if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
+							 NBL_MBX_STATUS_ACKING,
+							 rte_memory_order_acq_rel,
+							 rte_memory_order_relaxed)) {
+		NBL_LOG(ERR, "Invalid wait status %d msg id %u, msg type %u",
+			wait_head->status, ack_msgid, ack_msgtype);
+		return;
+	}
+
 	wait_head->ack_err = *(payload + 2);
 
 	NBL_LOG(DEBUG, "recv ack, srcid:%u, msgtype:%u, msgid:%u, ack_msgid:%u,"
-		" data_len:%u, ack_data_len:%u",
-		srcid, ack_msgtype, msgid, ack_msgid, data_len, wait_head->ack_data_len);
+		" data_len:%u, ack_data_len:%u, ack_err:%d",
+		srcid, ack_msgtype, msgid, ack_msgid, data_len,
+		wait_head->ack_data_len, wait_head->ack_err);
 
 	if (wait_head->ack_err >= 0 && (data_len > 3 * sizeof(uint32_t))) {
 		/* the mailbox msg parameter structure may change */
+		if (data_len - 3 * sizeof(uint32_t) != wait_head->ack_data_len)
+			NBL_LOG(INFO, "payload_len does not match ack_len!,"
+				" srcid:%u, msgtype:%u, msgid:%u, ack_msgid %u,"
+				" data_len:%u, ack_data_len:%u",
+				srcid, ack_msgtype, msgid,
+				ack_msgid, data_len, wait_head->ack_data_len);
 		copy_len = RTE_MIN(wait_head->ack_data_len, data_len - 3 * sizeof(uint32_t));
-		memcpy(wait_head->ack_data, payload + 3, copy_len);
+		if (copy_len)
+			memcpy(wait_head->ack_data, payload + 3, copy_len);
 	}
 
-	rte_atomic_store_explicit(&wait_head->acked, 1, rte_memory_order_release);
+	rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_ACKED,
+				  rte_memory_order_release);
 }
 
 static void nbl_chan_recv_msg(struct nbl_channel_mgt *chan_mgt, void *data)
@@ -371,12 +407,27 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
 {
 	struct nbl_chan_ring *txq;
 	struct nbl_chan_tx_desc *tx_desc;
+	struct nbl_chan_waitqueue_head *wait_head;
 	uint64_t pa;
 	void *va;
 	uint16_t next_to_use;
+	int status;
+
+	if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
+		NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
+		return 0xFFFF;
+	}
 
 	txq = &chan_info->mailbox.txq;
 	next_to_use = txq->next_to_use;
+
+	wait_head = &chan_info->mailbox.wait[next_to_use];
+	status = rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire);
+	if (status != NBL_MBX_STATUS_IDLE && status != NBL_MBX_STATUS_TIMEOUT) {
+		NBL_LOG(ERR, "too much inflight mailbox msg, msg id %u", next_to_use);
+		return 0xFFFF;
+	}
+
 	va = (u8 *)txq->buf + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
 	pa = txq->buf_mem.pa + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
 	tx_desc = NBL_CHAN_TX_DESC(txq, next_to_use);
@@ -384,10 +435,6 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
 	tx_desc->dstid = dstid;
 	tx_desc->msg_type = msg_type;
 	tx_desc->msgid = next_to_use;
-	if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
-		NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
-		return -1;
-	}
 
 	if (arg_len > NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN) {
 		memcpy(va, arg, arg_len);
@@ -418,6 +465,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
 	uint16_t msgid;
 	int ret;
 	int retry_time = 0;
+	int status = NBL_MBX_STATUS_WAITING;
 
 	if (chan_mgt->state)
 		return -EIO;
@@ -431,8 +479,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
 
 	if (msgid == 0xFFFF) {
 		rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
-		NBL_LOG(ERR, "chan tx queue full, send msgtype:%u"
-			" to dstid:%u failed",
+		NBL_LOG(ERR, "chan tx queue full, send msgtype:%u to dstid:%u failed",
 			chan_send->msg_type, chan_send->dstid);
 		return -ECOMM;
 	}
@@ -446,23 +493,34 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
 	wait_head = &chan_info->mailbox.wait[msgid];
 	wait_head->ack_data = chan_send->resp;
 	wait_head->ack_data_len = chan_send->resp_len;
-	rte_atomic_store_explicit(&wait_head->acked, 0, rte_memory_order_relaxed);
 	wait_head->msg_type = chan_send->msg_type;
-	rte_wmb();
+	rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_WAITING,
+				  rte_memory_order_release);
 	nbl_chan_kick_tx_ring(chan_mgt, chan_info);
 	rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
 
 	while (1) {
-		if (rte_atomic_load_explicit(&wait_head->acked, rte_memory_order_acquire))
-			return wait_head->ack_err;
+		if (rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire) ==
+		    NBL_MBX_STATUS_ACKED) {
+			ret = wait_head->ack_err;
+			rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_IDLE,
+						  rte_memory_order_release);
+			return ret;
+		}
 
 		rte_delay_us(50);
 		retry_time++;
-		if (retry_time > NBL_CHAN_RETRY_TIMES)
-			return -EIO;
+		if (retry_time > NBL_CHAN_RETRY_TIMES &&
+		    rte_atomic_compare_exchange_strong_explicit(&wait_head->status,
+								&status, NBL_MBX_STATUS_TIMEOUT,
+								rte_memory_order_acq_rel,
+								rte_memory_order_relaxed)) {
+			NBL_LOG(ERR, "send msgtype:%u msgid %u to dstid:%u timeout",
+				chan_send->msg_type, msgid, chan_send->dstid);
+			break;
+		}
 	}
-
-	return 0;
+	return -EIO;
 }
 
 static int nbl_chan_send_ack(void *priv, struct nbl_chan_ack_info *chan_ack)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.h b/drivers/net/nbl/nbl_hw/nbl_channel.h
index f8fa89af51..21fce7478d 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.h
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.h
@@ -41,6 +41,14 @@ enum {
 	NBL_MB_TX_QID = 1,
 };
 
+enum {
+	NBL_MBX_STATUS_IDLE = 0,
+	NBL_MBX_STATUS_WAITING,
+	NBL_MBX_STATUS_ACKING,
+	NBL_MBX_STATUS_ACKED,
+	NBL_MBX_STATUS_TIMEOUT,
+};
+
 struct __rte_packed_begin nbl_chan_tx_desc {
 	uint16_t flags;
 	uint16_t srcid;
@@ -74,8 +82,7 @@ struct nbl_chan_ring {
 
 struct nbl_chan_waitqueue_head {
 	char *ack_data;
-	RTE_ATOMIC(int)
-		acked;
+	RTE_ATOMIC(int) status;
 	int ack_err;
 	uint16_t ack_data_len;
 	uint16_t msg_type;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 0/4] improve NBL memory safety and mailbox reliability
  2026-01-27  2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
                     ` (3 preceding siblings ...)
  2026-01-27  2:52   ` [PATCH v3 4/4] net/nbl: improve mailbox exception handling Dimon Zhao
@ 2026-01-27 15:10   ` Stephen Hemminger
  2026-01-28  2:00     ` 回复:[PATCH " Dimon
  4 siblings, 1 reply; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-27 15:10 UTC (permalink / raw)
  To: Dimon Zhao; +Cc: dev

On Mon, 26 Jan 2026 18:52:16 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:

> Includes fixes for mbuf/memzone leaks and mailbox state handling.
> 
> Dimon Zhao (4):
>   net/nbl: fix memzone leak on queue release
>   net/nbl: fix mbuf headroom usage in packet Tx
>   net/nbl: fix mbuf double-free in queue cleanup
>   net/nbl: improve mailbox exception handling
> 
>  drivers/net/nbl/nbl_dev/nbl_dev.c     |   2 +-
>  drivers/net/nbl/nbl_hw/nbl_channel.c  |  94 +++++++++++++++++----
>  drivers/net/nbl/nbl_hw/nbl_channel.h  |  11 ++-
>  drivers/net/nbl/nbl_hw/nbl_resource.h |   2 +
>  drivers/net/nbl/nbl_hw/nbl_txrx.c     | 117 +++++++++++++++++++-------
>  drivers/net/nbl/nbl_hw/nbl_txrx_ops.h |   2 +-
>  6 files changed, 175 insertions(+), 53 deletions(-)
> 

Queued to next-net with minor rewording of the commit message in patch 4.

Improve mailbox reliability by replacing the simple 'acked' flag with
a state machine for tracking message status.

Key changes:
1. Replace simple 'acked' flag with a state machine...

^ permalink raw reply	[flat|nested] 24+ messages in thread

* 回复:[PATCH v3 0/4] improve NBL memory safety and mailbox reliability
  2026-01-27 15:10   ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
@ 2026-01-28  2:00     ` Dimon
  0 siblings, 0 replies; 24+ messages in thread
From: Dimon @ 2026-01-28  2:00 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

[-- Attachment #1: Type: text/plain, Size: 1460 bytes --]

Hi Stephen,
 Thanks for queuing the series and improving the commit message.
 The new title better captures the essence of the fix.
------------------------------------------------------------------
发件人:Stephen Hemminger <stephen@networkplumber.org>
发送时间:2026年1月27日(周二) 23:10
收件人:Dimon<dimon.zhao@nebula-matrix.com>
抄 送:dev<dev@dpdk.org>
主 题:Re: [PATCH v3 0/4] improve NBL memory safety and mailbox reliability
On Mon, 26 Jan 2026 18:52:16 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:
> Includes fixes for mbuf/memzone leaks and mailbox state handling.
> 
> Dimon Zhao (4):
> net/nbl: fix memzone leak on queue release
> net/nbl: fix mbuf headroom usage in packet Tx
> net/nbl: fix mbuf double-free in queue cleanup
> net/nbl: improve mailbox exception handling
> 
> drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
> drivers/net/nbl/nbl_hw/nbl_channel.c | 94 +++++++++++++++++----
> drivers/net/nbl/nbl_hw/nbl_channel.h | 11 ++-
> drivers/net/nbl/nbl_hw/nbl_resource.h | 2 +
> drivers/net/nbl/nbl_hw/nbl_txrx.c | 117 +++++++++++++++++++-------
> drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
> 6 files changed, 175 insertions(+), 53 deletions(-)
> 
Queued to next-net with minor rewording of the commit message in patch 4.
Improve mailbox reliability by replacing the simple 'acked' flag with
a state machine for tracking message status.
Key changes:
1. Replace simple 'acked' flag with a state machine...

[-- Attachment #2: Type: text/html, Size: 7855 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2026-01-28  2:00 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-23  3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
2026-01-23  3:16 ` [PATCH v1 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
2026-01-23  3:16 ` [PATCH v1 2/4] net/nbl: optimize mbuf headroom usage in packet transmission Dimon Zhao
2026-01-23  3:16 ` [PATCH v1 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
2026-01-23  3:16 ` [PATCH v1 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
2026-01-24 18:31   ` [REVIEW] " Stephen Hemminger
2026-01-23  4:36 ` [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
2026-01-24 19:08 ` Stephen Hemminger
2026-01-26  1:37   ` 回复:[PATCH " Dimon
2026-01-26  7:58 ` [PATCH v2 " Dimon Zhao
2026-01-26  7:58   ` [PATCH v2 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
2026-01-27  1:02     ` Stephen Hemminger
2026-01-26  7:58   ` [PATCH v2 2/4] net/nbl: optimize mbuf headroom usage in packet Tx Dimon Zhao
2026-01-26  7:58   ` [PATCH v2 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
2026-01-26  7:58   ` [PATCH v2 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
2026-01-26 18:18     ` [REVIEW] " Stephen Hemminger
2026-01-26 18:28       ` Stephen Hemminger
2026-01-27  2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
2026-01-27  2:52   ` [PATCH v3 1/4] net/nbl: fix memzone leak on queue release Dimon Zhao
2026-01-27  2:52   ` [PATCH v3 2/4] net/nbl: fix mbuf headroom usage in packet Tx Dimon Zhao
2026-01-27  2:52   ` [PATCH v3 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
2026-01-27  2:52   ` [PATCH v3 4/4] net/nbl: improve mailbox exception handling Dimon Zhao
2026-01-27 15:10   ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
2026-01-28  2:00     ` 回复:[PATCH " Dimon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox