* [PATCH v1 1/4] net/nbl: fix memzone leak in queue release
2026-01-23 3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
@ 2026-01-23 3:16 ` Dimon Zhao
2026-01-23 3:16 ` [PATCH v1 2/4] net/nbl: optimize mbuf headroom usage in packet transmission Dimon Zhao
` (6 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-23 3:16 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Add memzone free calls in tx_queue_release() and rx_queue_release()
to fix memory leaks. The memzones allocated in tx_queue_setup() and
rx_queue_setup() were not being freed when queues were released.
Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_resource.h | 2 ++
drivers/net/nbl/nbl_hw/nbl_txrx.c | 44 ++++++++++++++++-----------
2 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_resource.h b/drivers/net/nbl/nbl_hw/nbl_resource.h
index 1f6515f64d..469c3f5827 100644
--- a/drivers/net/nbl/nbl_hw/nbl_resource.h
+++ b/drivers/net/nbl/nbl_hw/nbl_resource.h
@@ -137,6 +137,7 @@ struct nbl_res_tx_ring {
volatile struct nbl_packed_desc *desc;
struct nbl_tx_entry *tx_entry;
const struct rte_memzone *net_hdr_mz;
+ const struct rte_memzone *desc_mz;
volatile uint8_t *notify;
const struct rte_eth_dev *eth_dev;
struct nbl_common_info *common;
@@ -178,6 +179,7 @@ struct nbl_res_tx_ring {
struct nbl_res_rx_ring {
volatile struct nbl_packed_desc *desc;
struct nbl_rx_entry *rx_entry;
+ const struct rte_memzone *desc_mz;
struct rte_mempool *mempool;
volatile uint8_t *notify;
const struct rte_eth_dev *eth_dev;
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 650790b4fc..103f56c3a7 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -71,7 +71,12 @@ static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
if (!tx_ring)
return;
- rte_free(tx_ring->tx_entry);
+ if (tx_ring->tx_entry)
+ rte_free(tx_ring->tx_entry);
+ if (tx_ring->net_hdr_mz)
+ rte_memzone_free(tx_ring->net_hdr_mz);
+ if (tx_ring->desc_mz)
+ rte_memzone_free(tx_ring->desc_mz);
rte_free(tx_ring);
txrx_mgt->tx_rings[queue_idx] = NULL;
}
@@ -104,7 +109,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
if (eth_dev->data->tx_queues[param->queue_idx] != NULL) {
NBL_LOG(WARNING, "re-setup an already allocated tx queue");
- nbl_res_txrx_stop_tx_ring(priv, param->queue_idx);
+ nbl_res_txrx_release_tx_ring(priv, param->queue_idx);
eth_dev->data->tx_queues[param->queue_idx] = NULL;
}
@@ -173,6 +178,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
tx_ring->next_to_use = 0;
tx_ring->desc = (struct nbl_packed_desc *)memzone->addr;
tx_ring->net_hdr_mz = net_hdr_mz;
+ tx_ring->desc_mz = memzone;
tx_ring->eth_dev = eth_dev;
tx_ring->dma_set_msb = common->dma_set_msb;
tx_ring->dma_limit_msb = common->dma_limit_msb;
@@ -226,6 +232,23 @@ static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
rx_ring->next_to_use = 0;
}
+static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
+{
+ struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
+ struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
+ struct nbl_res_rx_ring *rx_ring =
+ NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+ if (!rx_ring)
+ return;
+
+ if (rx_ring->rx_entry)
+ rte_free(rx_ring->rx_entry);
+ if (rx_ring->desc_mz)
+ rte_memzone_free(rx_ring->desc_mz);
+ rte_free(rx_ring);
+ txrx_mgt->rx_rings[queue_idx] = NULL;
+}
+
static int nbl_res_txrx_start_rx_ring(void *priv,
struct nbl_start_rx_ring_param *param,
u64 *dma_addr)
@@ -244,7 +267,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
if (eth_dev->data->rx_queues[param->queue_idx] != NULL) {
NBL_LOG(WARNING, "re-setup an already allocated rx queue");
- nbl_res_txrx_stop_rx_ring(priv, param->queue_idx);
+ nbl_res_txrx_release_rx_ring(priv, param->queue_idx);
eth_dev->data->rx_queues[param->queue_idx] = NULL;
}
@@ -275,6 +298,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
rx_ring->product = param->product;
rx_ring->mempool = param->mempool;
+ rx_ring->desc_mz = memzone;
rx_ring->nb_desc = param->nb_desc;
rx_ring->queue_id = param->queue_idx;
rx_ring->notify_qid =
@@ -376,20 +400,6 @@ static int nbl_res_alloc_rx_bufs(void *priv, u16 queue_idx)
return 0;
}
-static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
-{
- struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
- struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
- struct nbl_res_rx_ring *rx_ring =
- NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
- if (!rx_ring)
- return;
-
- rte_free(rx_ring->rx_entry);
- rte_free(rx_ring);
- txrx_mgt->rx_rings[queue_idx] = NULL;
-}
-
static void nbl_res_txrx_update_rx_ring(void *priv, u16 index)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [PATCH v1 2/4] net/nbl: optimize mbuf headroom usage in packet transmission
2026-01-23 3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
2026-01-23 3:16 ` [PATCH v1 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
@ 2026-01-23 3:16 ` Dimon Zhao
2026-01-23 3:16 ` [PATCH v1 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
` (5 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-23 3:16 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Only use mbuf headroom when mbuf->refcnt == 1,
indicating exclusive ownership. For shared mbufs (refcnt > 1),
use next desc instead to avoid data corruption.
This prevents modifying shared buffers that may be used by other
processing contexts.
Implementation details:
- Check mbuf->refcnt before using headroom
- For refcnt == 1: use existing headroom (performance optimal)
- For refcnt > 1: use next desc
Fixes: e5fc1f78c78c ("net/nbl: support Tx and Rx burst")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_txrx.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 103f56c3a7..e906f0707e 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -514,7 +514,10 @@ nbl_res_txrx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts, u
tx_extend_len = required_headroom + sizeof(struct rte_ether_hdr);
}
- if (rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
+ if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
+ RTE_MBUF_DIRECT(tx_pkt) &&
+ tx_pkt->nb_segs == 1 &&
+ rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
can_push = 1;
u = rte_pktmbuf_mtod_offset(tx_pkt, union nbl_tx_extend_head *,
-required_headroom);
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [PATCH v1 3/4] net/nbl: fix mbuf double-free in queue cleanup
2026-01-23 3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
2026-01-23 3:16 ` [PATCH v1 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
2026-01-23 3:16 ` [PATCH v1 2/4] net/nbl: optimize mbuf headroom usage in packet transmission Dimon Zhao
@ 2026-01-23 3:16 ` Dimon Zhao
2026-01-23 3:16 ` [PATCH v1 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
` (4 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-23 3:16 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Only free hardware-owned mbufs to avoid double-free
corruption of application-owned buffers.
TX ring: free only transmitted mbufs (next_to_clean to next_to_use).
RX ring: free only allocated receive buffers
(next_to_clean to next_to_use),
mbufs already delivered to the application (before next_to_clean)
must not be touched.
Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
drivers/net/nbl/nbl_hw/nbl_txrx.c | 73 ++++++++++++++++++++++-----
drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
3 files changed, 61 insertions(+), 16 deletions(-)
diff --git a/drivers/net/nbl/nbl_dev/nbl_dev.c b/drivers/net/nbl/nbl_dev/nbl_dev.c
index edff61e52e..2b0413fb7c 100644
--- a/drivers/net/nbl/nbl_dev/nbl_dev.c
+++ b/drivers/net/nbl/nbl_dev/nbl_dev.c
@@ -328,8 +328,8 @@ int nbl_dev_port_stop(struct rte_eth_dev *eth_dev)
rte_delay_ms(NBL_SAFE_THREADS_WAIT_TIME);
nbl_dev_hw_stats_stop(eth_dev);
- nbl_clear_queues(eth_dev);
nbl_dev_txrx_stop(eth_dev);
+ nbl_clear_queues(eth_dev);
nbl_userdev_port_config(adapter, NBL_KERNEL_NETWORK);
return 0;
}
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index e906f0707e..a90b0debc5 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -44,19 +44,43 @@ static void nbl_res_txrx_stop_tx_ring(void *priv, u16 queue_idx)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
- int i;
+ struct nbl_tx_entry *tx_entry;
+ u16 i, free_cnt, free_mbuf_cnt;
if (!tx_ring)
return;
- for (i = 0; i < tx_ring->nb_desc; i++) {
- if (tx_ring->tx_entry[i].mbuf != NULL) {
- rte_pktmbuf_free_seg(tx_ring->tx_entry[i].mbuf);
- memset(&tx_ring->tx_entry[i], 0, sizeof(*tx_ring->tx_entry));
+ i = tx_ring->next_to_clean + 1 - NBL_TX_RS_THRESH;
+ free_cnt = tx_ring->vq_free_cnt;
+ tx_entry = &tx_ring->tx_entry[i];
+ free_mbuf_cnt = 0;
+
+ while (free_cnt < tx_ring->nb_desc) {
+ if (tx_entry->mbuf) {
+ free_mbuf_cnt++;
+ rte_pktmbuf_free_seg(tx_entry->mbuf);
+ }
+
+ i++;
+ tx_entry++;
+ free_cnt++;
+ if (i == tx_ring->nb_desc) {
+ i = 0;
+ tx_entry = &tx_ring->tx_entry[i];
}
+ }
+
+ NBL_LOG(DEBUG, "stop tx ring ntc %u, ntu %u, vq_free_cnt %u, mbuf free_cnt %u",
+ tx_ring->next_to_clean, tx_ring->next_to_use, tx_ring->vq_free_cnt, free_mbuf_cnt);
+
+ for (i = 0; i < tx_ring->nb_desc; i++) {
+ tx_ring->desc[i].addr = 0;
+ tx_ring->desc[i].len = 0;
+ tx_ring->desc[i].id = 0;
tx_ring->desc[i].flags = 0;
}
+ memset(tx_ring->tx_entry, 0, sizeof(*tx_entry) * tx_ring->nb_desc);
tx_ring->avail_used_flags = NBL_PACKED_DESC_F_AVAIL_BIT;
tx_ring->used_wrap_counter = 1;
tx_ring->next_to_clean = NBL_TX_RS_THRESH - 1;
@@ -209,25 +233,46 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
- struct nbl_res_rx_ring *rx_ring =
- NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
- u16 i;
+ struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+ struct nbl_rx_entry *rx_buf;
+ u16 i, free_cnt, free_mbuf_cnt;
if (!rx_ring)
return;
+
+ i = rx_ring->next_to_clean;
+ free_cnt = rx_ring->vq_free_cnt;
+ free_mbuf_cnt = 0;
+
if (rx_ring->rx_entry != NULL) {
- for (i = 0; i < rx_ring->nb_desc; i++) {
- if (rx_ring->rx_entry[i].mbuf != NULL) {
- rte_pktmbuf_free_seg(rx_ring->rx_entry[i].mbuf);
- rx_ring->rx_entry[i].mbuf = NULL;
+ rx_buf = &rx_ring->rx_entry[i];
+ while (free_cnt < rx_ring->nb_desc) {
+ if (rx_buf->mbuf) {
+ free_mbuf_cnt++;
+ rte_pktmbuf_free_seg(rx_buf->mbuf);
+ }
+
+ i++;
+ rx_buf++;
+ free_cnt++;
+ if (i == rx_ring->nb_desc) {
+ i = 0;
+ rx_buf = &rx_ring->rx_entry[i];
}
- rx_ring->desc[i].flags = 0;
}
- for (i = rx_ring->nb_desc; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++)
+ memset(rx_ring->rx_entry, 0, sizeof(struct nbl_rx_entry) * rx_ring->nb_desc);
+
+ for (i = 0; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++) {
+ rx_ring->desc[i].addr = 0;
+ rx_ring->desc[i].len = 0;
+ rx_ring->desc[i].id = 0;
rx_ring->desc[i].flags = 0;
+ }
}
+ NBL_LOG(DEBUG, "stop rx ring ntc %u, ntu %u, vq_free_cnt %u, free_mbuf_cnt %u",
+ rx_ring->next_to_clean, rx_ring->next_to_use, rx_ring->vq_free_cnt, free_mbuf_cnt);
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
}
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
index 2ab4b09683..dc0bab2b7b 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
@@ -43,7 +43,7 @@ nbl_tx_free_bufs(struct nbl_res_tx_ring *txq)
if (!desc_is_used(&txq->desc[next_to_clean], txq->used_wrap_counter))
return 0;
- n = 32;
+ n = NBL_TX_RS_THRESH;
/* first buffer to free from S/W ring is at index
* tx_next_dd - (tx_rs_thresh-1)
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [PATCH v1 4/4] net/nbl: improve exception handling for the mailbox
2026-01-23 3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
` (2 preceding siblings ...)
2026-01-23 3:16 ` [PATCH v1 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
@ 2026-01-23 3:16 ` Dimon Zhao
2026-01-24 18:31 ` [REVIEW] " Stephen Hemminger
2026-01-23 4:36 ` [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
` (3 subsequent siblings)
7 siblings, 1 reply; 24+ messages in thread
From: Dimon Zhao @ 2026-01-23 3:16 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Key changes:
1. Replace simple 'acked' flag with a state machine to improve mailbox
reliability. New states: IDLE, WAITING, ACKING, ACKED, TIMEOUT.
2. Validate message ID and type before processing acknowledgments to
prevent incorrect ACK matching.
3. Handle timeout scenarios: discard ACKs received after timeout to
prevent use of already released buffers.
4. Check mailbox status before sending: if the send mailbox descriptor
is found to be in use or waiting for an ACK during transmission,
the message will not be sent.
Fixes: a1c5ffa13b2c ("net/nbl: add channel layer")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++++++++------
drivers/net/nbl/nbl_hw/nbl_channel.h | 11 +++-
2 files changed, 85 insertions(+), 20 deletions(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.c b/drivers/net/nbl/nbl_hw/nbl_channel.c
index f81c4c8591..75e4354e18 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.c
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.c
@@ -22,6 +22,7 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
{
struct nbl_chan_ring *txq = &chan_info->mailbox.txq;
size_t size = chan_info->mailbox.num_txq_entries * sizeof(struct nbl_chan_tx_desc);
+ int i;
txq->desc = nbl_alloc_dma_mem(&txq->desc_mem, size);
if (!txq->desc) {
@@ -36,6 +37,10 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
goto req_wait_queue_failed;
}
+ for (i = 0; i < chan_info->mailbox.num_txq_entries; i++)
+ rte_atomic_store_explicit(&chan_info->mailbox.wait[i].status, NBL_MBX_STATUS_IDLE,
+ rte_memory_order_relaxed);
+
size = (u64)chan_info->mailbox.num_txq_entries * (u64)chan_info->mailbox.txq_buf_size;
txq->buf = nbl_alloc_dma_mem(&txq->buf_mem, size);
if (!txq->buf) {
@@ -253,24 +258,55 @@ static void nbl_chan_recv_ack_msg(void *priv, uint16_t srcid, uint16_t msgid,
uint32_t ack_msgid;
uint32_t ack_msgtype;
uint32_t copy_len;
+ int status = NBL_MBX_STATUS_WAITING;
chan_info = NBL_CHAN_MGT_TO_CHAN_INFO(chan_mgt);
ack_msgtype = *payload;
ack_msgid = *(payload + 1);
+
+ if (ack_msgid >= chan_mgt->chan_info->mailbox.num_txq_entries) {
+ NBL_LOG(ERR, "Invalid msg id %u, msg type %u", ack_msgid, ack_msgtype);
+ return;
+ }
+
wait_head = &chan_info->mailbox.wait[ack_msgid];
+ if (wait_head->msg_type != ack_msgtype) {
+ NBL_LOG(ERR, "Invalid msg id %u, msg type %u, wait msg type %u",
+ ack_msgid, ack_msgtype, wait_head->msg_type);
+ return;
+ }
+
+ if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
+ NBL_MBX_STATUS_ACKING,
+ rte_memory_order_acq_rel,
+ rte_memory_order_relaxed)) {
+ NBL_LOG(ERR, "Invalid wait status %d msg id %u, msg type %u",
+ wait_head->status, ack_msgid, ack_msgtype);
+ return;
+ }
+
wait_head->ack_err = *(payload + 2);
NBL_LOG(DEBUG, "recv ack, srcid:%u, msgtype:%u, msgid:%u, ack_msgid:%u,"
- " data_len:%u, ack_data_len:%u",
- srcid, ack_msgtype, msgid, ack_msgid, data_len, wait_head->ack_data_len);
+ " data_len:%u, ack_data_len:%u, ack_err:%d",
+ srcid, ack_msgtype, msgid, ack_msgid, data_len,
+ wait_head->ack_data_len, wait_head->ack_err);
if (wait_head->ack_err >= 0 && (data_len > 3 * sizeof(uint32_t))) {
/* the mailbox msg parameter structure may change */
+ if (data_len - 3 * sizeof(uint32_t) != wait_head->ack_data_len)
+ NBL_LOG(INFO, "payload_len do not match ack_len!,"
+ " srcid:%u, msgtype:%u, msgid:%u, ack_msgid %u,"
+ " data_len:%u, ack_data_len:%u",
+ srcid, ack_msgtype, msgid,
+ ack_msgid, data_len, wait_head->ack_data_len);
copy_len = RTE_MIN(wait_head->ack_data_len, data_len - 3 * sizeof(uint32_t));
- memcpy(wait_head->ack_data, payload + 3, copy_len);
+ if (copy_len)
+ memcpy(wait_head->ack_data, payload + 3, copy_len);
}
- rte_atomic_store_explicit(&wait_head->acked, 1, rte_memory_order_release);
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_ACKED,
+ rte_memory_order_release);
}
static void nbl_chan_recv_msg(struct nbl_channel_mgt *chan_mgt, void *data)
@@ -371,12 +407,27 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
{
struct nbl_chan_ring *txq;
struct nbl_chan_tx_desc *tx_desc;
+ struct nbl_chan_waitqueue_head *wait_head;
uint64_t pa;
void *va;
uint16_t next_to_use;
+ int status;
+
+ if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
+ NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
+ return 0xFFFF;
+ }
txq = &chan_info->mailbox.txq;
next_to_use = txq->next_to_use;
+
+ wait_head = &chan_info->mailbox.wait[next_to_use];
+ status = rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire);
+ if (status != NBL_MBX_STATUS_IDLE && status != NBL_MBX_STATUS_TIMEOUT) {
+ NBL_LOG(ERR, "too much inflight mailbox msg, msg id %u", next_to_use);
+ return 0xFFFF;
+ }
+
va = (u8 *)txq->buf + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
pa = txq->buf_mem.pa + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
tx_desc = NBL_CHAN_TX_DESC(txq, next_to_use);
@@ -384,10 +435,6 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
tx_desc->dstid = dstid;
tx_desc->msg_type = msg_type;
tx_desc->msgid = next_to_use;
- if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
- NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
- return -1;
- }
if (arg_len > NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN) {
memcpy(va, arg, arg_len);
@@ -418,6 +465,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
uint16_t msgid;
int ret;
int retry_time = 0;
+ int status = NBL_MBX_STATUS_WAITING;
if (chan_mgt->state)
return -EIO;
@@ -431,8 +479,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
if (msgid == 0xFFFF) {
rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
- NBL_LOG(ERR, "chan tx queue full, send msgtype:%u"
- " to dstid:%u failed",
+ NBL_LOG(ERR, "chan tx queue full, send msgtype:%u to dstid:%u failed",
chan_send->msg_type, chan_send->dstid);
return -ECOMM;
}
@@ -446,23 +493,34 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
wait_head = &chan_info->mailbox.wait[msgid];
wait_head->ack_data = chan_send->resp;
wait_head->ack_data_len = chan_send->resp_len;
- rte_atomic_store_explicit(&wait_head->acked, 0, rte_memory_order_relaxed);
wait_head->msg_type = chan_send->msg_type;
- rte_wmb();
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_WAITING,
+ rte_memory_order_release);
nbl_chan_kick_tx_ring(chan_mgt, chan_info);
rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
while (1) {
- if (rte_atomic_load_explicit(&wait_head->acked, rte_memory_order_acquire))
- return wait_head->ack_err;
+ if (rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire) ==
+ NBL_MBX_STATUS_ACKED) {
+ ret = wait_head->ack_err;
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_IDLE,
+ rte_memory_order_release);
+ return ret;
+ }
rte_delay_us(50);
retry_time++;
- if (retry_time > NBL_CHAN_RETRY_TIMES)
- return -EIO;
+ if (retry_time > NBL_CHAN_RETRY_TIMES &&
+ rte_atomic_compare_exchange_strong_explicit(&wait_head->status,
+ &status, NBL_MBX_STATUS_TIMEOUT,
+ rte_memory_order_acq_rel,
+ rte_memory_order_relaxed)) {
+ NBL_LOG(ERR, "send msgtype:%u msgid %u to dstid:%u timeout",
+ chan_send->msg_type, msgid, chan_send->dstid);
+ break;
+ }
}
-
- return 0;
+ return -EIO;
}
static int nbl_chan_send_ack(void *priv, struct nbl_chan_ack_info *chan_ack)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.h b/drivers/net/nbl/nbl_hw/nbl_channel.h
index f8fa89af51..21fce7478d 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.h
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.h
@@ -41,6 +41,14 @@ enum {
NBL_MB_TX_QID = 1,
};
+enum {
+ NBL_MBX_STATUS_IDLE = 0,
+ NBL_MBX_STATUS_WAITING,
+ NBL_MBX_STATUS_ACKING,
+ NBL_MBX_STATUS_ACKED,
+ NBL_MBX_STATUS_TIMEOUT,
+};
+
struct __rte_packed_begin nbl_chan_tx_desc {
uint16_t flags;
uint16_t srcid;
@@ -74,8 +82,7 @@ struct nbl_chan_ring {
struct nbl_chan_waitqueue_head {
char *ack_data;
- RTE_ATOMIC(int)
- acked;
+ RTE_ATOMIC(int) status;
int ack_err;
uint16_t ack_data_len;
uint16_t msg_type;
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [REVIEW] net/nbl: improve exception handling for the mailbox
2026-01-23 3:16 ` [PATCH v1 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
@ 2026-01-24 18:31 ` Stephen Hemminger
0 siblings, 0 replies; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-24 18:31 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
AI-generated review of bundle-1687-nbl-mbox.mbox
Reviewed using Claude (claude-opus-4-5-20251101)
This is an automated review. Please verify all suggestions.
---
# DPDK Patch Review: net/nbl Mailbox and Queue Fixes
## Overview
This patch series contains 4 patches for the nbl network driver, addressing memory leaks, mbuf handling issues, and mailbox reliability improvements.
---
## Patch 1/4: net/nbl: fix memzone leak in queue release
### Commit Message
**Info**: Subject line is 48 characters - good length.
**Pass**: Proper `Fixes:` tag format with 12-character SHA.
**Pass**: `Cc: stable@dpdk.org` correctly placed after Fixes tag.
**Pass**: Signed-off-by present.
**Pass**: Tag order is correct.
### Code Review
**Warning: Unnecessary NULL checks before rte_free()**
```c
if (tx_ring->tx_entry)
rte_free(tx_ring->tx_entry);
if (tx_ring->net_hdr_mz)
rte_memzone_free(tx_ring->net_hdr_mz);
if (tx_ring->desc_mz)
rte_memzone_free(tx_ring->desc_mz);
```
Per AGENTS.md, `rte_free()` and `rte_memzone_free()` handle NULL pointers safely. These NULL checks are unnecessary.
**Warning: Missing blank line after variable declarations**
```c
struct nbl_res_rx_ring *rx_ring =
NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
if (!rx_ring)
return;
```
Should have a blank line between the declaration and the `if` statement.
**Info**: Good fix - storing `desc_mz` pointer for proper cleanup is the right approach.
---
## Patch 2/4: net/nbl: optimize mbuf headroom usage in packet transmission
### Commit Message
**Error**: Subject line is 65 characters - exceeds 60 character limit.
Suggested: `net/nbl: optimize mbuf headroom usage in Tx`
**Warning**: The word "transmission" should be abbreviated as "Tx" per DPDK conventions (words-case.txt).
**Pass**: Proper `Fixes:` tag and `Cc: stable@dpdk.org`.
**Pass**: Signed-off-by present.
### Code Review
**Pass**: The logic to check `rte_mbuf_refcnt_read()`, `RTE_MBUF_DIRECT()`, and `nb_segs` before using headroom is correct for avoiding corruption of shared mbufs.
**Info**: Good defensive programming to ensure exclusive ownership before modifying the mbuf headroom.
---
## Patch 3/4: net/nbl: fix mbuf double-free in queue cleanup
### Commit Message
**Pass**: Subject line is 47 characters - good.
**Pass**: Proper `Fixes:` tag and `Cc: stable@dpdk.org`.
**Pass**: Signed-off-by present.
### Code Review
**Warning: Missing blank line after variable declarations**
```c
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
struct nbl_tx_entry *tx_entry;
u16 i, free_cnt, free_mbuf_cnt;
if (!tx_ring)
```
There should be a blank line between declarations and the first statement.
**Warning: Implicit boolean comparison**
```c
if (tx_entry->mbuf) {
```
Should be:
```c
if (tx_entry->mbuf != NULL) {
```
Same issue in rx_ring cleanup:
```c
if (rx_buf->mbuf) {
```
**Info**: The logic change to only free mbufs in the range `[next_to_clean, next_to_use)` is correct for avoiding double-free of application-owned buffers.
**Info**: Moving `nbl_clear_queues()` after `nbl_dev_txrx_stop()` in `nbl_dev_port_stop()` ensures proper ordering.
---
## Patch 4/4: net/nbl: improve exception handling for the mailbox
### Commit Message
**Pass**: Subject line is 53 characters - good.
**Pass**: Proper `Fixes:` tag and `Cc: stable@dpdk.org`.
**Pass**: Signed-off-by present.
### Code Review
**Warning: Missing blank line after variable declarations**
Multiple instances throughout the code:
```c
struct nbl_chan_waitqueue_head *wait_head;
int status = NBL_MBX_STATUS_WAITING;
if (ack_msgid >= chan_mgt->chan_info->mailbox.num_txq_entries) {
```
**Warning: Variable initialization may hide bugs**
```c
int status = NBL_MBX_STATUS_WAITING;
```
This is used with `rte_atomic_compare_exchange_strong_explicit()`, which requires the expected value to be passed by reference. In this specific case, the initialization is intentional and required. However, consider adding a comment to clarify this is not a default value but the expected state for the compare-exchange operation.
**Error: Line exceeds 100 characters**
```c
if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
NBL_MBX_STATUS_ACKING,
```
The continuation lines appear properly indented, but verify the first line stays within 100 characters.
**Warning: Log message formatting**
```c
NBL_LOG(INFO, "payload_len do not match ack_len!,"
```
Grammar: "do not" should be "does not" (payload_len is singular).
**Info**: The state machine approach (IDLE, WAITING, ACKING, ACKED, TIMEOUT) is a good improvement over the simple boolean flag for mailbox reliability.
**Info**: Good addition of message ID and type validation before processing ACKs.
**Info**: Using `rte_atomic_compare_exchange_strong_explicit()` for state transitions is correct for thread-safe mailbox handling.
---
## General Issues Across All Patches
### Type Usage
**Warning**: The patches use `u16`, `u64` types which appear to be driver-specific typedefs. This is acceptable in driver code, but ensure consistency with existing driver conventions.
### Documentation
**Info**: Consider updating release notes if these fixes address user-visible issues, as they are bug fixes being backported to stable.
---
## Summary
| Patch | Errors | Warnings | Info |
|-------|--------|----------|------|
| 1/4 | 0 | 2 | 1 |
| 2/4 | 1 | 1 | 1 |
| 3/4 | 0 | 3 | 2 |
| 4/4 | 1 | 4 | 3 |
### Must Fix (Errors)
1. **Patch 2/4**: Subject line exceeds 60 characters
2. **Patch 4/4**: Verify line length compliance (100 char limit)
### Should Fix (Warnings)
1. Remove unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`
2. Add blank lines between variable declarations and statements
3. Use explicit NULL comparisons (`!= NULL`) instead of implicit boolean tests
4. Fix grammar in log message ("does not" instead of "do not")
5. Use "Tx" instead of "transmission" in commit subject
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v1 0/4] improve NBL memory safety and mailbox reliability
2026-01-23 3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
` (3 preceding siblings ...)
2026-01-23 3:16 ` [PATCH v1 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
@ 2026-01-23 4:36 ` Stephen Hemminger
2026-01-24 19:08 ` Stephen Hemminger
` (2 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-23 4:36 UTC (permalink / raw)
To: Dimon Zhao; +Cc: dev
On Thu, 22 Jan 2026 19:16:23 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:
> Includes fixes for mbuf/memzone leaks and mailbox state handling.
>
> Dimon Zhao (4):
> net/nbl: fix memzone leak in queue release
> net/nbl: optimize mbuf headroom usage in packet transmission
> net/nbl: fix mbuf double-free in queue cleanup
> net/nbl: improve exception handling for the mailbox
>
> drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
> drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++----
> drivers/net/nbl/nbl_hw/nbl_channel.h | 11 ++-
> drivers/net/nbl/nbl_hw/nbl_resource.h | 2 +
> drivers/net/nbl/nbl_hw/nbl_txrx.c | 122 +++++++++++++++++++-------
> drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
> 6 files changed, 179 insertions(+), 54 deletions(-)
>
This is the result of AI code review of this series.
Note: AI can and does flag things that are not a problem; do not blindly follow its suggestions.
# Code Review: DPDK Patch Bundle for net/nbl Driver
## Overview
This patch series contains 4 patches fixing various issues in the net/nbl driver: memzone leaks, mbuf headroom usage, double-free issues, and mailbox exception handling.
---
## Patch 1/4: fix memzone leak in queue release
### Errors
1. **Unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`**
- File: `drivers/net/nbl/nbl_hw/nbl_txrx.c`, lines 74-79, 245-248
- `rte_free()` and `rte_memzone_free()` safely handle NULL pointers
```c
// Current (unnecessary checks):
if (tx_ring->tx_entry)
rte_free(tx_ring->tx_entry);
if (tx_ring->net_hdr_mz)
rte_memzone_free(tx_ring->net_hdr_mz);
// Should be:
rte_free(tx_ring->tx_entry);
rte_memzone_free(tx_ring->net_hdr_mz);
rte_memzone_free(tx_ring->desc_mz);
```
### Warnings
1. **Missing blank line between declarations and statements**
- File: `drivers/net/nbl/nbl_hw/nbl_txrx.c`, function `nbl_res_txrx_release_rx_ring()`
```c
struct nbl_res_rx_ring *rx_ring =
NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
if (!rx_ring) // Missing blank line before this
return;
```
### Info
1. The fix correctly addresses the memzone leak by storing memzone pointers and freeing them on release.
---
## Patch 2/4: optimize mbuf headroom usage in packet transmission
### Errors
None.
### Warnings
1. **Subject line could be clearer about the fix nature**
- The subject says "optimize" but this is actually a **bug fix** that prevents data corruption
- Consider: `net/nbl: fix data corruption in shared mbuf transmission`
2. **Commit body mentions "Fixes" tag but issue is more of a safety improvement**
- The `Fixes:` tag is appropriate since this prevents potential data corruption
### Info
1. The logic change is correct - checking `refcnt == 1`, `RTE_MBUF_DIRECT()`, and `nb_segs == 1` before modifying headroom ensures exclusive ownership.
---
## Patch 3/4: fix mbuf double-free in queue cleanup
### Errors
1. **Missing blank line between declaration and code**
- File: `drivers/net/nbl/nbl_hw/nbl_txrx.c`, function `nbl_res_txrx_stop_tx_ring()`
```c
struct nbl_tx_entry *tx_entry;
u16 i, free_cnt, free_mbuf_cnt;
if (!tx_ring) // Need blank line before this
return;
```
2. **Inconsistent use of types**
- Mix of `u16` and `uint16_t` - DPDK prefers standard C types (`uint16_t`)
- File: `drivers/net/nbl/nbl_hw/nbl_txrx.c`
```c
u16 i, free_cnt, free_mbuf_cnt; // Should use uint16_t
```
### Warnings
1. **Subject line formatting**
- "queue cleanup" is vague; consider more specific wording
- Suggest: `net/nbl: fix mbuf double-free on queue stop`
2. **Commit body formatting issue - tab in description**
- Line: `(next_to_clean to next_to_use),` followed by tab-indented text
- Should use consistent formatting in commit body
3. **Magic numbers in code**
- The change from `n = 32` to `n = NBL_TX_RS_THRESH` is good, but ensure the macro is defined and documented
### Info
1. The reordering of `nbl_clear_queues()` and `nbl_dev_txrx_stop()` in `nbl_dev_port_stop()` makes sense for proper cleanup ordering.
---
## Patch 4/4: improve exception handling for the mailbox
### Errors
1. **Line length exceeds 100 characters**
- File: `drivers/net/nbl/nbl_hw/nbl_channel.c`, line 299-302:
```c
NBL_LOG(INFO, "payload_len do not match ack_len!,"
" srcid:%u, msgtype:%u, msgid:%u, ack_msgid %u,"
" data_len:%u, ack_data_len:%u",
```
- Some of these concatenated strings may exceed 100 chars when combined with indentation
2. **Grammar issue in log message**
- `"payload_len do not match ack_len!"` should be `"payload_len does not match ack_len"`
3. **Missing blank line after declarations**
- File: `drivers/net/nbl/nbl_hw/nbl_channel.c`, function `nbl_chan_init_tx_queue()`:
```c
struct nbl_chan_ring *txq = &chan_info->mailbox.txq;
size_t size = chan_info->mailbox.num_txq_entries * sizeof(struct nbl_chan_tx_desc);
int i;
txq->desc = nbl_alloc_dma_mem(...); // OK - blank line present
```
- But later addition lacks proper spacing context
### Warnings
1. **Subject line could be more specific**
- "improve exception handling for the mailbox" is vague
- Consider: `net/nbl: add mailbox state machine for reliability`
2. **Use of `int` for status variable that's compared against enum**
- File: `drivers/net/nbl/nbl_hw/nbl_channel.c`, line 260, 412:
```c
int status = NBL_MBX_STATUS_WAITING;
```
- Consider using a proper enum type or at least adding a comment
3. **Inconsistent initialization location**
- `int status = NBL_MBX_STATUS_WAITING` initialized at declaration in one place, but used with different initial values in different contexts
4. **Dead code path concern**
- In `nbl_chan_send_msg()`, if `rte_atomic_compare_exchange_strong_explicit` fails after timeout, the loop continues but `retry_time` keeps increasing, leading to repeated timeout checks
### Info
1. The state machine approach (`IDLE`, `WAITING`, `ACKING`, `ACKED`, `TIMEOUT`) is a significant improvement over the simple `acked` flag.
2. The validation of message ID and type before processing ACKs is good defensive programming.
---
## General Issues Across All Patches
### Warnings
1. **Inconsistent type usage**
- Mix of `u8`, `u16`, `u64` (kernel-style) and `uint8_t`, `uint16_t`, `uint64_t` (C99 standard)
- DPDK generally prefers C99 standard types
2. **Missing documentation updates**
- No updates to release notes for bug fixes that may affect users
- Consider adding entries to `doc/guides/rel_notes/` for these fixes
### Info
1. All patches have proper `Fixes:` tags with 12-character SHAs
2. All patches have `Cc: stable@dpdk.org` appropriately
3. All patches have valid `Signed-off-by:` lines
4. Subject lines are within 60 character limit
5. SPDX headers are not modified (existing files)
---
## Summary
| Severity | Count |
|----------|-------|
| Error | 5 |
| Warning | 11 |
| Info | 6 |
**Recommended actions:**
1. Remove unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`
2. Add missing blank lines between declarations and statements
3. Use consistent C99 types (`uint16_t` instead of `u16`)
4. Fix grammar in log messages
5. Consider more specific subject lines for patches 2, 3, and 4
6. Verify line lengths don't exceed 100 characters in long log statements
^ permalink raw reply [flat|nested] 24+ messages in thread

* Re: [PATCH v1 0/4] improve NBL memory safety and mailbox reliability
2026-01-23 3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
` (4 preceding siblings ...)
2026-01-23 4:36 ` [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
@ 2026-01-24 19:08 ` Stephen Hemminger
2026-01-26 1:37 ` 回复:[PATCH " Dimon
2026-01-26 7:58 ` [PATCH v2 " Dimon Zhao
2026-01-27 2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
7 siblings, 1 reply; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-24 19:08 UTC (permalink / raw)
To: Dimon Zhao; +Cc: dev
On Thu, 22 Jan 2026 19:16:23 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:
> Includes fixes for mbuf/memzone leaks and mailbox state handling.
>
> Dimon Zhao (4):
> net/nbl: fix memzone leak in queue release
> net/nbl: optimize mbuf headroom usage in packet transmission
> net/nbl: fix mbuf double-free in queue cleanup
> net/nbl: improve exception handling for the mailbox
>
> drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
> drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++----
> drivers/net/nbl/nbl_hw/nbl_channel.h | 11 ++-
> drivers/net/nbl/nbl_hw/nbl_resource.h | 2 +
> drivers/net/nbl/nbl_hw/nbl_txrx.c | 122 +++++++++++++++++++-------
> drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
> 6 files changed, 179 insertions(+), 54 deletions(-)
>
Some of the automated review comments are valid, some are not.
Please resubmit, or I can fix during merge
Fix:
1. Remove unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`
4. Fix grammar in log message ("does not" instead of "do not")
Ok as is, but would be good to change:
2. Add blank lines between variable declarations and statements
5. Use "Tx" instead of "transmission" in commit subject
Ignore:
3. Use explicit NULL comparisons (`!= NULL`) instead of implicit boolean tests
^ permalink raw reply [flat|nested] 24+ messages in thread

* 回复:[PATCH v1 0/4] improve NBL memory safety and mailbox reliability
2026-01-24 19:08 ` Stephen Hemminger
@ 2026-01-26 1:37 ` Dimon
0 siblings, 0 replies; 24+ messages in thread
From: Dimon @ 2026-01-26 1:37 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: dev
[-- Attachment #1: Type: text/plain, Size: 1672 bytes --]
Hi Stephen,
Thanks for the review. I'll resubmit the patch shortly.
Thank you.
------------------------------------------------------------------
发件人:Stephen Hemminger <stephen@networkplumber.org>
发送时间:2026年1月25日(周日) 03:08
收件人:Dimon<dimon.zhao@nebula-matrix.com>
抄 送:dev<dev@dpdk.org>
主 题:Re: [PATCH v1 0/4] improve NBL memory safety and mailbox reliability
On Thu, 22 Jan 2026 19:16:23 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:
> Includes fixes for mbuf/memzone leaks and mailbox state handling.
>
> Dimon Zhao (4):
> net/nbl: fix memzone leak in queue release
> net/nbl: optimize mbuf headroom usage in packet transmission
> net/nbl: fix mbuf double-free in queue cleanup
> net/nbl: improve exception handling for the mailbox
>
> drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
> drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++----
> drivers/net/nbl/nbl_hw/nbl_channel.h | 11 ++-
> drivers/net/nbl/nbl_hw/nbl_resource.h | 2 +
> drivers/net/nbl/nbl_hw/nbl_txrx.c | 122 +++++++++++++++++++-------
> drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
> 6 files changed, 179 insertions(+), 54 deletions(-)
>
Some of the automated review comments are valid, some are not.
Please resubmit, or I can fix during merge
Fix:
1. Remove unnecessary NULL checks before `rte_free()` and `rte_memzone_free()`
4. Fix grammar in log message ("does not" instead of "do not")
Ok as is, but would be good to change:
2. Add blank lines between variable declarations and statements
5. Use "Tx" instead of "transmission" in commit subject
Ignore:
3. Use explicit NULL comparisons (`!= NULL`) instead of implicit boolean tests
[-- Attachment #2: Type: text/html, Size: 11029 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v2 0/4] improve NBL memory safety and mailbox reliability
2026-01-23 3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
` (5 preceding siblings ...)
2026-01-24 19:08 ` Stephen Hemminger
@ 2026-01-26 7:58 ` Dimon Zhao
2026-01-26 7:58 ` [PATCH v2 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
` (3 more replies)
2026-01-27 2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
7 siblings, 4 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26 7:58 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao
Includes fixes for mbuf/memzone leaks and mailbox state handling.
Dimon Zhao (4):
net/nbl: fix memzone leak in queue release
net/nbl: optimize mbuf headroom usage in packet Tx
net/nbl: fix mbuf double-free in queue cleanup
net/nbl: improve exception handling for the mailbox
drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
drivers/net/nbl/nbl_hw/nbl_channel.c | 94 +++++++++++++++++----
drivers/net/nbl/nbl_hw/nbl_channel.h | 11 ++-
drivers/net/nbl/nbl_hw/nbl_resource.h | 2 +
drivers/net/nbl/nbl_hw/nbl_txrx.c | 115 ++++++++++++++++++--------
drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
6 files changed, 171 insertions(+), 55 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 24+ messages in thread

* [PATCH v2 1/4] net/nbl: fix memzone leak in queue release
2026-01-26 7:58 ` [PATCH v2 " Dimon Zhao
@ 2026-01-26 7:58 ` Dimon Zhao
2026-01-27 1:02 ` Stephen Hemminger
2026-01-26 7:58 ` [PATCH v2 2/4] net/nbl: optimize mbuf headroom usage in packet Tx Dimon Zhao
` (2 subsequent siblings)
3 siblings, 1 reply; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26 7:58 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Add memzone free calls in tx_queue_release() and rx_queue_release()
to fix memory leaks. The memzones allocated in tx_queue_setup() and
rx_queue_setup() were not being freed when queues were released.
Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_resource.h | 2 ++
drivers/net/nbl/nbl_hw/nbl_txrx.c | 37 ++++++++++++++-------------
2 files changed, 21 insertions(+), 18 deletions(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_resource.h b/drivers/net/nbl/nbl_hw/nbl_resource.h
index 1f6515f64d..469c3f5827 100644
--- a/drivers/net/nbl/nbl_hw/nbl_resource.h
+++ b/drivers/net/nbl/nbl_hw/nbl_resource.h
@@ -137,6 +137,7 @@ struct nbl_res_tx_ring {
volatile struct nbl_packed_desc *desc;
struct nbl_tx_entry *tx_entry;
const struct rte_memzone *net_hdr_mz;
+ const struct rte_memzone *desc_mz;
volatile uint8_t *notify;
const struct rte_eth_dev *eth_dev;
struct nbl_common_info *common;
@@ -178,6 +179,7 @@ struct nbl_res_tx_ring {
struct nbl_res_rx_ring {
volatile struct nbl_packed_desc *desc;
struct nbl_rx_entry *rx_entry;
+ const struct rte_memzone *desc_mz;
struct rte_mempool *mempool;
volatile uint8_t *notify;
const struct rte_eth_dev *eth_dev;
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 650790b4fc..563f011cd3 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -69,9 +69,10 @@ static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
- if (!tx_ring)
- return;
+
rte_free(tx_ring->tx_entry);
+ rte_memzone_free(tx_ring->net_hdr_mz);
+ rte_memzone_free(tx_ring->desc_mz);
rte_free(tx_ring);
txrx_mgt->tx_rings[queue_idx] = NULL;
}
@@ -104,7 +105,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
if (eth_dev->data->tx_queues[param->queue_idx] != NULL) {
NBL_LOG(WARNING, "re-setup an already allocated tx queue");
- nbl_res_txrx_stop_tx_ring(priv, param->queue_idx);
+ nbl_res_txrx_release_tx_ring(priv, param->queue_idx);
eth_dev->data->tx_queues[param->queue_idx] = NULL;
}
@@ -173,6 +174,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
tx_ring->next_to_use = 0;
tx_ring->desc = (struct nbl_packed_desc *)memzone->addr;
tx_ring->net_hdr_mz = net_hdr_mz;
+ tx_ring->desc_mz = memzone;
tx_ring->eth_dev = eth_dev;
tx_ring->dma_set_msb = common->dma_set_msb;
tx_ring->dma_limit_msb = common->dma_limit_msb;
@@ -226,6 +228,18 @@ static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
rx_ring->next_to_use = 0;
}
+static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
+{
+ struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
+ struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
+ struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+
+ rte_free(rx_ring->rx_entry);
+ rte_memzone_free(rx_ring->desc_mz);
+ rte_free(rx_ring);
+ txrx_mgt->rx_rings[queue_idx] = NULL;
+}
+
static int nbl_res_txrx_start_rx_ring(void *priv,
struct nbl_start_rx_ring_param *param,
u64 *dma_addr)
@@ -244,7 +258,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
if (eth_dev->data->rx_queues[param->queue_idx] != NULL) {
NBL_LOG(WARNING, "re-setup an already allocated rx queue");
- nbl_res_txrx_stop_rx_ring(priv, param->queue_idx);
+ nbl_res_txrx_release_rx_ring(priv, param->queue_idx);
eth_dev->data->rx_queues[param->queue_idx] = NULL;
}
@@ -275,6 +289,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
rx_ring->product = param->product;
rx_ring->mempool = param->mempool;
+ rx_ring->desc_mz = memzone;
rx_ring->nb_desc = param->nb_desc;
rx_ring->queue_id = param->queue_idx;
rx_ring->notify_qid =
@@ -376,20 +391,6 @@ static int nbl_res_alloc_rx_bufs(void *priv, u16 queue_idx)
return 0;
}
-static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
-{
- struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
- struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
- struct nbl_res_rx_ring *rx_ring =
- NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
- if (!rx_ring)
- return;
-
- rte_free(rx_ring->rx_entry);
- rte_free(rx_ring);
- txrx_mgt->rx_rings[queue_idx] = NULL;
-}
-
static void nbl_res_txrx_update_rx_ring(void *priv, u16 index)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* Re: [PATCH v2 1/4] net/nbl: fix memzone leak in queue release
2026-01-26 7:58 ` [PATCH v2 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
@ 2026-01-27 1:02 ` Stephen Hemminger
0 siblings, 0 replies; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-27 1:02 UTC (permalink / raw)
To: Dimon Zhao; +Cc: dev, stable, Kyo Liu, Leon Yu, Sam Chen
On Sun, 25 Jan 2026 23:58:12 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:
> diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
> index 650790b4fc..563f011cd3 100644
> --- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
> +++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
> @@ -69,9 +69,10 @@ static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
> struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
> struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
> struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
> - if (!tx_ring)
> - return;
> +
Automated patch review spotted this removal as a possible problem.
Code Review:
ERROR: NULL pointer dereference introduced
The NULL check for tx_ring was removed but the code immediately dereferences it:
c
static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
rte_free(tx_ring->tx_entry); /* <-- crash if tx_ring is NULL */
The old code had:
c
if (!tx_ring)
return;
This guard must be restored, or the callers must guarantee tx_ring is never NULL.
Similarly for nbl_res_txrx_release_rx_ring — no NULL check before dereferencing rx_ring->rx_entry and rx_ring->desc_mz.
Further AI-led analysis shows scenarios where it could be a problem.
Complete Call Chain Analysis for Patch 1/4
Call Flow
DPDK Application
↓
rte_eth_dev_tx_queue_release() or eth_dev_ops->tx_queue_release
↓
nbl_tx_queues_release() [nbl_dev.c:420]
↓
disp_ops->release_tx_ring() → nbl_disp_release_tx_ring() [nbl_dispatch.c:163]
↓
res_ops->release_tx_ring() → nbl_res_txrx_release_tx_ring() [nbl_txrx.c]
Where tx_ring Can Be NULL
The macro definition:
c
#define NBL_RES_MGT_TO_TX_RING(res_mgt, index) \
(NBL_RES_MGT_TO_TXRX_MGT(res_mgt)->tx_rings[(index)])
Where tx_rings is:
c
struct nbl_res_tx_ring **tx_rings; // Array of pointers
Scenario 1: Normal close without all queues set up
In nbl_dev_port_close() → nbl_release_queues() (line 337-349):
c
for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
disp_ops->release_tx_ring(NBL_DEV_MGT_TO_DISP_PRIV(dev_mgt), i);
If the application configured 4 queues but only called tx_queue_setup() on 2 of them, tx_rings[2] and tx_rings[3] will be NULL.
Scenario 2: DPDK ethdev layer calling release on never-setup queue
DPDK's rte_eth_dev_close() can call tx_queue_release for all configured queues without validating they were set up.
Scenario 3: Re-setup path mismatch
In the patched nbl_res_txrx_start_tx_ring():
c
if (eth_dev->data->tx_queues[param->queue_idx] != NULL) {
NBL_LOG(WARNING, "re-setup an already allocated tx queue");
nbl_res_txrx_release_tx_ring(priv, param->queue_idx); // <-- Called
The check is on eth_dev->data->tx_queues[] (ethdev layer), but tx_rings[] (resource layer) could differ if a previous setup partially failed.
Verdict: Confirmed Bug in Patch 1/4
The removal of NULL checks causes NULL pointer dereference in both nbl_res_txrx_release_tx_ring() and nbl_res_txrx_release_rx_ring().
Required Fix
c
static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
+ if (!tx_ring)
+ return;
+
rte_free(tx_ring->tx_entry);
rte_memzone_free(tx_ring->net_hdr_mz);
rte_memzone_free(tx_ring->desc_mz);
rte_free(tx_ring);
txrx_mgt->tx_rings[queue_idx] = NULL;
}
static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+ if (!rx_ring)
+ return;
+
rte_free(rx_ring->rx_entry);
rte_memzone_free(rx_ring->desc_mz);
rte_free(rx_ring);
txrx_mgt->rx_rings[queue_idx] = NULL;
}
The memzone leak fix itself is correct and needed — but the defensive NULL checks must be preserved.
Conclusion: put the NULL checks back into the rx and tx release paths, then retest and resubmit a new version.
Full chat is here if you are interested: https://claude.ai/share/d543bddd-7406-4f00-b5cb-4a92e8718476
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v2 2/4] net/nbl: optimize mbuf headroom usage in packet Tx
2026-01-26 7:58 ` [PATCH v2 " Dimon Zhao
2026-01-26 7:58 ` [PATCH v2 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
@ 2026-01-26 7:58 ` Dimon Zhao
2026-01-26 7:58 ` [PATCH v2 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
2026-01-26 7:58 ` [PATCH v2 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
3 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26 7:58 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Only use mbuf headroom when mbuf->refcnt == 1,
indicating exclusive ownership. For shared mbufs (refcnt > 1),
use next desc instead to avoid data corruption.
This prevents modifying shared buffers that may be used by other
processing contexts.
Implementation details:
- Check mbuf->refcnt before using headroom
- For refcnt == 1: use existing headroom (performance optimal)
- For refcnt > 1: use next desc
Fixes: e5fc1f78c78c ("net/nbl: support Tx and Rx burst")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_txrx.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 563f011cd3..2a6acdcac1 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -505,7 +505,10 @@ nbl_res_txrx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts, u
tx_extend_len = required_headroom + sizeof(struct rte_ether_hdr);
}
- if (rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
+ if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
+ RTE_MBUF_DIRECT(tx_pkt) &&
+ tx_pkt->nb_segs == 1 &&
+ rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
can_push = 1;
u = rte_pktmbuf_mtod_offset(tx_pkt, union nbl_tx_extend_head *,
-required_headroom);
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [PATCH v2 3/4] net/nbl: fix mbuf double-free in queue cleanup
2026-01-26 7:58 ` [PATCH v2 " Dimon Zhao
2026-01-26 7:58 ` [PATCH v2 1/4] net/nbl: fix memzone leak in queue release Dimon Zhao
2026-01-26 7:58 ` [PATCH v2 2/4] net/nbl: optimize mbuf headroom usage in packet Tx Dimon Zhao
@ 2026-01-26 7:58 ` Dimon Zhao
2026-01-26 7:58 ` [PATCH v2 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
3 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26 7:58 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Only free hardware-owned mbufs to avoid double-free
corruption of application-owned buffers.
TX ring: free only transmitted mbufs (next_to_clean to next_to_use).
RX ring: free only allocated receive buffers
(next_to_clean to next_to_use),
mbufs already delivered to the application (before next_to_clean)
must not be touched.
Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
drivers/net/nbl/nbl_hw/nbl_txrx.c | 73 ++++++++++++++++++++++-----
drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
3 files changed, 61 insertions(+), 16 deletions(-)
diff --git a/drivers/net/nbl/nbl_dev/nbl_dev.c b/drivers/net/nbl/nbl_dev/nbl_dev.c
index edff61e52e..2b0413fb7c 100644
--- a/drivers/net/nbl/nbl_dev/nbl_dev.c
+++ b/drivers/net/nbl/nbl_dev/nbl_dev.c
@@ -328,8 +328,8 @@ int nbl_dev_port_stop(struct rte_eth_dev *eth_dev)
rte_delay_ms(NBL_SAFE_THREADS_WAIT_TIME);
nbl_dev_hw_stats_stop(eth_dev);
- nbl_clear_queues(eth_dev);
nbl_dev_txrx_stop(eth_dev);
+ nbl_clear_queues(eth_dev);
nbl_userdev_port_config(adapter, NBL_KERNEL_NETWORK);
return 0;
}
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 2a6acdcac1..3fa0b22e42 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -44,19 +44,43 @@ static void nbl_res_txrx_stop_tx_ring(void *priv, u16 queue_idx)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
- int i;
+ struct nbl_tx_entry *tx_entry;
+ u16 i, free_cnt, free_mbuf_cnt;
if (!tx_ring)
return;
- for (i = 0; i < tx_ring->nb_desc; i++) {
- if (tx_ring->tx_entry[i].mbuf != NULL) {
- rte_pktmbuf_free_seg(tx_ring->tx_entry[i].mbuf);
- memset(&tx_ring->tx_entry[i], 0, sizeof(*tx_ring->tx_entry));
+ i = tx_ring->next_to_clean + 1 - NBL_TX_RS_THRESH;
+ free_cnt = tx_ring->vq_free_cnt;
+ tx_entry = &tx_ring->tx_entry[i];
+ free_mbuf_cnt = 0;
+
+ while (free_cnt < tx_ring->nb_desc) {
+ if (tx_entry->mbuf) {
+ free_mbuf_cnt++;
+ rte_pktmbuf_free_seg(tx_entry->mbuf);
+ }
+
+ i++;
+ tx_entry++;
+ free_cnt++;
+ if (i == tx_ring->nb_desc) {
+ i = 0;
+ tx_entry = &tx_ring->tx_entry[i];
}
+ }
+
+ NBL_LOG(DEBUG, "stop tx ring ntc %u, ntu %u, vq_free_cnt %u, mbuf free_cnt %u",
+ tx_ring->next_to_clean, tx_ring->next_to_use, tx_ring->vq_free_cnt, free_mbuf_cnt);
+
+ for (i = 0; i < tx_ring->nb_desc; i++) {
+ tx_ring->desc[i].addr = 0;
+ tx_ring->desc[i].len = 0;
+ tx_ring->desc[i].id = 0;
tx_ring->desc[i].flags = 0;
}
+ memset(tx_ring->tx_entry, 0, sizeof(*tx_entry) * tx_ring->nb_desc);
tx_ring->avail_used_flags = NBL_PACKED_DESC_F_AVAIL_BIT;
tx_ring->used_wrap_counter = 1;
tx_ring->next_to_clean = NBL_TX_RS_THRESH - 1;
@@ -205,25 +229,46 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
- struct nbl_res_rx_ring *rx_ring =
- NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
- u16 i;
+ struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+ struct nbl_rx_entry *rx_buf;
+ u16 i, free_cnt, free_mbuf_cnt;
if (!rx_ring)
return;
+
+ i = rx_ring->next_to_clean;
+ free_cnt = rx_ring->vq_free_cnt;
+ free_mbuf_cnt = 0;
+
if (rx_ring->rx_entry != NULL) {
- for (i = 0; i < rx_ring->nb_desc; i++) {
- if (rx_ring->rx_entry[i].mbuf != NULL) {
- rte_pktmbuf_free_seg(rx_ring->rx_entry[i].mbuf);
- rx_ring->rx_entry[i].mbuf = NULL;
+ rx_buf = &rx_ring->rx_entry[i];
+ while (free_cnt < rx_ring->nb_desc) {
+ if (rx_buf->mbuf) {
+ free_mbuf_cnt++;
+ rte_pktmbuf_free_seg(rx_buf->mbuf);
+ }
+
+ i++;
+ rx_buf++;
+ free_cnt++;
+ if (i == rx_ring->nb_desc) {
+ i = 0;
+ rx_buf = &rx_ring->rx_entry[i];
}
- rx_ring->desc[i].flags = 0;
}
- for (i = rx_ring->nb_desc; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++)
+ memset(rx_ring->rx_entry, 0, sizeof(struct nbl_rx_entry) * rx_ring->nb_desc);
+
+ for (i = 0; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++) {
+ rx_ring->desc[i].addr = 0;
+ rx_ring->desc[i].len = 0;
+ rx_ring->desc[i].id = 0;
rx_ring->desc[i].flags = 0;
+ }
}
+ NBL_LOG(DEBUG, "stop rx ring ntc %u, ntu %u, vq_free_cnt %u, free_mbuf_cnt %u",
+ rx_ring->next_to_clean, rx_ring->next_to_use, rx_ring->vq_free_cnt, free_mbuf_cnt);
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
}
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
index 2ab4b09683..dc0bab2b7b 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
@@ -43,7 +43,7 @@ nbl_tx_free_bufs(struct nbl_res_tx_ring *txq)
if (!desc_is_used(&txq->desc[next_to_clean], txq->used_wrap_counter))
return 0;
- n = 32;
+ n = NBL_TX_RS_THRESH;
/* first buffer to free from S/W ring is at index
* tx_next_dd - (tx_rs_thresh-1)
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [PATCH v2 4/4] net/nbl: improve exception handling for the mailbox
2026-01-26 7:58 ` [PATCH v2 " Dimon Zhao
` (2 preceding siblings ...)
2026-01-26 7:58 ` [PATCH v2 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
@ 2026-01-26 7:58 ` Dimon Zhao
2026-01-26 18:18 ` [REVIEW] " Stephen Hemminger
3 siblings, 1 reply; 24+ messages in thread
From: Dimon Zhao @ 2026-01-26 7:58 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Key changes:
1. Replace simple 'acked' flag with a state machine to improve mailbox
reliability. New states: IDLE, WAITING, ACKING, ACKED, TIMEOUT.
2. Validate message ID and type before processing acknowledgments to
prevent incorrect ACK matching.
3. Handle timeout scenarios: discard ACKs received after timeout to
prevent use of already released buffers.
4. Check mailbox status before sending: if the send mailbox descriptor
is found to be in use or waiting for an ACK during Tx,
the message will not be sent.
Fixes: a1c5ffa13b2c ("net/nbl: add channel layer")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++++++++------
drivers/net/nbl/nbl_hw/nbl_channel.h | 11 +++-
2 files changed, 85 insertions(+), 20 deletions(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.c b/drivers/net/nbl/nbl_hw/nbl_channel.c
index f81c4c8591..52ad71c833 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.c
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.c
@@ -22,6 +22,7 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
{
struct nbl_chan_ring *txq = &chan_info->mailbox.txq;
size_t size = chan_info->mailbox.num_txq_entries * sizeof(struct nbl_chan_tx_desc);
+ int i;
txq->desc = nbl_alloc_dma_mem(&txq->desc_mem, size);
if (!txq->desc) {
@@ -36,6 +37,10 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
goto req_wait_queue_failed;
}
+ for (i = 0; i < chan_info->mailbox.num_txq_entries; i++)
+ rte_atomic_store_explicit(&chan_info->mailbox.wait[i].status, NBL_MBX_STATUS_IDLE,
+ rte_memory_order_relaxed);
+
size = (u64)chan_info->mailbox.num_txq_entries * (u64)chan_info->mailbox.txq_buf_size;
txq->buf = nbl_alloc_dma_mem(&txq->buf_mem, size);
if (!txq->buf) {
@@ -253,24 +258,55 @@ static void nbl_chan_recv_ack_msg(void *priv, uint16_t srcid, uint16_t msgid,
uint32_t ack_msgid;
uint32_t ack_msgtype;
uint32_t copy_len;
+ int status = NBL_MBX_STATUS_WAITING;
chan_info = NBL_CHAN_MGT_TO_CHAN_INFO(chan_mgt);
ack_msgtype = *payload;
ack_msgid = *(payload + 1);
+
+ if (ack_msgid >= chan_mgt->chan_info->mailbox.num_txq_entries) {
+ NBL_LOG(ERR, "Invalid msg id %u, msg type %u", ack_msgid, ack_msgtype);
+ return;
+ }
+
wait_head = &chan_info->mailbox.wait[ack_msgid];
+ if (wait_head->msg_type != ack_msgtype) {
+ NBL_LOG(ERR, "Invalid msg id %u, msg type %u, wait msg type %u",
+ ack_msgid, ack_msgtype, wait_head->msg_type);
+ return;
+ }
+
+ if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
+ NBL_MBX_STATUS_ACKING,
+ rte_memory_order_acq_rel,
+ rte_memory_order_relaxed)) {
+ NBL_LOG(ERR, "Invalid wait status %d msg id %u, msg type %u",
+ wait_head->status, ack_msgid, ack_msgtype);
+ return;
+ }
+
wait_head->ack_err = *(payload + 2);
NBL_LOG(DEBUG, "recv ack, srcid:%u, msgtype:%u, msgid:%u, ack_msgid:%u,"
- " data_len:%u, ack_data_len:%u",
- srcid, ack_msgtype, msgid, ack_msgid, data_len, wait_head->ack_data_len);
+ " data_len:%u, ack_data_len:%u, ack_err:%d",
+ srcid, ack_msgtype, msgid, ack_msgid, data_len,
+ wait_head->ack_data_len, wait_head->ack_err);
if (wait_head->ack_err >= 0 && (data_len > 3 * sizeof(uint32_t))) {
/* the mailbox msg parameter structure may change */
+ if (data_len - 3 * sizeof(uint32_t) != wait_head->ack_data_len)
+ NBL_LOG(INFO, "payload_len does not match ack_len!,"
+ " srcid:%u, msgtype:%u, msgid:%u, ack_msgid %u,"
+ " data_len:%u, ack_data_len:%u",
+ srcid, ack_msgtype, msgid,
+ ack_msgid, data_len, wait_head->ack_data_len);
copy_len = RTE_MIN(wait_head->ack_data_len, data_len - 3 * sizeof(uint32_t));
- memcpy(wait_head->ack_data, payload + 3, copy_len);
+ if (copy_len)
+ memcpy(wait_head->ack_data, payload + 3, copy_len);
}
- rte_atomic_store_explicit(&wait_head->acked, 1, rte_memory_order_release);
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_ACKED,
+ rte_memory_order_release);
}
static void nbl_chan_recv_msg(struct nbl_channel_mgt *chan_mgt, void *data)
@@ -371,12 +407,27 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
{
struct nbl_chan_ring *txq;
struct nbl_chan_tx_desc *tx_desc;
+ struct nbl_chan_waitqueue_head *wait_head;
uint64_t pa;
void *va;
uint16_t next_to_use;
+ int status;
+
+ if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
+ NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
+ return 0xFFFF;
+ }
txq = &chan_info->mailbox.txq;
next_to_use = txq->next_to_use;
+
+ wait_head = &chan_info->mailbox.wait[next_to_use];
+ status = rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire);
+ if (status != NBL_MBX_STATUS_IDLE && status != NBL_MBX_STATUS_TIMEOUT) {
+ NBL_LOG(ERR, "too much inflight mailbox msg, msg id %u", next_to_use);
+ return 0xFFFF;
+ }
+
va = (u8 *)txq->buf + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
pa = txq->buf_mem.pa + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
tx_desc = NBL_CHAN_TX_DESC(txq, next_to_use);
@@ -384,10 +435,6 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
tx_desc->dstid = dstid;
tx_desc->msg_type = msg_type;
tx_desc->msgid = next_to_use;
- if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
- NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
- return -1;
- }
if (arg_len > NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN) {
memcpy(va, arg, arg_len);
@@ -418,6 +465,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
uint16_t msgid;
int ret;
int retry_time = 0;
+ int status = NBL_MBX_STATUS_WAITING;
if (chan_mgt->state)
return -EIO;
@@ -431,8 +479,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
if (msgid == 0xFFFF) {
rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
- NBL_LOG(ERR, "chan tx queue full, send msgtype:%u"
- " to dstid:%u failed",
+ NBL_LOG(ERR, "chan tx queue full, send msgtype:%u to dstid:%u failed",
chan_send->msg_type, chan_send->dstid);
return -ECOMM;
}
@@ -446,23 +493,34 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
wait_head = &chan_info->mailbox.wait[msgid];
wait_head->ack_data = chan_send->resp;
wait_head->ack_data_len = chan_send->resp_len;
- rte_atomic_store_explicit(&wait_head->acked, 0, rte_memory_order_relaxed);
wait_head->msg_type = chan_send->msg_type;
- rte_wmb();
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_WAITING,
+ rte_memory_order_release);
nbl_chan_kick_tx_ring(chan_mgt, chan_info);
rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
while (1) {
- if (rte_atomic_load_explicit(&wait_head->acked, rte_memory_order_acquire))
- return wait_head->ack_err;
+ if (rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire) ==
+ NBL_MBX_STATUS_ACKED) {
+ ret = wait_head->ack_err;
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_IDLE,
+ rte_memory_order_release);
+ return ret;
+ }
rte_delay_us(50);
retry_time++;
- if (retry_time > NBL_CHAN_RETRY_TIMES)
- return -EIO;
+ if (retry_time > NBL_CHAN_RETRY_TIMES &&
+ rte_atomic_compare_exchange_strong_explicit(&wait_head->status,
+ &status, NBL_MBX_STATUS_TIMEOUT,
+ rte_memory_order_acq_rel,
+ rte_memory_order_relaxed)) {
+ NBL_LOG(ERR, "send msgtype:%u msgid %u to dstid:%u timeout",
+ chan_send->msg_type, msgid, chan_send->dstid);
+ break;
+ }
}
-
- return 0;
+ return -EIO;
}
static int nbl_chan_send_ack(void *priv, struct nbl_chan_ack_info *chan_ack)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.h b/drivers/net/nbl/nbl_hw/nbl_channel.h
index f8fa89af51..21fce7478d 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.h
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.h
@@ -41,6 +41,14 @@ enum {
NBL_MB_TX_QID = 1,
};
+enum {
+ NBL_MBX_STATUS_IDLE = 0,
+ NBL_MBX_STATUS_WAITING,
+ NBL_MBX_STATUS_ACKING,
+ NBL_MBX_STATUS_ACKED,
+ NBL_MBX_STATUS_TIMEOUT,
+};
+
struct __rte_packed_begin nbl_chan_tx_desc {
uint16_t flags;
uint16_t srcid;
@@ -74,8 +82,7 @@ struct nbl_chan_ring {
struct nbl_chan_waitqueue_head {
char *ack_data;
- RTE_ATOMIC(int)
- acked;
+ RTE_ATOMIC(int) status;
int ack_err;
uint16_t ack_data_len;
uint16_t msg_type;
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [REVIEW] net/nbl: improve exception handling for the mailbox
2026-01-26 7:58 ` [PATCH v2 4/4] net/nbl: improve exception handling for the mailbox Dimon Zhao
@ 2026-01-26 18:18 ` Stephen Hemminger
2026-01-26 18:28 ` Stephen Hemminger
0 siblings, 1 reply; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-26 18:18 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
AI-generated review of bundle-1692-nbl-v2.mbox
Reviewed using Claude (claude-sonnet-4-5-20250929)
This is an automated review. Please verify all suggestions.
---
# DPDK Patch Review: bundle-1692-nbl-v2.mbox
## PATCH 1/4: net/nbl: fix memzone leak in queue release
### Commit Message
**Error**: Subject line exceeds 60 characters (61 characters: "net/nbl: fix memzone leak in queue release")
- Current: 61 characters
- Maximum: 60 characters
- Suggested: "net/nbl: fix memzone leak on queue release"
**Warning**: Body does not describe the issue being fixed
- The body jumps directly to the solution without explaining why memzones were leaking
- Should explain: "Memzones allocated during queue setup were not freed during queue release, causing memory leaks when queues are reconfigured or the device is stopped."
**Info**: Good use of Fixes tag with 12-char SHA and Cc: stable
### Code Review
**Warning**: Missing NULL checks before rte_memzone_free()
```c
rte_free(tx_ring->tx_entry);
rte_memzone_free(tx_ring->net_hdr_mz); // Could be NULL if setup failed
rte_memzone_free(tx_ring->desc_mz); // Could be NULL if setup failed
```
While rte_memzone_free() handles NULL, this is not documented behavior. Better to check explicitly or document the assumption.
**Warning**: Logic change mixed with fix
- The patch changes from calling `nbl_res_txrx_stop_tx_ring()` to `nbl_res_txrx_release_tx_ring()` in queue setup path
- This is a separate behavioral change that should be in its own patch or explained in commit message
**Info**: Function reordering (nbl_res_txrx_release_rx_ring moved) makes diff harder to review
- Consider pure addition without moving existing code
---
## PATCH 2/4: net/nbl: optimize mbuf headroom usage in packet Tx
### Commit Message
**Error**: Subject line exceeds 60 characters (62 characters)
- Current: "net/nbl: optimize mbuf headroom usage in packet Tx"
- Suggested: "net/nbl: fix shared mbuf handling in Tx"
**Warning**: Subject line misleading - this is a correctness fix, not optimization
- "optimize" suggests performance improvement, but this fixes data corruption
- Should use "fix" as this prevents writing to shared buffers
**Warning**: Body structure issue
- "Implementation details:" section is too verbose for commit message
- Should focus on what problem is fixed and why
**Info**: Good Fixes tag and stable CC
### Code Review
**Warning**: Unnecessary defensive checks
```c
if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
RTE_MBUF_DIRECT(tx_pkt) && // Unnecessary
tx_pkt->nb_segs == 1 && // Unnecessary
rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
```
- `RTE_MBUF_DIRECT(tx_pkt)`: Already checked by headroom availability (indirect mbufs have no headroom)
- `nb_segs == 1`: Multi-segment handling should be checked elsewhere; this conflates concerns
**Error**: Incomplete fix - missing boundary check
- The code modifies `tx_pkt` at offset `-required_headroom` but doesn't verify the mbuf wasn't cloned with a non-zero data offset
- Need: `tx_pkt->data_off >= required_headroom`
**Suggested fix**:
```c
if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
```
---
## PATCH 3/4: net/nbl: fix mbuf double-free in queue cleanup
### Commit Message
**Error**: Subject line exceeds 60 characters (61 characters)
- Suggested: "net/nbl: fix double-free in queue cleanup"
**Warning**: Body formatting issues
- Inconsistent indentation in the description
- "TX ring:" and "RX ring:" descriptions use tabs instead of standard formatting
**Info**: Good explanation of the fix scope
### Code Review
**Error**: Code order violation in nbl_dev.c
```c
nbl_dev_hw_stats_stop(eth_dev);
nbl_clear_queues(eth_dev); // Moved after txrx_stop
nbl_dev_txrx_stop(eth_dev);
```
This changes the order of operations but the commit message doesn't explain why this reordering is necessary. This should be in a separate patch or thoroughly explained.
**Warning**: Loop iteration complexity
```c
i = tx_ring->next_to_clean + 1 - NBL_TX_RS_THRESH;
```
This arithmetic is fragile and could wrap around. Needs a comment explaining why `+1 - NBL_TX_RS_THRESH`.
**Warning**: Unnecessary variable initialization
```c
u16 i, free_cnt, free_mbuf_cnt;
...
free_mbuf_cnt = 0; // Only used for logging
```
`free_mbuf_cnt` is only for debug logging; consider removing if not essential.
**Error**: Missing wrap-around handling documentation
The loops handle ring wrap-around but don't document what happens at boundary:
```c
if (i == tx_ring->nb_desc) {
i = 0;
tx_entry = &tx_ring->tx_entry[i];
}
```
**Info**: Good use of NBL_LOG(DEBUG) for diagnostics
---
## PATCH 4/4: net/nbl: improve exception handling for the mailbox
### Commit Message
**Error**: Subject line exceeds 60 characters (64 characters)
- Suggested: "net/nbl: improve mailbox exception handling"
**Warning**: Body uses numbered list format which is unusual for DPDK
- Should use prose paragraphs instead of bullet points
- Current style is inconsistent with DPDK conventions
**Warning**: Too much implementation detail in commit message
- "New states: IDLE, WAITING, ACKING, ACKED, TIMEOUT" - this is code-level detail
- Focus on what problems are solved, not how
### Code Review
**Warning**: Missing atomic operation explanation
```c
if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
NBL_MBX_STATUS_ACKING,
rte_memory_order_acq_rel,
rte_memory_order_relaxed)) {
```
This is a critical section but no comment explains the race being prevented.
**Error**: Inconsistent state transition handling
```c
status = rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire);
if (status != NBL_MBX_STATUS_IDLE && status != NBL_MBX_STATUS_TIMEOUT) {
NBL_LOG(ERR, "too much inflight mailbox msg, msg id %u", next_to_use);
return 0xFFFF;
}
```
Returns 0xFFFF on error but caller checks `msgid == 0xFFFF`. Should use a defined constant like `NBL_CHAN_INVALID_MSGID`.
**Warning**: Magic number for retry times
```c
if (retry_time > NBL_CHAN_RETRY_TIMES &&
```
`NBL_CHAN_RETRY_TIMES` is used but the timeout calculation (`50us * NBL_CHAN_RETRY_TIMES`) is not documented.
**Warning**: Log level inconsistency
```c
NBL_LOG(INFO, "payload_len does not match ack_len!,...)
```
Should this be WARNING or ERROR? A mismatch could indicate a protocol violation.
**Error**: Race condition in timeout handling
```c
if (retry_time > NBL_CHAN_RETRY_TIMES &&
rte_atomic_compare_exchange_strong_explicit(&wait_head->status,
&status, NBL_MBX_STATUS_TIMEOUT,
```
Between checking `retry_time` and CAS, an ACK could arrive. Need to verify the window is acceptable or tighten the logic.
---
## General Issues Across All Patches
### License and Copyright
**Info**: No license headers visible in the patches, assuming they exist in original files
### Code Style
**Warning**: Inconsistent spacing in conditional expressions across patches
**Info**: General code style appears compliant with DPDK standards
### Testing
**Warning**: No mention of testing performed for these critical bug fixes
- These are data corruption and memory leak fixes that need validation
- Should document test scenarios in cover letter or commit messages
### Documentation
**Warning**: No updates to release notes for these bug fixes
- Significant fixes affecting data integrity should be in release notes
- At minimum, should update doc/guides/rel_notes/release_XX_XX.rst
---
## Summary
**Must Fix (Errors)**:
1. All subject lines exceed 60 characters - shorten them
2. Patch 2: Missing mbuf data_off boundary check (security issue)
3. Patch 3: Unexplained functional change (operation reordering)
4. Patch 4: Magic return value 0xFFFF needs named constant
**Should Fix (Warnings)**:
1. Patch 1: Add NULL checks or document assumptions for memzone pointers
2. Patch 2: Subject says "optimize" but fixes data corruption - use "fix"
3. Patch 2: Remove unnecessary defensive checks (RTE_MBUF_DIRECT, nb_segs)
4. Patch 3: Document wrap-around arithmetic logic
5. Patch 4: Document atomic operation races being prevented
6. All patches: Add testing information
7. All patches: Update release notes for fixes
**Consider (Info)**:
1. Patch 1: Avoid function reordering in diffs for easier review
2. Patch 3: Consider removing debug-only counters
3. Commit message formatting consistency across series
^ permalink raw reply [flat|nested] 24+ messages in thread

* Re: [REVIEW] net/nbl: improve exception handling for the mailbox
2026-01-26 18:18 ` [REVIEW] " Stephen Hemminger
@ 2026-01-26 18:28 ` Stephen Hemminger
0 siblings, 0 replies; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-26 18:28 UTC (permalink / raw)
To: dev
On Mon, 26 Jan 2026 10:18:38 -0800
Stephen Hemminger <stephen@networkplumber.org> wrote:
> ### Code Review
> **Warning**: Missing NULL checks before rte_memzone_free()
> ```c
> rte_free(tx_ring->tx_entry);
> rte_memzone_free(tx_ring->net_hdr_mz); // Could be NULL if setup failed
> rte_memzone_free(tx_ring->desc_mz); // Could be NULL if setup failed
> ```
> While rte_memzone_free() handles NULL, this is not documented behavior. Better to check explicitly or document assumption.
Ignore this feedback; it reflects missing documentation in rte_memzone, not a problem with your patch.
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v3 0/4] improve NBL memory safety and mailbox reliability
2026-01-23 3:16 [PATCH v1 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
` (6 preceding siblings ...)
2026-01-26 7:58 ` [PATCH v2 " Dimon Zhao
@ 2026-01-27 2:52 ` Dimon Zhao
2026-01-27 2:52 ` [PATCH v3 1/4] net/nbl: fix memzone leak on queue release Dimon Zhao
` (4 more replies)
7 siblings, 5 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27 2:52 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao
Includes fixes for mbuf/memzone leaks and mailbox state handling.
Dimon Zhao (4):
net/nbl: fix memzone leak on queue release
net/nbl: fix mbuf headroom usage in packet Tx
net/nbl: fix mbuf double-free in queue cleanup
net/nbl: improve mailbox exception handling
drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
drivers/net/nbl/nbl_hw/nbl_channel.c | 94 +++++++++++++++++----
drivers/net/nbl/nbl_hw/nbl_channel.h | 11 ++-
drivers/net/nbl/nbl_hw/nbl_resource.h | 2 +
drivers/net/nbl/nbl_hw/nbl_txrx.c | 117 +++++++++++++++++++-------
drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
6 files changed, 175 insertions(+), 53 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 24+ messages in thread

* [PATCH v3 1/4] net/nbl: fix memzone leak on queue release
2026-01-27 2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
@ 2026-01-27 2:52 ` Dimon Zhao
2026-01-27 2:52 ` [PATCH v3 2/4] net/nbl: fix mbuf headroom usage in packet Tx Dimon Zhao
` (3 subsequent siblings)
4 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27 2:52 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Add memzone free calls in tx_queue_release() and rx_queue_release()
to fix memory leaks. The memzones allocated in tx_queue_setup() and
rx_queue_setup() were not being freed when queues were released.
Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_resource.h | 2 ++
drivers/net/nbl/nbl_hw/nbl_txrx.c | 39 ++++++++++++++++-----------
2 files changed, 25 insertions(+), 16 deletions(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_resource.h b/drivers/net/nbl/nbl_hw/nbl_resource.h
index 1f6515f64d..469c3f5827 100644
--- a/drivers/net/nbl/nbl_hw/nbl_resource.h
+++ b/drivers/net/nbl/nbl_hw/nbl_resource.h
@@ -137,6 +137,7 @@ struct nbl_res_tx_ring {
volatile struct nbl_packed_desc *desc;
struct nbl_tx_entry *tx_entry;
const struct rte_memzone *net_hdr_mz;
+ const struct rte_memzone *desc_mz;
volatile uint8_t *notify;
const struct rte_eth_dev *eth_dev;
struct nbl_common_info *common;
@@ -178,6 +179,7 @@ struct nbl_res_tx_ring {
struct nbl_res_rx_ring {
volatile struct nbl_packed_desc *desc;
struct nbl_rx_entry *rx_entry;
+ const struct rte_memzone *desc_mz;
struct rte_mempool *mempool;
volatile uint8_t *notify;
const struct rte_eth_dev *eth_dev;
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 650790b4fc..9d92ab9992 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -69,9 +69,13 @@ static void nbl_res_txrx_release_tx_ring(void *priv, u16 queue_idx)
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
+
if (!tx_ring)
return;
+
rte_free(tx_ring->tx_entry);
+ rte_memzone_free(tx_ring->net_hdr_mz);
+ rte_memzone_free(tx_ring->desc_mz);
rte_free(tx_ring);
txrx_mgt->tx_rings[queue_idx] = NULL;
}
@@ -104,7 +108,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
if (eth_dev->data->tx_queues[param->queue_idx] != NULL) {
NBL_LOG(WARNING, "re-setup an already allocated tx queue");
- nbl_res_txrx_stop_tx_ring(priv, param->queue_idx);
+ nbl_res_txrx_release_tx_ring(priv, param->queue_idx);
eth_dev->data->tx_queues[param->queue_idx] = NULL;
}
@@ -173,6 +177,7 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
tx_ring->next_to_use = 0;
tx_ring->desc = (struct nbl_packed_desc *)memzone->addr;
tx_ring->net_hdr_mz = net_hdr_mz;
+ tx_ring->desc_mz = memzone;
tx_ring->eth_dev = eth_dev;
tx_ring->dma_set_msb = common->dma_set_msb;
tx_ring->dma_limit_msb = common->dma_limit_msb;
@@ -226,6 +231,21 @@ static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
rx_ring->next_to_use = 0;
}
+static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
+{
+ struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
+ struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
+ struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+
+ if (!rx_ring)
+ return;
+
+ rte_free(rx_ring->rx_entry);
+ rte_memzone_free(rx_ring->desc_mz);
+ rte_free(rx_ring);
+ txrx_mgt->rx_rings[queue_idx] = NULL;
+}
+
static int nbl_res_txrx_start_rx_ring(void *priv,
struct nbl_start_rx_ring_param *param,
u64 *dma_addr)
@@ -244,7 +264,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
if (eth_dev->data->rx_queues[param->queue_idx] != NULL) {
NBL_LOG(WARNING, "re-setup an already allocated rx queue");
- nbl_res_txrx_stop_rx_ring(priv, param->queue_idx);
+ nbl_res_txrx_release_rx_ring(priv, param->queue_idx);
eth_dev->data->rx_queues[param->queue_idx] = NULL;
}
@@ -275,6 +295,7 @@ static int nbl_res_txrx_start_rx_ring(void *priv,
rx_ring->product = param->product;
rx_ring->mempool = param->mempool;
+ rx_ring->desc_mz = memzone;
rx_ring->nb_desc = param->nb_desc;
rx_ring->queue_id = param->queue_idx;
rx_ring->notify_qid =
@@ -376,20 +397,6 @@ static int nbl_res_alloc_rx_bufs(void *priv, u16 queue_idx)
return 0;
}
-static void nbl_res_txrx_release_rx_ring(void *priv, u16 queue_idx)
-{
- struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
- struct nbl_txrx_mgt *txrx_mgt = NBL_RES_MGT_TO_TXRX_MGT(res_mgt);
- struct nbl_res_rx_ring *rx_ring =
- NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
- if (!rx_ring)
- return;
-
- rte_free(rx_ring->rx_entry);
- rte_free(rx_ring);
- txrx_mgt->rx_rings[queue_idx] = NULL;
-}
-
static void nbl_res_txrx_update_rx_ring(void *priv, u16 index)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [PATCH v3 2/4] net/nbl: fix mbuf headroom usage in packet Tx
2026-01-27 2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
2026-01-27 2:52 ` [PATCH v3 1/4] net/nbl: fix memzone leak on queue release Dimon Zhao
@ 2026-01-27 2:52 ` Dimon Zhao
2026-01-27 2:52 ` [PATCH v3 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
` (2 subsequent siblings)
4 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27 2:52 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Only use mbuf headroom when mbuf->refcnt == 1,
indicating exclusive ownership. For shared mbufs (refcnt > 1),
use next desc instead to avoid data corruption.
This prevents modifying shared buffers that may be used by other
processing contexts.
Implementation details:
- Check mbuf->refcnt before using headroom
- For refcnt == 1: use existing headroom (performance optimal)
- For refcnt > 1: use next desc
Fixes: e5fc1f78c78c ("net/nbl: support Tx and Rx burst")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_txrx.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 9d92ab9992..93d9f0f7de 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -511,7 +511,10 @@ nbl_res_txrx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts, u
tx_extend_len = required_headroom + sizeof(struct rte_ether_hdr);
}
- if (rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
+ if (rte_mbuf_refcnt_read(tx_pkt) == 1 &&
+ RTE_MBUF_DIRECT(tx_pkt) &&
+ tx_pkt->nb_segs == 1 &&
+ rte_pktmbuf_headroom(tx_pkt) >= required_headroom) {
can_push = 1;
u = rte_pktmbuf_mtod_offset(tx_pkt, union nbl_tx_extend_head *,
-required_headroom);
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [PATCH v3 3/4] net/nbl: fix mbuf double-free in queue cleanup
2026-01-27 2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
2026-01-27 2:52 ` [PATCH v3 1/4] net/nbl: fix memzone leak on queue release Dimon Zhao
2026-01-27 2:52 ` [PATCH v3 2/4] net/nbl: fix mbuf headroom usage in packet Tx Dimon Zhao
@ 2026-01-27 2:52 ` Dimon Zhao
2026-01-27 2:52 ` [PATCH v3 4/4] net/nbl: improve mailbox exception handling Dimon Zhao
2026-01-27 15:10 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
4 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27 2:52 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Only free hardware-owned mbufs to avoid double-free
corruption of application-owned buffers.
TX ring: free only transmitted mbufs (next_to_clean to next_to_use).
RX ring: free only allocated receive buffers
(next_to_clean to next_to_use),
mbufs already delivered to the application (before next_to_clean)
must not be touched.
Fixes: 5d910b2789da ("net/nbl: support queue setup and release")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
drivers/net/nbl/nbl_hw/nbl_txrx.c | 73 ++++++++++++++++++++++-----
drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
3 files changed, 61 insertions(+), 16 deletions(-)
diff --git a/drivers/net/nbl/nbl_dev/nbl_dev.c b/drivers/net/nbl/nbl_dev/nbl_dev.c
index edff61e52e..2b0413fb7c 100644
--- a/drivers/net/nbl/nbl_dev/nbl_dev.c
+++ b/drivers/net/nbl/nbl_dev/nbl_dev.c
@@ -328,8 +328,8 @@ int nbl_dev_port_stop(struct rte_eth_dev *eth_dev)
rte_delay_ms(NBL_SAFE_THREADS_WAIT_TIME);
nbl_dev_hw_stats_stop(eth_dev);
- nbl_clear_queues(eth_dev);
nbl_dev_txrx_stop(eth_dev);
+ nbl_clear_queues(eth_dev);
nbl_userdev_port_config(adapter, NBL_KERNEL_NETWORK);
return 0;
}
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx.c b/drivers/net/nbl/nbl_hw/nbl_txrx.c
index 93d9f0f7de..48f20e4f4c 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx.c
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx.c
@@ -44,19 +44,43 @@ static void nbl_res_txrx_stop_tx_ring(void *priv, u16 queue_idx)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
struct nbl_res_tx_ring *tx_ring = NBL_RES_MGT_TO_TX_RING(res_mgt, queue_idx);
- int i;
+ struct nbl_tx_entry *tx_entry;
+ u16 i, free_cnt, free_mbuf_cnt;
if (!tx_ring)
return;
- for (i = 0; i < tx_ring->nb_desc; i++) {
- if (tx_ring->tx_entry[i].mbuf != NULL) {
- rte_pktmbuf_free_seg(tx_ring->tx_entry[i].mbuf);
- memset(&tx_ring->tx_entry[i], 0, sizeof(*tx_ring->tx_entry));
+ i = tx_ring->next_to_clean + 1 - NBL_TX_RS_THRESH;
+ free_cnt = tx_ring->vq_free_cnt;
+ tx_entry = &tx_ring->tx_entry[i];
+ free_mbuf_cnt = 0;
+
+ while (free_cnt < tx_ring->nb_desc) {
+ if (tx_entry->mbuf) {
+ free_mbuf_cnt++;
+ rte_pktmbuf_free_seg(tx_entry->mbuf);
+ }
+
+ i++;
+ tx_entry++;
+ free_cnt++;
+ if (i == tx_ring->nb_desc) {
+ i = 0;
+ tx_entry = &tx_ring->tx_entry[i];
}
+ }
+
+ NBL_LOG(DEBUG, "stop tx ring ntc %u, ntu %u, vq_free_cnt %u, mbuf free_cnt %u",
+ tx_ring->next_to_clean, tx_ring->next_to_use, tx_ring->vq_free_cnt, free_mbuf_cnt);
+
+ for (i = 0; i < tx_ring->nb_desc; i++) {
+ tx_ring->desc[i].addr = 0;
+ tx_ring->desc[i].len = 0;
+ tx_ring->desc[i].id = 0;
tx_ring->desc[i].flags = 0;
}
+ memset(tx_ring->tx_entry, 0, sizeof(*tx_entry) * tx_ring->nb_desc);
tx_ring->avail_used_flags = NBL_PACKED_DESC_F_AVAIL_BIT;
tx_ring->used_wrap_counter = 1;
tx_ring->next_to_clean = NBL_TX_RS_THRESH - 1;
@@ -208,25 +232,46 @@ static int nbl_res_txrx_start_tx_ring(void *priv,
static void nbl_res_txrx_stop_rx_ring(void *priv, u16 queue_idx)
{
struct nbl_resource_mgt *res_mgt = (struct nbl_resource_mgt *)priv;
- struct nbl_res_rx_ring *rx_ring =
- NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
- u16 i;
+ struct nbl_res_rx_ring *rx_ring = NBL_RES_MGT_TO_RX_RING(res_mgt, queue_idx);
+ struct nbl_rx_entry *rx_buf;
+ u16 i, free_cnt, free_mbuf_cnt;
if (!rx_ring)
return;
+
+ i = rx_ring->next_to_clean;
+ free_cnt = rx_ring->vq_free_cnt;
+ free_mbuf_cnt = 0;
+
if (rx_ring->rx_entry != NULL) {
- for (i = 0; i < rx_ring->nb_desc; i++) {
- if (rx_ring->rx_entry[i].mbuf != NULL) {
- rte_pktmbuf_free_seg(rx_ring->rx_entry[i].mbuf);
- rx_ring->rx_entry[i].mbuf = NULL;
+ rx_buf = &rx_ring->rx_entry[i];
+ while (free_cnt < rx_ring->nb_desc) {
+ if (rx_buf->mbuf) {
+ free_mbuf_cnt++;
+ rte_pktmbuf_free_seg(rx_buf->mbuf);
+ }
+
+ i++;
+ rx_buf++;
+ free_cnt++;
+ if (i == rx_ring->nb_desc) {
+ i = 0;
+ rx_buf = &rx_ring->rx_entry[i];
}
- rx_ring->desc[i].flags = 0;
}
- for (i = rx_ring->nb_desc; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++)
+ memset(rx_ring->rx_entry, 0, sizeof(struct nbl_rx_entry) * rx_ring->nb_desc);
+
+ for (i = 0; i < rx_ring->nb_desc + NBL_DESC_PER_LOOP_VEC_MAX; i++) {
+ rx_ring->desc[i].addr = 0;
+ rx_ring->desc[i].len = 0;
+ rx_ring->desc[i].id = 0;
rx_ring->desc[i].flags = 0;
+ }
}
+ NBL_LOG(DEBUG, "stop rx ring ntc %u, ntu %u, vq_free_cnt %u, free_mbuf_cnt %u",
+ rx_ring->next_to_clean, rx_ring->next_to_use, rx_ring->vq_free_cnt, free_mbuf_cnt);
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
}
diff --git a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
index 2ab4b09683..dc0bab2b7b 100644
--- a/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
+++ b/drivers/net/nbl/nbl_hw/nbl_txrx_ops.h
@@ -43,7 +43,7 @@ nbl_tx_free_bufs(struct nbl_res_tx_ring *txq)
if (!desc_is_used(&txq->desc[next_to_clean], txq->used_wrap_counter))
return 0;
- n = 32;
+ n = NBL_TX_RS_THRESH;
/* first buffer to free from S/W ring is at index
* tx_next_dd - (tx_rs_thresh-1)
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread

* [PATCH v3 4/4] net/nbl: improve mailbox exception handling
2026-01-27 2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
` (2 preceding siblings ...)
2026-01-27 2:52 ` [PATCH v3 3/4] net/nbl: fix mbuf double-free in queue cleanup Dimon Zhao
@ 2026-01-27 2:52 ` Dimon Zhao
2026-01-27 15:10 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
4 siblings, 0 replies; 24+ messages in thread
From: Dimon Zhao @ 2026-01-27 2:52 UTC (permalink / raw)
To: dev; +Cc: Dimon Zhao, stable, Kyo Liu, Leon Yu, Sam Chen
Key changes:
1. Replace simple 'acked' flag with a state machine to improve mailbox
reliability. New states: IDLE, WAITING, ACKING, ACKED, TIMEOUT.
2. Validate message ID and type before processing acknowledgments to
prevent incorrect ACK matching.
3. Handle timeout scenarios: discard ACKs received after timeout to
prevent use of already released buffers.
4. Check mailbox status before sending: if the send mailbox descriptor
is found to be in use or waiting for an ACK during Tx,
the message will not be sent.
Fixes: a1c5ffa13b2c ("net/nbl: add channel layer")
Cc: stable@dpdk.org
Signed-off-by: Dimon Zhao <dimon.zhao@nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++++++++------
drivers/net/nbl/nbl_hw/nbl_channel.h | 11 +++-
2 files changed, 85 insertions(+), 20 deletions(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.c b/drivers/net/nbl/nbl_hw/nbl_channel.c
index f81c4c8591..52ad71c833 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.c
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.c
@@ -22,6 +22,7 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
{
struct nbl_chan_ring *txq = &chan_info->mailbox.txq;
size_t size = chan_info->mailbox.num_txq_entries * sizeof(struct nbl_chan_tx_desc);
+ int i;
txq->desc = nbl_alloc_dma_mem(&txq->desc_mem, size);
if (!txq->desc) {
@@ -36,6 +37,10 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
goto req_wait_queue_failed;
}
+ for (i = 0; i < chan_info->mailbox.num_txq_entries; i++)
+ rte_atomic_store_explicit(&chan_info->mailbox.wait[i].status, NBL_MBX_STATUS_IDLE,
+ rte_memory_order_relaxed);
+
size = (u64)chan_info->mailbox.num_txq_entries * (u64)chan_info->mailbox.txq_buf_size;
txq->buf = nbl_alloc_dma_mem(&txq->buf_mem, size);
if (!txq->buf) {
@@ -253,24 +258,55 @@ static void nbl_chan_recv_ack_msg(void *priv, uint16_t srcid, uint16_t msgid,
uint32_t ack_msgid;
uint32_t ack_msgtype;
uint32_t copy_len;
+ int status = NBL_MBX_STATUS_WAITING;
chan_info = NBL_CHAN_MGT_TO_CHAN_INFO(chan_mgt);
ack_msgtype = *payload;
ack_msgid = *(payload + 1);
+
+ if (ack_msgid >= chan_mgt->chan_info->mailbox.num_txq_entries) {
+ NBL_LOG(ERR, "Invalid msg id %u, msg type %u", ack_msgid, ack_msgtype);
+ return;
+ }
+
wait_head = &chan_info->mailbox.wait[ack_msgid];
+ if (wait_head->msg_type != ack_msgtype) {
+ NBL_LOG(ERR, "Invalid msg id %u, msg type %u, wait msg type %u",
+ ack_msgid, ack_msgtype, wait_head->msg_type);
+ return;
+ }
+
+ if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
+ NBL_MBX_STATUS_ACKING,
+ rte_memory_order_acq_rel,
+ rte_memory_order_relaxed)) {
+ NBL_LOG(ERR, "Invalid wait status %d msg id %u, msg type %u",
+ wait_head->status, ack_msgid, ack_msgtype);
+ return;
+ }
+
wait_head->ack_err = *(payload + 2);
NBL_LOG(DEBUG, "recv ack, srcid:%u, msgtype:%u, msgid:%u, ack_msgid:%u,"
- " data_len:%u, ack_data_len:%u",
- srcid, ack_msgtype, msgid, ack_msgid, data_len, wait_head->ack_data_len);
+ " data_len:%u, ack_data_len:%u, ack_err:%d",
+ srcid, ack_msgtype, msgid, ack_msgid, data_len,
+ wait_head->ack_data_len, wait_head->ack_err);
if (wait_head->ack_err >= 0 && (data_len > 3 * sizeof(uint32_t))) {
/* the mailbox msg parameter structure may change */
+ if (data_len - 3 * sizeof(uint32_t) != wait_head->ack_data_len)
+ NBL_LOG(INFO, "payload_len does not match ack_len!,"
+ " srcid:%u, msgtype:%u, msgid:%u, ack_msgid %u,"
+ " data_len:%u, ack_data_len:%u",
+ srcid, ack_msgtype, msgid,
+ ack_msgid, data_len, wait_head->ack_data_len);
copy_len = RTE_MIN(wait_head->ack_data_len, data_len - 3 * sizeof(uint32_t));
- memcpy(wait_head->ack_data, payload + 3, copy_len);
+ if (copy_len)
+ memcpy(wait_head->ack_data, payload + 3, copy_len);
}
- rte_atomic_store_explicit(&wait_head->acked, 1, rte_memory_order_release);
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_ACKED,
+ rte_memory_order_release);
}
static void nbl_chan_recv_msg(struct nbl_channel_mgt *chan_mgt, void *data)
@@ -371,12 +407,27 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
{
struct nbl_chan_ring *txq;
struct nbl_chan_tx_desc *tx_desc;
+ struct nbl_chan_waitqueue_head *wait_head;
uint64_t pa;
void *va;
uint16_t next_to_use;
+ int status;
+
+ if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
+ NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
+ return 0xFFFF;
+ }
txq = &chan_info->mailbox.txq;
next_to_use = txq->next_to_use;
+
+ wait_head = &chan_info->mailbox.wait[next_to_use];
+ status = rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire);
+ if (status != NBL_MBX_STATUS_IDLE && status != NBL_MBX_STATUS_TIMEOUT) {
+ NBL_LOG(ERR, "too much inflight mailbox msg, msg id %u", next_to_use);
+ return 0xFFFF;
+ }
+
va = (u8 *)txq->buf + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
pa = txq->buf_mem.pa + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
tx_desc = NBL_CHAN_TX_DESC(txq, next_to_use);
@@ -384,10 +435,6 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
tx_desc->dstid = dstid;
tx_desc->msg_type = msg_type;
tx_desc->msgid = next_to_use;
- if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
- NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
- return -1;
- }
if (arg_len > NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN) {
memcpy(va, arg, arg_len);
@@ -418,6 +465,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
uint16_t msgid;
int ret;
int retry_time = 0;
+ int status = NBL_MBX_STATUS_WAITING;
if (chan_mgt->state)
return -EIO;
@@ -431,8 +479,7 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
if (msgid == 0xFFFF) {
rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
- NBL_LOG(ERR, "chan tx queue full, send msgtype:%u"
- " to dstid:%u failed",
+ NBL_LOG(ERR, "chan tx queue full, send msgtype:%u to dstid:%u failed",
chan_send->msg_type, chan_send->dstid);
return -ECOMM;
}
@@ -446,23 +493,34 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
wait_head = &chan_info->mailbox.wait[msgid];
wait_head->ack_data = chan_send->resp;
wait_head->ack_data_len = chan_send->resp_len;
- rte_atomic_store_explicit(&wait_head->acked, 0, rte_memory_order_relaxed);
wait_head->msg_type = chan_send->msg_type;
- rte_wmb();
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_WAITING,
+ rte_memory_order_release);
nbl_chan_kick_tx_ring(chan_mgt, chan_info);
rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
while (1) {
- if (rte_atomic_load_explicit(&wait_head->acked, rte_memory_order_acquire))
- return wait_head->ack_err;
+ if (rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire) ==
+ NBL_MBX_STATUS_ACKED) {
+ ret = wait_head->ack_err;
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_IDLE,
+ rte_memory_order_release);
+ return ret;
+ }
rte_delay_us(50);
retry_time++;
- if (retry_time > NBL_CHAN_RETRY_TIMES)
- return -EIO;
+ if (retry_time > NBL_CHAN_RETRY_TIMES &&
+ rte_atomic_compare_exchange_strong_explicit(&wait_head->status,
+ &status, NBL_MBX_STATUS_TIMEOUT,
+ rte_memory_order_acq_rel,
+ rte_memory_order_relaxed)) {
+ NBL_LOG(ERR, "send msgtype:%u msgid %u to dstid:%u timeout",
+ chan_send->msg_type, msgid, chan_send->dstid);
+ break;
+ }
}
-
- return 0;
+ return -EIO;
}
static int nbl_chan_send_ack(void *priv, struct nbl_chan_ack_info *chan_ack)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.h b/drivers/net/nbl/nbl_hw/nbl_channel.h
index f8fa89af51..21fce7478d 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.h
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.h
@@ -41,6 +41,14 @@ enum {
NBL_MB_TX_QID = 1,
};
+enum {
+ NBL_MBX_STATUS_IDLE = 0,
+ NBL_MBX_STATUS_WAITING,
+ NBL_MBX_STATUS_ACKING,
+ NBL_MBX_STATUS_ACKED,
+ NBL_MBX_STATUS_TIMEOUT,
+};
+
struct __rte_packed_begin nbl_chan_tx_desc {
uint16_t flags;
uint16_t srcid;
@@ -74,8 +82,7 @@ struct nbl_chan_ring {
struct nbl_chan_waitqueue_head {
char *ack_data;
- RTE_ATOMIC(int)
- acked;
+ RTE_ATOMIC(int) status;
int ack_err;
uint16_t ack_data_len;
uint16_t msg_type;
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH v3 0/4] improve NBL memory safety and mailbox reliability
2026-01-27 2:52 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Dimon Zhao
` (3 preceding siblings ...)
2026-01-27 2:52 ` [PATCH v3 4/4] net/nbl: improve mailbox exception handling Dimon Zhao
@ 2026-01-27 15:10 ` Stephen Hemminger
2026-01-28 2:00 ` 回复:[PATCH " Dimon
4 siblings, 1 reply; 24+ messages in thread
From: Stephen Hemminger @ 2026-01-27 15:10 UTC (permalink / raw)
To: Dimon Zhao; +Cc: dev
On Mon, 26 Jan 2026 18:52:16 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:
> Includes fixes for mbuf/memzone leaks and mailbox state handling.
>
> Dimon Zhao (4):
> net/nbl: fix memzone leak on queue release
> net/nbl: fix mbuf headroom usage in packet Tx
> net/nbl: fix mbuf double-free in queue cleanup
> net/nbl: improve mailbox exception handling
>
> drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
> drivers/net/nbl/nbl_hw/nbl_channel.c | 94 +++++++++++++++++----
> drivers/net/nbl/nbl_hw/nbl_channel.h | 11 ++-
> drivers/net/nbl/nbl_hw/nbl_resource.h | 2 +
> drivers/net/nbl/nbl_hw/nbl_txrx.c | 117 +++++++++++++++++++-------
> drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
> 6 files changed, 175 insertions(+), 53 deletions(-)
>
Queued to next-net with minor rewording of the commit message in patch 4.
Improve mailbox reliability by replacing the simple 'acked' flag with
a state machine for tracking message status.
Key changes:
1. Replace simple 'acked' flag with a state machine...
^ permalink raw reply [flat|nested] 24+ messages in thread
* 回复:[PATCH v3 0/4] improve NBL memory safety and mailbox reliability
2026-01-27 15:10 ` [PATCH v3 0/4] improve NBL memory safety and mailbox reliability Stephen Hemminger
@ 2026-01-28 2:00 ` Dimon
0 siblings, 0 replies; 24+ messages in thread
From: Dimon @ 2026-01-28 2:00 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: dev
[-- Attachment #1: Type: text/plain, Size: 1460 bytes --]
Hi Stephen,
Thanks for queuing the series and improving the commit message.
The new title better captures the essence of the fix.
------------------------------------------------------------------
发件人:Stephen Hemminger <stephen@networkplumber.org>
发送时间:2026年1月27日(周二) 23:10
收件人:Dimon<dimon.zhao@nebula-matrix.com>
抄 送:dev<dev@dpdk.org>
主 题:Re: [PATCH v3 0/4] improve NBL memory safety and mailbox reliability
On Mon, 26 Jan 2026 18:52:16 -0800
Dimon Zhao <dimon.zhao@nebula-matrix.com> wrote:
> Includes fixes for mbuf/memzone leaks and mailbox state handling.
>
> Dimon Zhao (4):
> net/nbl: fix memzone leak on queue release
> net/nbl: fix mbuf headroom usage in packet Tx
> net/nbl: fix mbuf double-free in queue cleanup
> net/nbl: improve mailbox exception handling
>
> drivers/net/nbl/nbl_dev/nbl_dev.c | 2 +-
> drivers/net/nbl/nbl_hw/nbl_channel.c | 94 +++++++++++++++++----
> drivers/net/nbl/nbl_hw/nbl_channel.h | 11 ++-
> drivers/net/nbl/nbl_hw/nbl_resource.h | 2 +
> drivers/net/nbl/nbl_hw/nbl_txrx.c | 117 +++++++++++++++++++-------
> drivers/net/nbl/nbl_hw/nbl_txrx_ops.h | 2 +-
> 6 files changed, 175 insertions(+), 53 deletions(-)
>
Queued to next-net with minor rewording of the commit message in patch 4.
Improve mailbox reliability by replacing the simple 'acked' flag with
a state machine for tracking message status.
Key changes:
1. Replace simple 'acked' flag with a state machine...
[-- Attachment #2: Type: text/html, Size: 7855 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread