* [PATCH v1] net/zxdh: optimize Rx/Tx path performance
@ 2026-03-26 2:28 Junlong Wang
2026-03-26 3:27 ` Stephen Hemminger
` (2 more replies)
0 siblings, 3 replies; 23+ messages in thread
From: Junlong Wang @ 2026-03-26 2:28 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 46849 bytes --]
This patch optimizes the ZXDH PMD's receive and transmit paths for
better performance through the following changes:
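- Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
  zxdh_recv_single_pkts) for single-segment packets. The simple Tx
  function is selected when RTE_ETH_TX_OFFLOAD_MULTI_SEGS is not
  enabled; the single-packet Rx function is selected when scattered
  Rx is not required.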
- Remove the RX software ring (sw_ring) to reduce memory allocation
  and copying.
- Optimize descriptor management with prefetching and simplified
cleanup.
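- Reorganize structure fields for better cache locality.
- Reject an MTU change on a started port when the new MTU would change
  whether scattered Rx is required; the port must be stopped first.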
These changes reduce CPU cycles and memory bandwidth consumption,
resulting in improved packet processing throughput.
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 95 +++---
drivers/net/zxdh/zxdh_ethdev_ops.c | 24 +-
drivers/net/zxdh/zxdh_ethdev_ops.h | 4 +
drivers/net/zxdh/zxdh_pci.c | 2 +-
drivers/net/zxdh/zxdh_queue.c | 11 +-
drivers/net/zxdh/zxdh_queue.h | 120 ++++---
drivers/net/zxdh/zxdh_rxtx.c | 518 +++++++++++++++++++++--------
drivers/net/zxdh/zxdh_rxtx.h | 27 +-
8 files changed, 534 insertions(+), 267 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index aeb01f4652..a5238fc6f8 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -490,7 +490,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
if (!vq)
continue;
while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
- rte_pktmbuf_free(buf);
+ rte_pktmbuf_free_seg(buf);
PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
"rxq", i * 2);
}
@@ -499,7 +499,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
if (!vq)
continue;
while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
- rte_pktmbuf_free(buf);
+ rte_pktmbuf_free_seg(buf);
PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
"txq", i * 2 + 1);
}
@@ -644,7 +644,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
struct zxdh_virtnet_tx *txvq = NULL;
struct zxdh_virtqueue *vq = NULL;
size_t sz_hdr_mz = 0;
- void *sw_ring = NULL;
int32_t queue_type = zxdh_get_queue_type(vtpci_logic_qidx);
int32_t numa_node = dev->device->numa_node;
uint16_t vtpci_phy_qidx = 0;
@@ -692,11 +691,10 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
vq->vq_queue_index = vtpci_phy_qidx;
vq->vq_nentries = vq_size;
- vq->vq_packed.used_wrap_counter = 1;
- vq->vq_packed.cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
- vq->vq_packed.event_flags_shadow = 0;
+ vq->used_wrap_counter = 1;
+ vq->cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
if (queue_type == ZXDH_VTNET_RQ)
- vq->vq_packed.cached_flags |= ZXDH_VRING_DESC_F_WRITE;
+ vq->cached_flags |= ZXDH_VRING_DESC_F_WRITE;
/*
* Reserve a memzone for vring elements
@@ -741,46 +739,28 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
}
if (queue_type == ZXDH_VTNET_RQ) {
- size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * sizeof(vq->sw_ring[0]);
-
- sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, RTE_CACHE_LINE_SIZE, numa_node);
- if (!sw_ring) {
- PMD_DRV_LOG(ERR, "can not allocate RX soft ring");
- ret = -ENOMEM;
- goto fail_q_alloc;
- }
-
- vq->sw_ring = sw_ring;
rxvq = &vq->rxq;
rxvq->vq = vq;
rxvq->port_id = dev->data->port_id;
rxvq->mz = mz;
} else { /* queue_type == VTNET_TQ */
+ if (hdr_mz == NULL) {
+ ret = -ENOMEM;
+ PMD_DRV_LOG(ERR, "can not allocate TX soft ring: %d", ret);
+ goto fail_q_alloc;
+ }
txvq = &vq->txq;
txvq->vq = vq;
txvq->port_id = dev->data->port_id;
txvq->mz = mz;
txvq->zxdh_net_hdr_mz = hdr_mz;
- txvq->zxdh_net_hdr_mem = hdr_mz->iova;
+ if (hdr_mz)
+ txvq->zxdh_net_hdr_mem = hdr_mz->iova;
}
- vq->offset = offsetof(struct rte_mbuf, buf_iova);
- if (queue_type == ZXDH_VTNET_TQ) {
+ if (queue_type == ZXDH_VTNET_TQ && hdr_mz) {
struct zxdh_tx_region *txr = hdr_mz->addr;
- uint32_t i;
-
memset(txr, 0, vq_size * sizeof(*txr));
- for (i = 0; i < vq_size; i++) {
- /* first indirect descriptor is always the tx header */
- struct zxdh_vring_packed_desc *start_dp = txr[i].tx_packed_indir;
-
- zxdh_vring_desc_init_indirect_packed(start_dp,
- RTE_DIM(txr[i].tx_packed_indir));
- start_dp->addr = txvq->zxdh_net_hdr_mem + i * sizeof(*txr) +
- offsetof(struct zxdh_tx_region, tx_hdr);
- /* length will be updated to actual pi hdr size when xmit pkt */
- start_dp->len = 0;
- }
}
if (ZXDH_VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
PMD_DRV_LOG(ERR, "setup_queue failed");
@@ -788,8 +768,8 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
}
return 0;
fail_q_alloc:
- rte_free(sw_ring);
- rte_memzone_free(hdr_mz);
+ if (hdr_mz)
+ rte_memzone_free(hdr_mz);
rte_memzone_free(mz);
rte_free(vq);
return ret;
@@ -1290,18 +1270,49 @@ zxdh_dev_close(struct rte_eth_dev *dev)
return ret;
}
+/*
+ * Determine whether the current configuration requires support for scattered
+ * receive; return 1 if scattered receive is required and 0 if not.
+ */
+static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
+{
+ uint16_t buf_size;
+
+ if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
+ eth_dev->data->lro = 1;
+ return 1;
+ }
+
+ if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ return 1;
+
+
+ PMD_DRV_LOG(DEBUG, "port %d min_rx_buf_size %d",
+ eth_dev->data->port_id, eth_dev->data->min_rx_buf_size);
+ buf_size = eth_dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+ if (eth_dev->data->mtu + ZXDH_ETH_OVERHEAD > buf_size)
+ return 1;
+
+ return 0;
+}
+
static int32_t
zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
{
- struct zxdh_hw *hw = eth_dev->data->dev_private;
+ uint64_t tx_offloads = eth_dev->data->dev_conf.txmode.offloads;
- if (!zxdh_pci_with_feature(hw, ZXDH_NET_F_MRG_RXBUF)) {
- PMD_DRV_LOG(ERR, "port %u not support rx mergeable", eth_dev->data->port_id);
- return -1;
- }
eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
- eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
- eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+ eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
+
+ if (!(tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
+ eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_simple;
+ else
+ eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
+
+ if (eth_dev->data->scattered_rx)
+ eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+ else
+ eth_dev->rx_pkt_burst = &zxdh_recv_single_pkts;
return 0;
}
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.c b/drivers/net/zxdh/zxdh_ethdev_ops.c
index 50247116d9..e2c2885add 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.c
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.c
@@ -95,10 +95,6 @@ static const struct rte_zxdh_xstats_name_off zxdh_rxq_stat_strings[] = {
{"good_bytes", offsetof(struct zxdh_virtnet_rx, stats.bytes)},
{"errors", offsetof(struct zxdh_virtnet_rx, stats.errors)},
{"idle", offsetof(struct zxdh_virtnet_rx, stats.idle)},
- {"full", offsetof(struct zxdh_virtnet_rx, stats.full)},
- {"norefill", offsetof(struct zxdh_virtnet_rx, stats.norefill)},
- {"multicast_packets", offsetof(struct zxdh_virtnet_rx, stats.multicast)},
- {"broadcast_packets", offsetof(struct zxdh_virtnet_rx, stats.broadcast)},
{"truncated_err", offsetof(struct zxdh_virtnet_rx, stats.truncated_err)},
{"offload_cfg_err", offsetof(struct zxdh_virtnet_rx, stats.offload_cfg_err)},
{"invalid_hdr_len_err", offsetof(struct zxdh_virtnet_rx, stats.invalid_hdr_len_err)},
@@ -117,14 +113,12 @@ static const struct rte_zxdh_xstats_name_off zxdh_txq_stat_strings[] = {
{"good_packets", offsetof(struct zxdh_virtnet_tx, stats.packets)},
{"good_bytes", offsetof(struct zxdh_virtnet_tx, stats.bytes)},
{"errors", offsetof(struct zxdh_virtnet_tx, stats.errors)},
- {"idle", offsetof(struct zxdh_virtnet_tx, stats.idle)},
- {"norefill", offsetof(struct zxdh_virtnet_tx, stats.norefill)},
- {"multicast_packets", offsetof(struct zxdh_virtnet_tx, stats.multicast)},
- {"broadcast_packets", offsetof(struct zxdh_virtnet_tx, stats.broadcast)},
+ {"idle", offsetof(struct zxdh_virtnet_tx, stats.idle)},
{"truncated_err", offsetof(struct zxdh_virtnet_tx, stats.truncated_err)},
{"offload_cfg_err", offsetof(struct zxdh_virtnet_tx, stats.offload_cfg_err)},
{"invalid_hdr_len_err", offsetof(struct zxdh_virtnet_tx, stats.invalid_hdr_len_err)},
{"no_segs_err", offsetof(struct zxdh_virtnet_tx, stats.no_segs_err)},
+ {"no_free_tx_desc_err", offsetof(struct zxdh_virtnet_tx, stats.no_free_tx_desc_err)},
{"undersize_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[0])},
{"size_64_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[1])},
{"size_65_127_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[2])},
@@ -2026,6 +2020,20 @@ int zxdh_dev_mtu_set(struct rte_eth_dev *dev, uint16_t new_mtu)
uint16_t vfid = zxdh_vport_to_vfid(hw->vport);
int ret;
+ /* If device is started, refuse mtu that requires the support of
+ * scattered packets when this feature has not been enabled before.
+ */
+ if (dev->data->dev_started &&
+ ((!dev->data->scattered_rx &&
+ ((uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) >
+ (dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM))) ||
+ (dev->data->scattered_rx &&
+ ((uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) <=
+ (dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM))))) {
+ PMD_DRV_LOG(ERR, "Stop port first.");
+ return -EINVAL;
+ }
+
if (hw->is_pf) {
ret = zxdh_get_panel_attr(dev, &panel);
if (ret != 0) {
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.h b/drivers/net/zxdh/zxdh_ethdev_ops.h
index 6dfe4be473..c49d79c232 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.h
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.h
@@ -40,6 +40,10 @@
#define ZXDH_SPM_SPEED_4X_100G RTE_BIT32(10)
#define ZXDH_SPM_SPEED_4X_200G RTE_BIT32(11)
+#define ZXDH_VLAN_TAG_LEN 4
+#define ZXDH_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ZXDH_VLAN_TAG_LEN * 2)
+#define ZXDH_MTU_TO_PKTLEN(mtu) ((mtu) + ZXDH_ETH_OVERHEAD)
+
struct zxdh_np_stats_data {
uint64_t n_pkts_dropped;
uint64_t n_bytes_dropped;
diff --git a/drivers/net/zxdh/zxdh_pci.c b/drivers/net/zxdh/zxdh_pci.c
index 4ba31905fc..0bc27ed111 100644
--- a/drivers/net/zxdh/zxdh_pci.c
+++ b/drivers/net/zxdh/zxdh_pci.c
@@ -231,7 +231,7 @@ zxdh_notify_queue(struct zxdh_hw *hw, struct zxdh_virtqueue *vq)
notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index;
if (zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) &&
- (vq->vq_packed.cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
+ (vq->cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
notify_data |= RTE_BIT32(31);
PMD_DRV_LOG(DEBUG, "queue:%d notify_data 0x%x notify_addr 0x%p",
diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c
index 7162593b16..4668cb5d13 100644
--- a/drivers/net/zxdh/zxdh_queue.c
+++ b/drivers/net/zxdh/zxdh_queue.c
@@ -407,7 +407,7 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
{
struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
struct zxdh_vq_desc_extra *dxp;
- uint16_t flags = vq->vq_packed.cached_flags;
+ uint16_t flags = vq->cached_flags;
int32_t i;
uint16_t idx;
@@ -415,7 +415,6 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
idx = vq->vq_avail_idx;
dxp = &vq->vq_descx[idx];
dxp->cookie = (void *)cookie[i];
- dxp->ndescs = 1;
/* rx pkt fill in data_off */
start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
@@ -423,8 +422,8 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
zxdh_queue_store_flags_packed(&start_dp[idx], flags);
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
- flags = vq->vq_packed.cached_flags;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ flags = vq->cached_flags;
}
}
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
@@ -467,7 +466,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
int32_t cnt = 0;
i = vq->vq_used_cons_idx;
- while (zxdh_desc_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
+ while (desc_is_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
dxp = &vq->vq_descx[descs[i].id];
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -477,7 +476,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
vq->vq_used_cons_idx++;
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
vq->vq_used_cons_idx -= vq->vq_nentries;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
i = vq->vq_used_cons_idx;
}
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 1a0c8a0d90..94101c8269 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -9,6 +9,7 @@
#include <rte_common.h>
#include <rte_atomic.h>
+#include <rte_io.h>
#include "zxdh_ethdev.h"
#include "zxdh_rxtx.h"
@@ -117,7 +118,6 @@ struct zxdh_vring_packed_desc_event {
};
struct zxdh_vring_packed {
- uint32_t num;
struct zxdh_vring_packed_desc *desc;
struct zxdh_vring_packed_desc_event *driver;
struct zxdh_vring_packed_desc_event *device;
@@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra {
uint16_t next;
};
+struct zxdh_vring {
+ uint32_t num;
+ struct zxdh_vring_desc *desc;
+ struct zxdh_vring_avail *avail;
+ struct zxdh_vring_used *used;
+};
+
struct zxdh_virtqueue {
+ union {
+ struct {
+ struct zxdh_vring ring; /**< vring keeping desc, used and avail */
+ } vq_split;
+ struct __rte_packed_begin {
+ struct zxdh_vring_packed ring;
+ } __rte_packed_end vq_packed;
+ };
struct zxdh_hw *hw; /* < zxdh_hw structure pointer. */
- struct {
- /* vring keeping descs and events */
- struct zxdh_vring_packed ring;
- uint8_t used_wrap_counter;
- uint8_t rsv;
- uint16_t cached_flags; /* < cached flags for descs */
- uint16_t event_flags_shadow;
- uint16_t rsv1;
- } vq_packed;
-
- uint16_t vq_used_cons_idx; /* < last consumed descriptor */
- uint16_t vq_nentries; /* < vring desc numbers */
- uint16_t vq_free_cnt; /* < num of desc available */
- uint16_t vq_avail_idx; /* < sync until needed */
- uint16_t vq_free_thresh; /* < free threshold */
- uint16_t rsv2;
-
- void *vq_ring_virt_mem; /* < linear address of vring */
- uint32_t vq_ring_size;
+ uint16_t vq_used_cons_idx; /**< last consumed descriptor */
+ uint16_t vq_avail_idx; /**< sync until needed */
+ uint16_t vq_nentries; /**< vring desc numbers */
+ uint16_t vq_free_cnt; /**< num of desc available */
+
+ uint16_t cached_flags; /**< cached flags for descs */
+ uint8_t used_wrap_counter;
+ uint8_t rsv;
+ uint16_t vq_free_thresh; /**< free threshold */
+ uint16_t next_qidx;
+
+ void *notify_addr;
union {
struct zxdh_virtnet_rx rxq;
struct zxdh_virtnet_tx txq;
};
- /*
- * physical address of vring, or virtual address
- */
- rte_iova_t vq_ring_mem;
+ uint16_t vq_queue_index; /* PACKED: phy_idx, SPLIT: logic_idx */
+ uint16_t event_flags_shadow;
+ uint32_t vq_ring_size;
- /*
+ /**
* Head of the free chain in the descriptor table. If
* there are no free descriptors, this will be set to
* VQ_RING_DESC_CHAIN_END.
- */
+ **/
uint16_t vq_desc_head_idx;
uint16_t vq_desc_tail_idx;
- uint16_t vq_queue_index; /* < PCI queue index */
- uint16_t offset; /* < relative offset to obtain addr in mbuf */
- uint16_t *notify_addr;
- struct rte_mbuf **sw_ring; /* < RX software ring. */
+ uint32_t rsv_8B;
+
+ void *vq_ring_virt_mem; /**< linear address of vring*/
+ /* physical address of vring, or virtual address for virtio_user. */
+ rte_iova_t vq_ring_mem;
+
struct zxdh_vq_desc_extra vq_descx[];
};
@@ -296,10 +305,9 @@ static inline void
zxdh_vring_init_packed(struct zxdh_vring_packed *vr, uint8_t *p,
unsigned long align, uint32_t num)
{
- vr->num = num;
vr->desc = (struct zxdh_vring_packed_desc *)p;
vr->driver = (struct zxdh_vring_packed_desc_event *)(p +
- vr->num * sizeof(struct zxdh_vring_packed_desc));
+ num * sizeof(struct zxdh_vring_packed_desc));
vr->device = (struct zxdh_vring_packed_desc_event *)RTE_ALIGN_CEIL(((uintptr_t)vr->driver +
sizeof(struct zxdh_vring_packed_desc_event)), align);
}
@@ -331,30 +339,21 @@ zxdh_vring_desc_init_indirect_packed(struct zxdh_vring_packed_desc *dp, int32_t
static inline void
zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
{
- if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
- vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+ if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
+ vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+ vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
}
static inline void
zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
{
- if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
- vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+ if (vq->event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
+ vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+ vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
}
-static inline void
-zxdh_mb(uint8_t weak_barriers)
-{
- if (weak_barriers)
- rte_atomic_thread_fence(rte_memory_order_seq_cst);
- else
- rte_mb();
-}
-
static inline
int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
{
@@ -365,7 +364,7 @@ int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue
rte_io_rmb();
used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
- return avail == used && used == vq->vq_packed.used_wrap_counter;
+ return avail == used && used == vq->used_wrap_counter;
}
static inline int32_t
@@ -381,22 +380,17 @@ zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
dp->flags = flags;
}
-static inline int32_t
-zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
-{
- uint16_t flags;
- uint16_t used, avail;
-
- flags = desc->flags;
- rte_io_rmb();
- used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
- avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
- return avail == used && used == vq->vq_packed.used_wrap_counter;
-}
-
static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq)
{
- ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
+ /* Bit[0:15]: vq queue index
+ * Bit[16:30]: avail index
+ * Bit[31]: avail wrap counter
+ */
+ uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags &
+ ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) |
+ ((uint32_t)vq->vq_avail_idx << 16) |
+ vq->vq_queue_index;
+ rte_write32(notify_data, vq->notify_addr);
}
static inline int32_t
@@ -404,7 +398,7 @@ zxdh_queue_kick_prepare_packed(struct zxdh_virtqueue *vq)
{
uint16_t flags = 0;
- zxdh_mb(1);
+ rte_mb();
flags = vq->vq_packed.ring.device->desc_event_flags;
return (flags != ZXDH_RING_EVENT_FLAGS_DISABLE);
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index db86922aea..111cf54b0d 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -114,6 +114,22 @@
RTE_MBUF_F_TX_SEC_OFFLOAD | \
RTE_MBUF_F_TX_UDP_SEG)
+#if RTE_CACHE_LINE_SIZE == 128
+#define NEXT_CACHELINE_OFF_16B 8
+#define NEXT_CACHELINE_OFF_8B 16
+#elif RTE_CACHE_LINE_SIZE == 64
+#define NEXT_CACHELINE_OFF_16B 4
+#define NEXT_CACHELINE_OFF_8B 8
+#else
+#define NEXT_CACHELINE_OFF_16B (RTE_CACHE_LINE_SIZE / 16)
+#define NEXT_CACHELINE_OFF_8B (RTE_CACHE_LINE_SIZE / 8)
+#endif
+#define N_PER_LOOP NEXT_CACHELINE_OFF_8B
+#define N_PER_LOOP_MASK (N_PER_LOOP - 1)
+
+#define rxq_get_vq(q) ((q)->vq)
+#define txq_get_vq(q) ((q)->vq)
+
uint32_t zxdh_outer_l2_type[16] = {
0,
RTE_PTYPE_L2_ETHER,
@@ -201,43 +217,6 @@ uint32_t zxdh_inner_l4_type[16] = {
0,
};
-static void
-zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
-{
- uint16_t used_idx = 0;
- uint16_t id = 0;
- uint16_t curr_id = 0;
- uint16_t free_cnt = 0;
- uint16_t size = vq->vq_nentries;
- struct zxdh_vring_packed_desc *desc = vq->vq_packed.ring.desc;
- struct zxdh_vq_desc_extra *dxp = NULL;
-
- used_idx = vq->vq_used_cons_idx;
- /* desc_is_used has a load-acquire or rte_io_rmb inside
- * and wait for used desc in virtqueue.
- */
- while (num > 0 && zxdh_desc_used(&desc[used_idx], vq)) {
- id = desc[used_idx].id;
- do {
- curr_id = used_idx;
- dxp = &vq->vq_descx[used_idx];
- used_idx += dxp->ndescs;
- free_cnt += dxp->ndescs;
- num -= dxp->ndescs;
- if (used_idx >= size) {
- used_idx -= size;
- vq->vq_packed.used_wrap_counter ^= 1;
- }
- if (dxp->cookie != NULL) {
- rte_pktmbuf_free(dxp->cookie);
- dxp->cookie = NULL;
- }
- } while (curr_id != id);
- }
- vq->vq_used_cons_idx = used_idx;
- vq->vq_free_cnt += free_cnt;
-}
-
static inline uint16_t
zxdh_get_mtu(struct zxdh_virtqueue *vq)
{
@@ -334,18 +313,17 @@ zxdh_xmit_fill_net_hdr(struct zxdh_virtqueue *vq, struct rte_mbuf *cookie,
}
static inline void
-zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_push(struct zxdh_virtnet_tx *txvq,
struct rte_mbuf *cookie)
{
struct zxdh_virtqueue *vq = txvq->vq;
uint16_t id = vq->vq_avail_idx;
struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
- uint16_t flags = vq->vq_packed.cached_flags;
+ uint16_t flags = vq->cached_flags;
struct zxdh_net_hdr_dl *hdr = NULL;
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
- dxp->ndescs = 1;
dxp->cookie = cookie;
hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -hdr_len);
zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -355,69 +333,65 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
dp->id = id;
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
vq->vq_free_cnt--;
zxdh_queue_store_flags_packed(dp, flags);
}
static inline void
-zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_append(struct zxdh_virtnet_tx *txvq,
struct rte_mbuf *cookie,
uint16_t needed)
{
struct zxdh_tx_region *txr = txvq->zxdh_net_hdr_mz->addr;
struct zxdh_virtqueue *vq = txvq->vq;
- uint16_t id = vq->vq_avail_idx;
- struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+ struct zxdh_vq_desc_extra *dep = &vq->vq_descx[0];
uint16_t head_idx = vq->vq_avail_idx;
uint16_t idx = head_idx;
struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
struct zxdh_vring_packed_desc *head_dp = &vq->vq_packed.ring.desc[idx];
struct zxdh_net_hdr_dl *hdr = NULL;
-
- uint16_t head_flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+ uint16_t id = vq->vq_avail_idx;
+ struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
+ uint16_t head_flags = 0;
- dxp->ndescs = needed;
- dxp->cookie = cookie;
- head_flags |= vq->vq_packed.cached_flags;
+ dxp->cookie = NULL;
+ /* setup first tx ring slot to point to header stored in reserved region. */
start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
start_dp[idx].len = hdr_len;
- head_flags |= ZXDH_VRING_DESC_F_NEXT;
+ start_dp[idx].id = idx;
+ head_flags |= vq->cached_flags | ZXDH_VRING_DESC_F_NEXT;
hdr = (void *)&txr[idx].tx_hdr;
- rte_prefetch1(hdr);
+ zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
+
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
- zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
-
do {
start_dp[idx].addr = rte_pktmbuf_iova(cookie);
start_dp[idx].len = cookie->data_len;
- start_dp[idx].id = id;
- if (likely(idx != head_idx)) {
- uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
-
- flags |= vq->vq_packed.cached_flags;
- start_dp[idx].flags = flags;
- }
+ start_dp[idx].id = idx;
+ dep[idx].cookie = cookie;
+ uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+ flags |= vq->cached_flags;
+ start_dp[idx].flags = flags;
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
} while ((cookie = cookie->next) != NULL);
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
vq->vq_avail_idx = idx;
-
zxdh_queue_store_flags_packed(head_dp, head_flags);
}
@@ -456,7 +430,7 @@ zxdh_update_packet_stats(struct zxdh_virtnet_stats *stats, struct rte_mbuf *mbuf
}
static void
-zxdh_xmit_flush(struct zxdh_virtqueue *vq)
+zxdh_xmit_fast_flush(struct zxdh_virtqueue *vq)
{
uint16_t id = 0;
uint16_t curr_id = 0;
@@ -472,20 +446,22 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
* for a used descriptor in the virtqueue.
*/
while (desc_is_used(&desc[used_idx], vq)) {
+ rte_prefetch0(&desc[used_idx + NEXT_CACHELINE_OFF_16B]);
id = desc[used_idx].id;
do {
+ desc[used_idx].id = used_idx;
curr_id = used_idx;
dxp = &vq->vq_descx[used_idx];
- used_idx += dxp->ndescs;
- free_cnt += dxp->ndescs;
- if (used_idx >= size) {
- used_idx -= size;
- vq->vq_packed.used_wrap_counter ^= 1;
- }
if (dxp->cookie != NULL) {
- rte_pktmbuf_free(dxp->cookie);
+ rte_pktmbuf_free_seg(dxp->cookie);
dxp->cookie = NULL;
}
+ used_idx += 1;
+ free_cnt += 1;
+ if (unlikely(used_idx == size)) {
+ used_idx = 0;
+ vq->used_wrap_counter ^= 1;
+ }
} while (curr_id != id);
}
vq->vq_used_cons_idx = used_idx;
@@ -499,13 +475,12 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
struct zxdh_virtqueue *vq = txvq->vq;
uint16_t nb_tx = 0;
- zxdh_xmit_flush(vq);
+ zxdh_xmit_fast_flush(vq);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
struct rte_mbuf *txm = tx_pkts[nb_tx];
int32_t can_push = 0;
int32_t slots = 0;
- int32_t need = 0;
rte_prefetch0(txm);
/* optimize ring usage */
@@ -522,26 +497,15 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
* default => number of segments + 1
**/
slots = txm->nb_segs + !can_push;
- need = slots - vq->vq_free_cnt;
/* Positive value indicates it need free vring descriptors */
- if (unlikely(need > 0)) {
- zxdh_xmit_cleanup_inorder_packed(vq, need);
- need = slots - vq->vq_free_cnt;
- if (unlikely(need > 0)) {
- PMD_TX_LOG(ERR,
- " No enough %d free tx descriptors to transmit."
- "freecnt %d",
- need,
- vq->vq_free_cnt);
- break;
- }
- }
+ if (unlikely(slots > vq->vq_free_cnt))
+ break;
/* Enqueue Packet buffers */
if (can_push)
- zxdh_enqueue_xmit_packed_fast(txvq, txm);
+ zxdh_xmit_enqueue_push(txvq, txm);
else
- zxdh_enqueue_xmit_packed(txvq, txm, slots);
+ zxdh_xmit_enqueue_append(txvq, txm, slots);
zxdh_update_packet_stats(&txvq->stats, txm);
}
txvq->stats.packets += nb_tx;
@@ -579,11 +543,6 @@ uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts,
}
#endif
- error = rte_net_intel_cksum_prepare(m);
- if (unlikely(error)) {
- rte_errno = -error;
- break;
- }
if (m->nb_segs > ZXDH_TX_MAX_SEGS) {
PMD_TX_LOG(ERR, "%d segs dropped", m->nb_segs);
txvq->stats.truncated_err += nb_pkts - nb_tx;
@@ -613,13 +572,15 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
uint16_t i, used_idx;
uint16_t id;
+ used_idx = vq->vq_used_cons_idx;
+ rte_prefetch0(&desc[used_idx]);
+
for (i = 0; i < num; i++) {
used_idx = vq->vq_used_cons_idx;
- /**
- * desc_is_used has a load-acquire or rte_io_rmb inside
+ /* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
- if (!zxdh_desc_used(&desc[used_idx], vq))
+ if (!desc_is_used(&desc[used_idx], vq))
return i;
len[i] = desc[used_idx].len;
id = desc[used_idx].id;
@@ -637,7 +598,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
vq->vq_used_cons_idx++;
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
vq->vq_used_cons_idx -= vq->vq_nentries;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
}
return i;
@@ -823,17 +784,52 @@ zxdh_rx_update_mbuf(struct zxdh_hw *hw, struct rte_mbuf *m, struct zxdh_net_hdr_
}
}
-static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
+static void refill_desc_unwrap(struct zxdh_virtqueue *vq,
+ struct rte_mbuf **cookie, uint16_t nb_pkts)
{
- int32_t error = 0;
- /*
- * Requeue the discarded mbuf. This should always be
- * successful since it was just dequeued.
- */
- error = zxdh_enqueue_recv_refill_packed(vq, &m, 1);
- if (unlikely(error)) {
- PMD_RX_LOG(ERR, "cannot enqueue discarded mbuf");
- rte_pktmbuf_free(m);
+ struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
+ struct zxdh_vq_desc_extra *dxp;
+ uint16_t flags = vq->cached_flags;
+ int32_t i;
+ uint16_t idx;
+
+ idx = vq->vq_avail_idx;
+ for (i = 0; i < nb_pkts; i++) {
+ dxp = &vq->vq_descx[idx];
+ dxp->cookie = (void *)cookie[i];
+ start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
+ start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
+ start_dp[idx].flags = flags;
+ idx++;
+ }
+ vq->vq_avail_idx += nb_pkts;
+ vq->vq_free_cnt = vq->vq_free_cnt - nb_pkts;
+}
+
+static void refill_que_descs(struct zxdh_virtqueue *vq, struct rte_eth_dev *dev)
+{
+ /* free_cnt may include mrg descs */
+ struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
+ uint16_t free_cnt = RTE_MIN(ZXDH_MBUF_BURST_SZ, vq->vq_free_cnt);
+ struct zxdh_virtnet_rx *rxvq = &vq->rxq;
+ uint16_t unwrap_cnt, left_cnt;
+
+ if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
+ left_cnt = free_cnt;
+ unwrap_cnt = 0;
+ if ((vq->vq_avail_idx + free_cnt) >= vq->vq_nentries) {
+ unwrap_cnt = vq->vq_nentries - vq->vq_avail_idx;
+ left_cnt = free_cnt - unwrap_cnt;
+ refill_desc_unwrap(vq, new_pkts, unwrap_cnt);
+ vq->vq_avail_idx = 0;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ }
+ if (left_cnt)
+ refill_desc_unwrap(vq, new_pkts + unwrap_cnt, left_cnt);
+
+ rte_io_wmb();
+ } else {
+ dev->data->rx_mbuf_alloc_failed += free_cnt;
}
}
@@ -842,7 +838,7 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
{
struct zxdh_virtnet_rx *rxvq = rx_queue;
- struct zxdh_virtqueue *vq = rxvq->vq;
+ struct zxdh_virtqueue *vq = rxq_get_vq(rxvq);
struct zxdh_hw *hw = vq->hw;
struct rte_mbuf *rxm = NULL;
struct rte_mbuf *prev = NULL;
@@ -852,7 +848,6 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t len = 0;
uint32_t seg_num = 0;
uint32_t seg_res = 0;
- uint32_t error = 0;
uint16_t hdr_size = 0;
uint16_t nb_rx = 0;
uint16_t i;
@@ -873,7 +868,8 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_pkts[nb_rx] = rxm;
prev = rxm;
len = lens[i];
- header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+ header = (struct zxdh_net_hdr_ul *)((char *)
+ rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
seg_num = header->type_hdr.num_buffers;
@@ -886,7 +882,7 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rxvq->stats.invalid_hdr_len_err++;
continue;
}
- rxm->data_off += hdr_size;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
rxm->nb_segs = seg_num;
rxm->ol_flags = 0;
rcvd_pkt_len = len - hdr_size;
@@ -902,18 +898,19 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
len = lens[i];
rxm = rcv_pkts[i];
rxm->data_len = len;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM;
rcvd_pkt_len += len;
prev->next = rxm;
prev = rxm;
rxm->next = NULL;
- seg_res -= 1;
+ seg_res--;
}
if (!seg_res) {
if (rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len) {
PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
- zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+ rte_pktmbuf_free(rx_pkts[nb_rx]);
rxvq->stats.errors++;
rxvq->stats.truncated_err++;
continue;
@@ -942,14 +939,14 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
prev->next = rxm;
prev = rxm;
rxm->next = NULL;
- extra_idx += 1;
+ extra_idx++;
}
seg_res -= rcv_cnt;
if (!seg_res) {
if (unlikely(rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len)) {
PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
- zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+ rte_pktmbuf_free(rx_pkts[nb_rx]);
rxvq->stats.errors++;
rxvq->stats.truncated_err++;
continue;
@@ -961,26 +958,285 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rxvq->stats.packets += nb_rx;
refill:
- /* Allocate new mbuf for the used descriptor */
- if (likely(!zxdh_queue_full(vq))) {
- struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
- /* free_cnt may include mrg descs */
- uint16_t free_cnt = RTE_MIN(vq->vq_free_cnt, ZXDH_MBUF_BURST_SZ);
-
- if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
- error = zxdh_enqueue_recv_refill_packed(vq, new_pkts, free_cnt);
- if (unlikely(error)) {
- for (i = 0; i < free_cnt; i++)
- rte_pktmbuf_free(new_pkts[i]);
- }
+ if (vq->vq_free_cnt > 0) {
+ struct rte_eth_dev *dev = hw->eth_dev;
+ refill_que_descs(vq, dev);
+ zxdh_queue_notify(vq);
+ }
+
+ return nb_rx;
+}
+
+static inline int pkt_padding(struct rte_mbuf *cookie, struct zxdh_hw *hw)
+{
+ uint16_t mtu_or_mss = 0;
+ uint16_t pkt_flag_lw16 = ZXDH_NO_IPID_UPDATE;
+ uint16_t l3_offset;
+ uint8_t pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+ uint8_t l3_ptype = ZXDH_PI_L3TYPE_NOIP;
+ struct zxdh_pi_hdr *pi_hdr;
+ struct zxdh_pd_hdr_dl *pd_hdr;
+ struct zxdh_net_hdr_dl *net_hdr_dl = hw->net_hdr_dl;
+ uint8_t hdr_len = hw->dl_net_hdr_len;
+ uint16_t ol_flag = 0;
+ struct zxdh_net_hdr_dl *hdr = NULL;
+ hdr = (struct zxdh_net_hdr_dl *)rte_pktmbuf_prepend(cookie, hdr_len);
+ if (unlikely(hdr == NULL))
+ return -1;
+
+ rte_memcpy(hdr, net_hdr_dl, hdr_len);
+
+ if (hw->has_tx_offload) {
+ pi_hdr = &hdr->pipd_hdr_dl.pi_hdr;
+ pd_hdr = &hdr->pipd_hdr_dl.pd_hdr;
+
+ pcode = ZXDH_PCODE_IP_PKT_TYPE;
+ if (cookie->ol_flags & RTE_MBUF_F_TX_IPV6)
+ l3_ptype = ZXDH_PI_L3TYPE_IPV6;
+ else if (cookie->ol_flags & RTE_MBUF_F_TX_IPV4)
+ l3_ptype = ZXDH_PI_L3TYPE_IP;
+ else
+ pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
- if (unlikely(zxdh_queue_kick_prepare_packed(vq)))
- zxdh_queue_notify(vq);
+ if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+ mtu_or_mss = (cookie->tso_segsz >= ZXDH_MIN_MSS) ?
+ cookie->tso_segsz : ZXDH_MIN_MSS;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+ pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT | ZXDH_TX_IP_CKSUM_CAL;
+ pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+ } else if (cookie->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
+ mtu_or_mss = hw->eth_dev->data->mtu;
+ mtu_or_mss = (mtu_or_mss >= ZXDH_MIN_MSS) ? mtu_or_mss : ZXDH_MIN_MSS;
+ pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT | ZXDH_TX_TCPUDP_CKSUM_CAL;
+ pcode = ZXDH_PCODE_UDP_PKT_TYPE;
} else {
- struct rte_eth_dev *dev = hw->eth_dev;
+ pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT;
+ }
+
+ if (cookie->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
+ pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+
+ if ((cookie->ol_flags & RTE_MBUF_F_TX_UDP_CKSUM) == RTE_MBUF_F_TX_UDP_CKSUM) {
+ pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+ } else if ((cookie->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) ==
+ RTE_MBUF_F_TX_TCP_CKSUM) {
+ pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+ }
+ pkt_flag_lw16 |= (mtu_or_mss >> ZXDH_MTU_MSS_UNIT_SHIFTBIT) & ZXDH_MTU_MSS_MASK;
+ pi_hdr->pkt_flag_lw16 = rte_be_to_cpu_16(pkt_flag_lw16);
+ pi_hdr->pkt_type = l3_ptype | ZXDH_PKT_FORM_CPU | pcode;
+
+ l3_offset = hdr_len + cookie->l2_len;
+ l3_offset += (cookie->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
+ cookie->outer_l2_len + cookie->outer_l3_len : 0;
+ pi_hdr->l3_offset = rte_be_to_cpu_16(l3_offset);
+ pi_hdr->l4_offset = rte_be_to_cpu_16(l3_offset + cookie->l3_len);
+ if (cookie->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
+ ol_flag |= ZXDH_PD_OFFLOAD_OUTER_IPCSUM;
+ } else {
+ pd_hdr = &hdr->pd_hdr;
+ }
+
+ pd_hdr->dst_vfid = rte_be_to_cpu_16(cookie->port);
+
+ if (cookie->ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+ ol_flag |= ZXDH_PD_OFFLOAD_CVLAN_INSERT;
+ pd_hdr->cvlan_insert = rte_be_to_cpu_16(cookie->vlan_tci);
+ if (cookie->ol_flags & RTE_MBUF_F_TX_QINQ) {
+ ol_flag |= ZXDH_PD_OFFLOAD_SVLAN_INSERT;
+ pd_hdr->svlan_insert = rte_be_to_cpu_16(cookie->vlan_tci_outer);
+ }
+ }
+
+ pd_hdr->ol_flag = rte_be_to_cpu_16(ol_flag);
+ return 0;
+}
+
+/* Populate 4 descriptors with data from 4 mbufs */
+static inline void
+tx_bunch(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+ struct rte_mbuf **pkts)
+{
+ uint16_t flags = vq->cached_flags;
+ int i;
+ for (i = 0; i < N_PER_LOOP; ++i, ++txdp, ++pkts) {
+ /* write data to descriptor */
+ txdp->addr = rte_mbuf_data_iova(*pkts);
+ txdp->len = (*pkts)->data_len;
+ txdp->flags = flags;
+ }
+}
+
+/* Populate 1 descriptor with data from 1 mbuf */
+static inline void
+tx1(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+ struct rte_mbuf *pkts)
+{
+ uint16_t flags = vq->cached_flags;
+ txdp->addr = rte_mbuf_data_iova(pkts);
+ txdp->len = pkts->data_len;
+ txdp->flags = flags;
+}
+
+static void submit_to_backend_simple(struct zxdh_virtqueue *vq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct zxdh_hw *hw = vq->hw;
+ struct rte_mbuf *m = NULL;
+ uint16_t id = vq->vq_avail_idx;
+ struct zxdh_vring_packed_desc *txdp = &vq->vq_packed.ring.desc[id];
+ struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+ int mainpart, leftover;
+ int i, j;
+
+ /*
+ * Process most of the packets in chunks of N pkts. Any
+ * leftover packets will get processed one at a time.
+ */
+ mainpart = (nb_pkts & ((uint32_t)~N_PER_LOOP_MASK));
+ leftover = (nb_pkts & ((uint32_t)N_PER_LOOP_MASK));
+
+ for (i = 0; i < mainpart; i += N_PER_LOOP) {
+ rte_prefetch0(dxp + i);
+ rte_prefetch0(tx_pkts + i);
+ for (j = 0; j < N_PER_LOOP; ++j) {
+ m = *(tx_pkts + i + j);
+ pkt_padding(m, hw);
+ (dxp + i + j)->cookie = (void *)m;
+ }
+ /* write data to descriptor */
+ tx_bunch(vq, txdp + i, tx_pkts + i);
+ }
- dev->data->rx_mbuf_alloc_failed += free_cnt;
+ if (leftover > 0) {
+ rte_prefetch0(dxp + mainpart);
+ rte_prefetch0(tx_pkts + mainpart);
+
+ for (i = 0; i < leftover; ++i) {
+ m = *(tx_pkts + mainpart + i);
+ pkt_padding(m, hw);
+ (dxp + mainpart + i)->cookie = m;
+ tx1(vq, txdp + mainpart + i, *(tx_pkts + mainpart + i));
}
}
+}
+
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct zxdh_virtnet_tx *txvq = tx_queue;
+ struct zxdh_virtqueue *vq = txq_get_vq(txvq);
+ uint16_t nb_tx = 0, nb_tx_left;
+
+ zxdh_xmit_fast_flush(vq);
+
+ nb_pkts = (uint16_t)RTE_MIN(nb_pkts, vq->vq_free_cnt);
+ if (unlikely(nb_pkts == 0)) {
+ txvq->stats.idle++;
+ return 0;
+ }
+
+ nb_tx_left = nb_pkts;
+ if ((vq->vq_avail_idx + nb_pkts) >= vq->vq_nentries) {
+ nb_tx = vq->vq_nentries - vq->vq_avail_idx;
+ nb_tx_left = nb_pkts - nb_tx;
+ submit_to_backend_simple(vq, tx_pkts, nb_tx);
+ vq->vq_avail_idx = 0;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+
+ vq->vq_free_cnt -= nb_tx;
+ tx_pkts += nb_tx;
+ }
+ if (nb_tx_left) {
+ submit_to_backend_simple(vq, tx_pkts, nb_tx_left);
+ vq->vq_avail_idx += nb_tx_left;
+ vq->vq_free_cnt -= nb_tx_left;
+ }
+
+ zxdh_queue_notify(vq);
+ txvq->stats.packets += nb_pkts;
+
+ return nb_pkts;
+}
+
+static inline int zxdh_init_mbuf(struct rte_mbuf *rxm, uint16_t len,
+ struct zxdh_hw *hw, struct zxdh_virtnet_rx *rxvq)
+{
+ uint16_t hdr_size = 0;
+ struct zxdh_net_hdr_ul *header;
+
+ header = (struct zxdh_net_hdr_ul *)((char *)
+ rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
+ rxm->ol_flags = 0;
+ rxm->vlan_tci = 0;
+ rxm->vlan_tci_outer = 0;
+
+ hdr_size = header->type_hdr.pd_len << 1;
+ if (unlikely(header->type_hdr.num_buffers != 1)) {
+ PMD_RX_LOG(DEBUG, "hdr_size:%u nb_segs %d is invalid",
+ hdr_size, header->type_hdr.num_buffers);
+ rte_pktmbuf_free(rxm);
+ rxvq->stats.invalid_hdr_len_err++;
+ return -1;
+ }
+ zxdh_rx_update_mbuf(hw, rxm, header);
+
+ rxm->nb_segs = 1;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
+ rxm->data_len = len - hdr_size;
+ rxm->port = hw->port_id;
+
+ if (rxm->data_len != rxm->pkt_len) {
+ PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d bufaddr %p.",
+ rxm->data_len, rxm->pkt_len, rxm->buf_addr);
+ rte_pktmbuf_dump(stdout, rxm, 40);
+ rte_pktmbuf_free(rxm);
+ rxvq->stats.truncated_err++;
+ rxvq->stats.errors++;
+ return -1;
+ }
+ return 0;
+}
+
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts)
+{
+ struct zxdh_virtnet_rx *rxvq = rx_queue;
+ struct zxdh_virtqueue *vq = rxq_get_vq(rxvq);
+ struct zxdh_hw *hw = vq->hw;
+ struct rte_mbuf *rxm;
+ uint32_t lens[ZXDH_MBUF_BURST_SZ];
+ uint16_t len = 0;
+ uint16_t nb_rx = 0;
+ uint16_t num;
+ uint16_t i = 0;
+
+ num = nb_pkts;
+ if (unlikely(num > ZXDH_MBUF_BURST_SZ))
+ num = ZXDH_MBUF_BURST_SZ;
+ num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, num);
+ if (num == 0) {
+ rxvq->stats.idle++;
+ goto refill;
+ }
+
+ for (i = 0; i < num; i++) {
+ rxm = rcv_pkts[i];
+ len = lens[i];
+ if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0))
+ break;
+
+ nb_rx++;
+ }
+ rxvq->stats.packets += nb_rx;
+
+refill:
+ if (vq->vq_free_cnt > 0) {
+ struct rte_eth_dev *dev = hw->eth_dev;
+ refill_que_descs(vq, dev);
+ zxdh_queue_notify(vq);
+ }
return nb_rx;
}
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index 424048607e..6fce04b803 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -36,44 +36,39 @@ struct zxdh_virtnet_stats {
uint64_t bytes;
uint64_t errors;
uint64_t idle;
- uint64_t full;
- uint64_t norefill;
- uint64_t multicast;
- uint64_t broadcast;
uint64_t truncated_err;
uint64_t offload_cfg_err;
uint64_t invalid_hdr_len_err;
uint64_t no_segs_err;
+ uint64_t no_free_tx_desc_err;
uint64_t size_bins[8];
};
struct __rte_cache_aligned zxdh_virtnet_rx {
struct zxdh_virtqueue *vq;
-
- uint64_t mbuf_initializer; /* value to init mbufs. */
struct rte_mempool *mpool; /* mempool for mbuf allocation */
- uint16_t queue_id; /* DPDK queue index. */
- uint16_t port_id; /* Device port identifier. */
struct zxdh_virtnet_stats stats;
const struct rte_memzone *mz; /* mem zone to populate RX ring. */
-
- /* dummy mbuf, for wraparound when processing RX ring. */
- struct rte_mbuf fake_mbuf;
+ uint64_t offloads;
+ uint16_t queue_id; /* DPDK queue index. */
+ uint16_t port_id; /* Device port identifier. */
};
struct __rte_cache_aligned zxdh_virtnet_tx {
struct zxdh_virtqueue *vq;
-
- rte_iova_t zxdh_net_hdr_mem; /* hdr for each xmit packet */
- uint16_t queue_id; /* DPDK queue index. */
- uint16_t port_id; /* Device port identifier. */
+ const struct rte_memzone *zxdh_net_hdr_mz; /* memzone to populate hdr. */
+ rte_iova_t zxdh_net_hdr_mem; /* hdr for each xmit packet */
struct zxdh_virtnet_stats stats;
const struct rte_memzone *mz; /* mem zone to populate TX ring. */
- const struct rte_memzone *zxdh_net_hdr_mz; /* memzone to populate hdr. */
+ uint64_t offloads;
+ uint16_t queue_id; /* DPDK queue index. */
+ uint16_t port_id; /* Device port identifier. */
};
uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
#endif /* ZXDH_RXTX_H */
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 115946 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v1] net/zxdh: optimize Rx/Tx path performance
2026-03-26 2:28 [PATCH v1] net/zxdh: optimize Rx/Tx path performance Junlong Wang
@ 2026-03-26 3:27 ` Stephen Hemminger
2026-04-06 4:26 ` Stephen Hemminger
2026-04-23 1:18 ` [PATCH v2 0/3] " Junlong Wang
2 siblings, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2026-03-26 3:27 UTC (permalink / raw)
To: Junlong Wang; +Cc: dev
On Thu, 26 Mar 2026 10:28:28 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> This patch optimizes the ZXDH PMD's receive and transmit path for better
> performance through several improvements:
>
> - Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
> zxdh_recv_single_pkts) for single-segment packet scenarios.
> - Remove RX software ring (sw_ring) to reduce memory allocation and
> copy.
> - Optimize descriptor management with prefetching and simplified
> cleanup.
> - Reorganize structure fields for better cache locality.
>
> These changes reduce CPU cycles and memory bandwidth consumption,
> resulting in improved packet processing throughput.
>
> Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
> ---
Too late for new features in 26.03; will look at it for 26.07.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v1] net/zxdh: optimize Rx/Tx path performance
2026-03-26 2:28 [PATCH v1] net/zxdh: optimize Rx/Tx path performance Junlong Wang
2026-03-26 3:27 ` Stephen Hemminger
@ 2026-04-06 4:26 ` Stephen Hemminger
2026-04-23 1:18 ` [PATCH v2 0/3] " Junlong Wang
2 siblings, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2026-04-06 4:26 UTC (permalink / raw)
To: Junlong Wang; +Cc: dev
On Thu, 26 Mar 2026 10:28:28 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> This patch optimizes the ZXDH PMD's receive and transmit path for better
> performance through several improvements:
>
> - Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
> zxdh_recv_single_pkts) for single-segment packet scenarios.
> - Remove RX software ring (sw_ring) to reduce memory allocation and
> copy.
> - Optimize descriptor management with prefetching and simplified
> cleanup.
> - Reorganize structure fields for better cache locality.
>
> These changes reduce CPU cycles and memory bandwidth consumption,
> resulting in improved packet processing throughput.
>
> Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
I saw some things when reviewing, but AI found a lot more.
On Thu, 26 Mar 2026 10:28:28 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> This patch optimizes the ZXDH PMD's receive and transmit path for better
> performance through several improvements:
Several issues found in review.
Errors:
1. zxdh_rxtx.c, pkt_padding(): The return value is never checked by
the caller submit_to_backend_simple(). If rte_pktmbuf_prepend()
fails and pkt_padding() returns -1, the descriptor is still
written with the mbuf's iova and data_len, submitting a corrupt
packet to the device. Must check the return value and skip the
packet on failure.
2. zxdh_rxtx.c, zxdh_recv_single_pkts(): When zxdh_init_mbuf() fails
the loop does "break" instead of continuing or freeing the
remaining mbufs. The mbufs at rcv_pkts[i+1] through
rcv_pkts[num-1] were already dequeued from the virtqueue by
zxdh_dequeue_burst_rx_packed() but are never freed, leaking them.
3. zxdh_rxtx.c, refill_desc_unwrap(): Descriptors are written with a
plain store "start_dp[idx].flags = flags" instead of using
zxdh_queue_store_flags_packed(). The original
zxdh_enqueue_recv_refill_packed() uses the store-barrier version
to ensure addr/len are visible before the flags. Without the
barrier, the device could see the available flag before the
descriptor data is committed. The rte_io_wmb() at the end of
refill_que_descs() is after all flags are already written, so
it does not help.
4. zxdh_rxtx.c, zxdh_xmit_pkts_prepare(): The removal of
rte_net_intel_cksum_prepare() means packets requesting checksum
offload will not have their pseudo-headers prepared. If the HW
expects a pseudo-header, transmitted checksums will be incorrect.
5. zxdh_queue.h, zxdh_queue_enable_intr(): This function checks
"if (event_flags_shadow == DISABLE)" then sets it to DISABLE
again. It never actually enables interrupts. Pre-existing bug
but this patch touches the function and should fix it.
6. zxdh_ethdev.c, zxdh_init_queue(): The hdr_mz NULL check logic is
contradictory. Lines 158-162 check "if (hdr_mz == NULL)" and goto
fail_q_alloc, but line 169 then checks "if (hdr_mz)" before
assigning zxdh_net_hdr_mem. If the first check fires, the second
is unreachable. If it doesn't fire, the second is always true.
Pick one guard and use it consistently.
Warnings:
1. zxdh_rxtx.c, zxdh_xmit_pkts_simple(): stats.bytes is never
incremented. The packed path uses zxdh_update_packet_stats() but
the simple path only counts packets and idle. The good_bytes
xstat will always read zero on the simple TX path.
2. zxdh_rxtx.c, zxdh_recv_single_pkts(): Same issue -- stats.bytes
is never incremented, so good_bytes will always be zero on the
single-packet receive path.
3. zxdh_rxtx.c, zxdh_init_mbuf(): rte_pktmbuf_dump(stdout, rxm, 40)
should not be in production code. It writes to stdout
unconditionally on the error path. Use PMD_RX_LOG or remove it.
4. zxdh_ethdev.c, zxdh_dev_free_mbufs(): Changed from
rte_pktmbuf_free() to rte_pktmbuf_free_seg(). If any mbufs in
the TX queue are multi-segment (from the packed path which
handles multi-seg via zxdh_xmit_enqueue_append), only the first
segment will be freed, leaking the rest.
5. This patch is large (~800 lines, 8 files) and combines multiple
independent changes: structure reorganization, new fast-path
functions, sw_ring removal, descriptor management, removal of
rte_net_intel_cksum_prepare, and MTU validation. Splitting into
separate patches would make review and bisection easier.
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v2 0/3] net/zxdh: optimize Rx/Tx path performance
2026-03-26 2:28 [PATCH v1] net/zxdh: optimize Rx/Tx path performance Junlong Wang
2026-03-26 3:27 ` Stephen Hemminger
2026-04-06 4:26 ` Stephen Hemminger
@ 2026-04-23 1:18 ` Junlong Wang
2026-04-23 1:18 ` [PATCH v2 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
` (4 more replies)
2 siblings, 5 replies; 23+ messages in thread
From: Junlong Wang @ 2026-04-23 1:18 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 2126 bytes --]
v2:
- zxdh_rxtx.c, pkt_padding(): modified the return value of pkt_padding();
- zxdh_rxtx.c, zxdh_recv_single_pkts(): modified so that when zxdh_init_mbuf() fails
the loop does "continue" and frees the mbuf;
- zxdh_rxtx.c, refill_desc_unwrap(): Add rte_io_wmb() before writing flags
in the refill_que_descs();
- zxdh_queue.h, zxdh_queue_enable_intr(): removed the unnecessary zxdh_queue_enable_intr() function;
- zxdh_ethdev.c, zxdh_init_queue(): changed the hdr_mz NULL check logic;
- zxdh_rxtx.c, zxdh_xmit_pkts_simple(), zxdh_recv_single_pkts(): added stats.bytes counting;
- zxdh_rxtx.c, zxdh_init_mbuf(): removed rte_pktmbuf_dump(stdout, rxm, 40);
- zxdh_ethdev.c, zxdh_dev_free_mbufs(): using rte_pktmbuf_free() to free mbufs;
- Split into separate patches: structure reorganization and sw_ring removal,
Rx receive optimization, Tx transmit optimization;
v1:
This patch optimizes the ZXDH PMD's receive and transmit path for better
performance through several improvements:
- Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
zxdh_recv_single_pkts) for single-segment packet scenarios.
- Remove RX software ring (sw_ring) to reduce memory allocation and
copy.
- Optimize descriptor management with prefetching and simplified
cleanup.
- Reorganize structure fields for better cache locality.
These changes reduce CPU cycles and memory bandwidth consumption,
resulting in improved packet processing throughput.
Junlong Wang (3):
net/zxdh: optimize queue structure to improve performance
net/zxdh: optimize Rx recv pkts performance
net/zxdh: optimize Tx xmit pkts performance
drivers/net/zxdh/zxdh_ethdev.c | 88 ++---
drivers/net/zxdh/zxdh_ethdev_ops.c | 24 +-
drivers/net/zxdh/zxdh_ethdev_ops.h | 4 +
drivers/net/zxdh/zxdh_pci.c | 2 +-
drivers/net/zxdh/zxdh_queue.c | 31 +-
drivers/net/zxdh/zxdh_queue.h | 125 +++----
drivers/net/zxdh/zxdh_rxtx.c | 520 +++++++++++++++++++++--------
drivers/net/zxdh/zxdh_rxtx.h | 27 +-
8 files changed, 527 insertions(+), 294 deletions(-)
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 3769 bytes --]
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v2 1/3] net/zxdh: optimize queue structure to improve performance
2026-04-23 1:18 ` [PATCH v2 0/3] " Junlong Wang
@ 2026-04-23 1:18 ` Junlong Wang
2026-04-23 18:57 ` Stephen Hemminger
2026-04-23 1:18 ` [PATCH v2 2/3] net/zxdh: optimize Rx recv pkts performance Junlong Wang
` (3 subsequent siblings)
4 siblings, 1 reply; 23+ messages in thread
From: Junlong Wang @ 2026-04-23 1:18 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 18541 bytes --]
Reorganize structure fields for better cache locality.
Remove RX software ring (sw_ring) to reduce memory allocation and
copy.
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 43 +++---------
drivers/net/zxdh/zxdh_pci.c | 2 +-
drivers/net/zxdh/zxdh_queue.c | 31 ++------
drivers/net/zxdh/zxdh_queue.h | 125 ++++++++++++++-------------------
drivers/net/zxdh/zxdh_rxtx.c | 22 +++---
5 files changed, 81 insertions(+), 142 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index aeb01f4652..625ce9d74c 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -644,7 +644,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
struct zxdh_virtnet_tx *txvq = NULL;
struct zxdh_virtqueue *vq = NULL;
size_t sz_hdr_mz = 0;
- void *sw_ring = NULL;
int32_t queue_type = zxdh_get_queue_type(vtpci_logic_qidx);
int32_t numa_node = dev->device->numa_node;
uint16_t vtpci_phy_qidx = 0;
@@ -692,11 +691,10 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
vq->vq_queue_index = vtpci_phy_qidx;
vq->vq_nentries = vq_size;
- vq->vq_packed.used_wrap_counter = 1;
- vq->vq_packed.cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
- vq->vq_packed.event_flags_shadow = 0;
+ vq->used_wrap_counter = 1;
+ vq->cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
if (queue_type == ZXDH_VTNET_RQ)
- vq->vq_packed.cached_flags |= ZXDH_VRING_DESC_F_WRITE;
+ vq->cached_flags |= ZXDH_VRING_DESC_F_WRITE;
/*
* Reserve a memzone for vring elements
@@ -741,21 +739,16 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
}
if (queue_type == ZXDH_VTNET_RQ) {
- size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * sizeof(vq->sw_ring[0]);
-
- sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, RTE_CACHE_LINE_SIZE, numa_node);
- if (!sw_ring) {
- PMD_DRV_LOG(ERR, "can not allocate RX soft ring");
- ret = -ENOMEM;
- goto fail_q_alloc;
- }
-
- vq->sw_ring = sw_ring;
rxvq = &vq->rxq;
rxvq->vq = vq;
rxvq->port_id = dev->data->port_id;
rxvq->mz = mz;
} else { /* queue_type == VTNET_TQ */
+ if (hdr_mz == NULL) {
+ ret = -ENOMEM;
+ PMD_DRV_LOG(ERR, "can not allocate TX soft ring: %d", ret);
+ goto fail_q_alloc;
+ }
txvq = &vq->txq;
txvq->vq = vq;
txvq->port_id = dev->data->port_id;
@@ -764,23 +757,9 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
txvq->zxdh_net_hdr_mem = hdr_mz->iova;
}
- vq->offset = offsetof(struct rte_mbuf, buf_iova);
if (queue_type == ZXDH_VTNET_TQ) {
struct zxdh_tx_region *txr = hdr_mz->addr;
- uint32_t i;
-
memset(txr, 0, vq_size * sizeof(*txr));
- for (i = 0; i < vq_size; i++) {
- /* first indirect descriptor is always the tx header */
- struct zxdh_vring_packed_desc *start_dp = txr[i].tx_packed_indir;
-
- zxdh_vring_desc_init_indirect_packed(start_dp,
- RTE_DIM(txr[i].tx_packed_indir));
- start_dp->addr = txvq->zxdh_net_hdr_mem + i * sizeof(*txr) +
- offsetof(struct zxdh_tx_region, tx_hdr);
- /* length will be updated to actual pi hdr size when xmit pkt */
- start_dp->len = 0;
- }
}
if (ZXDH_VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
PMD_DRV_LOG(ERR, "setup_queue failed");
@@ -788,8 +767,8 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
}
return 0;
fail_q_alloc:
- rte_free(sw_ring);
- rte_memzone_free(hdr_mz);
+ if (hdr_mz)
+ rte_memzone_free(hdr_mz);
rte_memzone_free(mz);
rte_free(vq);
return ret;
@@ -1537,8 +1516,6 @@ static const struct eth_dev_ops zxdh_eth_dev_ops = {
.dev_infos_get = zxdh_dev_infos_get,
.rx_queue_setup = zxdh_dev_rx_queue_setup,
.tx_queue_setup = zxdh_dev_tx_queue_setup,
- .rx_queue_intr_enable = zxdh_dev_rx_queue_intr_enable,
- .rx_queue_intr_disable = zxdh_dev_rx_queue_intr_disable,
.rxq_info_get = zxdh_rxq_info_get,
.txq_info_get = zxdh_txq_info_get,
.link_update = zxdh_dev_link_update,
diff --git a/drivers/net/zxdh/zxdh_pci.c b/drivers/net/zxdh/zxdh_pci.c
index 4ba31905fc..0bc27ed111 100644
--- a/drivers/net/zxdh/zxdh_pci.c
+++ b/drivers/net/zxdh/zxdh_pci.c
@@ -231,7 +231,7 @@ zxdh_notify_queue(struct zxdh_hw *hw, struct zxdh_virtqueue *vq)
notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index;
if (zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) &&
- (vq->vq_packed.cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
+ (vq->cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
notify_data |= RTE_BIT32(31);
PMD_DRV_LOG(DEBUG, "queue:%d notify_data 0x%x notify_addr 0x%p",
diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c
index 7162593b16..1c60265d8f 100644
--- a/drivers/net/zxdh/zxdh_queue.c
+++ b/drivers/net/zxdh/zxdh_queue.c
@@ -382,32 +382,12 @@ zxdh_tx_queue_config(struct rte_eth_dev *dev, uint16_t queue_idx)
return 0;
}
-int32_t
-zxdh_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
-{
- struct zxdh_virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
- struct zxdh_virtqueue *vq = rxvq->vq;
-
- zxdh_queue_enable_intr(vq);
- return 0;
-}
-
-int32_t
-zxdh_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
-{
- struct zxdh_virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
- struct zxdh_virtqueue *vq = rxvq->vq;
-
- zxdh_queue_disable_intr(vq);
- return 0;
-}
-
int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
struct rte_mbuf **cookie, uint16_t num)
{
struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
struct zxdh_vq_desc_extra *dxp;
- uint16_t flags = vq->vq_packed.cached_flags;
+ uint16_t flags = vq->cached_flags;
int32_t i;
uint16_t idx;
@@ -415,7 +395,6 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
idx = vq->vq_avail_idx;
dxp = &vq->vq_descx[idx];
dxp->cookie = (void *)cookie[i];
- dxp->ndescs = 1;
/* rx pkt fill in data_off */
start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
@@ -423,8 +402,8 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
zxdh_queue_store_flags_packed(&start_dp[idx], flags);
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
- flags = vq->vq_packed.cached_flags;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ flags = vq->cached_flags;
}
}
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
@@ -467,7 +446,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
int32_t cnt = 0;
i = vq->vq_used_cons_idx;
- while (zxdh_desc_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
+ while (desc_is_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
dxp = &vq->vq_descx[descs[i].id];
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -477,7 +456,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
vq->vq_used_cons_idx++;
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
vq->vq_used_cons_idx -= vq->vq_nentries;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
i = vq->vq_used_cons_idx;
}
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 1a0c8a0d90..49744df54e 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -9,6 +9,7 @@
#include <rte_common.h>
#include <rte_atomic.h>
+#include <rte_io.h>
#include "zxdh_ethdev.h"
#include "zxdh_rxtx.h"
@@ -117,7 +118,6 @@ struct zxdh_vring_packed_desc_event {
};
struct zxdh_vring_packed {
- uint32_t num;
struct zxdh_vring_packed_desc *desc;
struct zxdh_vring_packed_desc_event *driver;
struct zxdh_vring_packed_desc_event *device;
@@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra {
uint16_t next;
};
+struct zxdh_vring {
+ uint32_t num;
+ struct zxdh_vring_desc *desc;
+ struct zxdh_vring_avail *avail;
+ struct zxdh_vring_used *used;
+};
+
struct zxdh_virtqueue {
+ union {
+ struct {
+ struct zxdh_vring ring; /**< vring keeping desc, used and avail */
+ } vq_split;
+ struct __rte_packed_begin {
+ struct zxdh_vring_packed ring;
+ } __rte_packed_end vq_packed;
+ };
struct zxdh_hw *hw; /* < zxdh_hw structure pointer. */
- struct {
- /* vring keeping descs and events */
- struct zxdh_vring_packed ring;
- uint8_t used_wrap_counter;
- uint8_t rsv;
- uint16_t cached_flags; /* < cached flags for descs */
- uint16_t event_flags_shadow;
- uint16_t rsv1;
- } vq_packed;
-
- uint16_t vq_used_cons_idx; /* < last consumed descriptor */
- uint16_t vq_nentries; /* < vring desc numbers */
- uint16_t vq_free_cnt; /* < num of desc available */
- uint16_t vq_avail_idx; /* < sync until needed */
- uint16_t vq_free_thresh; /* < free threshold */
- uint16_t rsv2;
-
- void *vq_ring_virt_mem; /* < linear address of vring */
- uint32_t vq_ring_size;
+ uint16_t vq_used_cons_idx; /**< last consumed descriptor */
+ uint16_t vq_avail_idx; /**< sync until needed */
+ uint16_t vq_nentries; /**< vring desc numbers */
+ uint16_t vq_free_cnt; /**< num of desc available */
+
+ uint16_t cached_flags; /**< cached flags for descs */
+ uint8_t used_wrap_counter;
+ uint8_t rsv;
+ uint16_t vq_free_thresh; /**< free threshold */
+ uint16_t next_qidx;
+
+ void *notify_addr;
union {
struct zxdh_virtnet_rx rxq;
struct zxdh_virtnet_tx txq;
};
- /*
- * physical address of vring, or virtual address
- */
- rte_iova_t vq_ring_mem;
+ uint16_t vq_queue_index; /* PACKED: phy_idx, SPLIT: logic_idx */
+ uint16_t event_flags_shadow;
+ uint32_t vq_ring_size;
- /*
+ /**
* Head of the free chain in the descriptor table. If
* there are no free descriptors, this will be set to
* VQ_RING_DESC_CHAIN_END.
- */
+ **/
uint16_t vq_desc_head_idx;
uint16_t vq_desc_tail_idx;
- uint16_t vq_queue_index; /* < PCI queue index */
- uint16_t offset; /* < relative offset to obtain addr in mbuf */
- uint16_t *notify_addr;
- struct rte_mbuf **sw_ring; /* < RX software ring. */
+ uint32_t rsv_8B;
+
+ void *vq_ring_virt_mem; /**< linear address of vring*/
+ /* physical address of vring, or virtual address for virtio_user. */
+ rte_iova_t vq_ring_mem;
+
struct zxdh_vq_desc_extra vq_descx[];
};
@@ -296,10 +305,9 @@ static inline void
zxdh_vring_init_packed(struct zxdh_vring_packed *vr, uint8_t *p,
unsigned long align, uint32_t num)
{
- vr->num = num;
vr->desc = (struct zxdh_vring_packed_desc *)p;
vr->driver = (struct zxdh_vring_packed_desc_event *)(p +
- vr->num * sizeof(struct zxdh_vring_packed_desc));
+ num * sizeof(struct zxdh_vring_packed_desc));
vr->device = (struct zxdh_vring_packed_desc_event *)RTE_ALIGN_CEIL(((uintptr_t)vr->driver +
sizeof(struct zxdh_vring_packed_desc_event)), align);
}
@@ -331,30 +339,12 @@ zxdh_vring_desc_init_indirect_packed(struct zxdh_vring_packed_desc *dp, int32_t
static inline void
zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
{
- if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
- vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+ if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
+ vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+ vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
}
-static inline void
-zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
-{
- if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
- vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
- }
-}
-
-static inline void
-zxdh_mb(uint8_t weak_barriers)
-{
- if (weak_barriers)
- rte_atomic_thread_fence(rte_memory_order_seq_cst);
- else
- rte_mb();
-}
-
static inline
int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
{
@@ -365,7 +355,7 @@ int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue
rte_io_rmb();
used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
- return avail == used && used == vq->vq_packed.used_wrap_counter;
+ return avail == used && used == vq->used_wrap_counter;
}
static inline int32_t
@@ -381,22 +371,17 @@ zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
dp->flags = flags;
}
-static inline int32_t
-zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
-{
- uint16_t flags;
- uint16_t used, avail;
-
- flags = desc->flags;
- rte_io_rmb();
- used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
- avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
- return avail == used && used == vq->vq_packed.used_wrap_counter;
-}
-
static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq)
{
- ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
+ /* Bit[0:15]: vq queue index
+ * Bit[16:30]: avail index
+ * Bit[31]: avail wrap counter
+ */
+ uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags &
+ ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) |
+ ((uint32_t)vq->vq_avail_idx << 16) |
+ vq->vq_queue_index;
+ rte_write32(notify_data, vq->notify_addr);
}
static inline int32_t
@@ -404,7 +389,7 @@ zxdh_queue_kick_prepare_packed(struct zxdh_virtqueue *vq)
{
uint16_t flags = 0;
- zxdh_mb(1);
+ rte_mb();
flags = vq->vq_packed.ring.device->desc_event_flags;
return (flags != ZXDH_RING_EVENT_FLAGS_DISABLE);
@@ -425,8 +410,6 @@ int32_t zxdh_dev_rx_queue_setup(struct rte_eth_dev *dev,
uint32_t socket_id,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp);
-int32_t zxdh_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
-int32_t zxdh_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
int32_t zxdh_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t logic_qidx);
void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq);
int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index db86922aea..93506a4b49 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -216,7 +216,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
/* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
- while (num > 0 && zxdh_desc_used(&desc[used_idx], vq)) {
+ while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
id = desc[used_idx].id;
do {
curr_id = used_idx;
@@ -226,7 +226,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
num -= dxp->ndescs;
if (used_idx >= size) {
used_idx -= size;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -340,7 +340,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
struct zxdh_virtqueue *vq = txvq->vq;
uint16_t id = vq->vq_avail_idx;
struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
- uint16_t flags = vq->vq_packed.cached_flags;
+ uint16_t flags = vq->cached_flags;
struct zxdh_net_hdr_dl *hdr = NULL;
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
@@ -355,7 +355,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
dp->id = id;
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
vq->vq_free_cnt--;
zxdh_queue_store_flags_packed(dp, flags);
@@ -381,7 +381,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
dxp->ndescs = needed;
dxp->cookie = cookie;
- head_flags |= vq->vq_packed.cached_flags;
+ head_flags |= vq->cached_flags;
start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
start_dp[idx].len = hdr_len;
@@ -392,7 +392,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -404,14 +404,14 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
if (likely(idx != head_idx)) {
uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
- flags |= vq->vq_packed.cached_flags;
+ flags |= vq->cached_flags;
start_dp[idx].flags = flags;
}
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
} while ((cookie = cookie->next) != NULL);
@@ -480,7 +480,7 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
free_cnt += dxp->ndescs;
if (used_idx >= size) {
used_idx -= size;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -619,7 +619,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
- if (!zxdh_desc_used(&desc[used_idx], vq))
+ if (!desc_is_used(&desc[used_idx], vq))
return i;
len[i] = desc[used_idx].len;
id = desc[used_idx].id;
@@ -637,7 +637,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
vq->vq_used_cons_idx++;
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
vq->vq_used_cons_idx -= vq->vq_nentries;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
}
return i;
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 43059 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v2 2/3] net/zxdh: optimize Rx recv pkts performance
2026-04-23 1:18 ` [PATCH v2 0/3] " Junlong Wang
2026-04-23 1:18 ` [PATCH v2 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
@ 2026-04-23 1:18 ` Junlong Wang
2026-04-23 18:54 ` Stephen Hemminger
2026-04-23 23:39 ` Stephen Hemminger
2026-04-23 1:18 ` [PATCH v2 3/3] net/zxdh: optimize Tx xmit " Junlong Wang
` (2 subsequent siblings)
4 siblings, 2 replies; 23+ messages in thread
From: Junlong Wang @ 2026-04-23 1:18 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 16771 bytes --]
Add a simple Rx burst function (zxdh_recv_single_pkts) for receiving
single-segment packets, and optimize the packed-ring Rx burst path
(zxdh_recv_pkts_packed).
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 40 +++++--
drivers/net/zxdh/zxdh_ethdev_ops.c | 24 ++--
drivers/net/zxdh/zxdh_ethdev_ops.h | 4 +
drivers/net/zxdh/zxdh_rxtx.c | 179 +++++++++++++++++++++++------
drivers/net/zxdh/zxdh_rxtx.h | 16 +--
5 files changed, 200 insertions(+), 63 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index 625ce9d74c..7363d4c3d3 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -1269,18 +1269,44 @@ zxdh_dev_close(struct rte_eth_dev *dev)
return ret;
}
-static int32_t
-zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+/*
+ * Determine whether the current configuration requires support for scattered
+ * receive; return 1 if scattered receive is required and 0 if not.
+ */
+static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
{
- struct zxdh_hw *hw = eth_dev->data->dev_private;
+ uint16_t buf_size;
- if (!zxdh_pci_with_feature(hw, ZXDH_NET_F_MRG_RXBUF)) {
- PMD_DRV_LOG(ERR, "port %u not support rx mergeable", eth_dev->data->port_id);
- return -1;
+ if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
+ eth_dev->data->lro = 1;
+ return 1;
}
+
+ if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ return 1;
+
+
+ PMD_DRV_LOG(DEBUG, "port %d min_rx_buf_size %d",
+ eth_dev->data->port_id, eth_dev->data->min_rx_buf_size);
+ buf_size = eth_dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+ if (eth_dev->data->mtu + ZXDH_ETH_OVERHEAD > buf_size)
+ return 1;
+
+ return 0;
+}
+
+static int32_t
+zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+{
eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
+ eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
+
eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
- eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+
+ if (eth_dev->data->scattered_rx)
+ eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+ else
+ eth_dev->rx_pkt_burst = &zxdh_recv_single_pkts;
return 0;
}
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.c b/drivers/net/zxdh/zxdh_ethdev_ops.c
index 50247116d9..e2c2885add 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.c
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.c
@@ -95,10 +95,6 @@ static const struct rte_zxdh_xstats_name_off zxdh_rxq_stat_strings[] = {
{"good_bytes", offsetof(struct zxdh_virtnet_rx, stats.bytes)},
{"errors", offsetof(struct zxdh_virtnet_rx, stats.errors)},
{"idle", offsetof(struct zxdh_virtnet_rx, stats.idle)},
- {"full", offsetof(struct zxdh_virtnet_rx, stats.full)},
- {"norefill", offsetof(struct zxdh_virtnet_rx, stats.norefill)},
- {"multicast_packets", offsetof(struct zxdh_virtnet_rx, stats.multicast)},
- {"broadcast_packets", offsetof(struct zxdh_virtnet_rx, stats.broadcast)},
{"truncated_err", offsetof(struct zxdh_virtnet_rx, stats.truncated_err)},
{"offload_cfg_err", offsetof(struct zxdh_virtnet_rx, stats.offload_cfg_err)},
{"invalid_hdr_len_err", offsetof(struct zxdh_virtnet_rx, stats.invalid_hdr_len_err)},
@@ -117,14 +113,12 @@ static const struct rte_zxdh_xstats_name_off zxdh_txq_stat_strings[] = {
{"good_packets", offsetof(struct zxdh_virtnet_tx, stats.packets)},
{"good_bytes", offsetof(struct zxdh_virtnet_tx, stats.bytes)},
{"errors", offsetof(struct zxdh_virtnet_tx, stats.errors)},
- {"idle", offsetof(struct zxdh_virtnet_tx, stats.idle)},
- {"norefill", offsetof(struct zxdh_virtnet_tx, stats.norefill)},
- {"multicast_packets", offsetof(struct zxdh_virtnet_tx, stats.multicast)},
- {"broadcast_packets", offsetof(struct zxdh_virtnet_tx, stats.broadcast)},
+ {"idle", offsetof(struct zxdh_virtnet_tx, stats.idle)},
{"truncated_err", offsetof(struct zxdh_virtnet_tx, stats.truncated_err)},
{"offload_cfg_err", offsetof(struct zxdh_virtnet_tx, stats.offload_cfg_err)},
{"invalid_hdr_len_err", offsetof(struct zxdh_virtnet_tx, stats.invalid_hdr_len_err)},
{"no_segs_err", offsetof(struct zxdh_virtnet_tx, stats.no_segs_err)},
+ {"no_free_tx_desc_err", offsetof(struct zxdh_virtnet_tx, stats.no_free_tx_desc_err)},
{"undersize_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[0])},
{"size_64_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[1])},
{"size_65_127_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[2])},
@@ -2026,6 +2020,20 @@ int zxdh_dev_mtu_set(struct rte_eth_dev *dev, uint16_t new_mtu)
uint16_t vfid = zxdh_vport_to_vfid(hw->vport);
int ret;
+ /* If device is started, refuse mtu that requires the support of
+ * scattered packets when this feature has not been enabled before.
+ */
+ if (dev->data->dev_started &&
+ ((!dev->data->scattered_rx &&
+ ((uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) >
+ (dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM))) ||
+ (dev->data->scattered_rx &&
+ ((uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) <=
+ (dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM))))) {
+ PMD_DRV_LOG(ERR, "Stop port first.");
+ return -EINVAL;
+ }
+
if (hw->is_pf) {
ret = zxdh_get_panel_attr(dev, &panel);
if (ret != 0) {
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.h b/drivers/net/zxdh/zxdh_ethdev_ops.h
index 6dfe4be473..c49d79c232 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.h
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.h
@@ -40,6 +40,10 @@
#define ZXDH_SPM_SPEED_4X_100G RTE_BIT32(10)
#define ZXDH_SPM_SPEED_4X_200G RTE_BIT32(11)
+#define ZXDH_VLAN_TAG_LEN 4
+#define ZXDH_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ZXDH_VLAN_TAG_LEN * 2)
+#define ZXDH_MTU_TO_PKTLEN(mtu) ((mtu) + ZXDH_ETH_OVERHEAD)
+
struct zxdh_np_stats_data {
uint64_t n_pkts_dropped;
uint64_t n_bytes_dropped;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index 93506a4b49..cc266e8659 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -114,6 +114,8 @@
RTE_MBUF_F_TX_SEC_OFFLOAD | \
RTE_MBUF_F_TX_UDP_SEG)
+#define rxq_get_vq(q) ((q)->vq)
+
uint32_t zxdh_outer_l2_type[16] = {
0,
RTE_PTYPE_L2_ETHER,
@@ -613,10 +615,12 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
uint16_t i, used_idx;
uint16_t id;
+ used_idx = vq->vq_used_cons_idx;
+ rte_prefetch0(&desc[used_idx]);
+
for (i = 0; i < num; i++) {
used_idx = vq->vq_used_cons_idx;
- /**
- * desc_is_used has a load-acquire or rte_io_rmb inside
+ /* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
if (!desc_is_used(&desc[used_idx], vq))
@@ -823,17 +827,52 @@ zxdh_rx_update_mbuf(struct zxdh_hw *hw, struct rte_mbuf *m, struct zxdh_net_hdr_
}
}
-static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
+static void refill_desc_unwrap(struct zxdh_virtqueue *vq,
+ struct rte_mbuf **cookie, uint16_t nb_pkts)
{
- int32_t error = 0;
- /*
- * Requeue the discarded mbuf. This should always be
- * successful since it was just dequeued.
- */
- error = zxdh_enqueue_recv_refill_packed(vq, &m, 1);
- if (unlikely(error)) {
- PMD_RX_LOG(ERR, "cannot enqueue discarded mbuf");
- rte_pktmbuf_free(m);
+ struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
+ struct zxdh_vq_desc_extra *dxp;
+ uint16_t flags = vq->cached_flags;
+ int32_t i;
+ uint16_t idx;
+
+ idx = vq->vq_avail_idx;
+ for (i = 0; i < nb_pkts; i++) {
+ dxp = &vq->vq_descx[idx];
+ dxp->cookie = (void *)cookie[i];
+ start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
+ start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
+ zxdh_queue_store_flags_packed(&start_dp[idx], flags);
+ idx++;
+ }
+ vq->vq_avail_idx += nb_pkts;
+ vq->vq_free_cnt = vq->vq_free_cnt - nb_pkts;
+}
+
+static void refill_que_descs(struct zxdh_virtqueue *vq, struct rte_eth_dev *dev)
+{
+ /* free_cnt may include mrg descs */
+ struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
+ uint16_t free_cnt = RTE_MIN(ZXDH_MBUF_BURST_SZ, vq->vq_free_cnt);
+ struct zxdh_virtnet_rx *rxvq = &vq->rxq;
+ uint16_t unwrap_cnt, left_cnt;
+
+ if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
+ left_cnt = free_cnt;
+ unwrap_cnt = 0;
+ if ((vq->vq_avail_idx + free_cnt) >= vq->vq_nentries) {
+ unwrap_cnt = vq->vq_nentries - vq->vq_avail_idx;
+ left_cnt = free_cnt - unwrap_cnt;
+ refill_desc_unwrap(vq, new_pkts, unwrap_cnt);
+ vq->vq_avail_idx = 0;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ }
+ if (left_cnt)
+ refill_desc_unwrap(vq, new_pkts + unwrap_cnt, left_cnt);
+
+ rte_io_wmb();
+ } else {
+ dev->data->rx_mbuf_alloc_failed += free_cnt;
}
}
@@ -842,7 +881,7 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
{
struct zxdh_virtnet_rx *rxvq = rx_queue;
- struct zxdh_virtqueue *vq = rxvq->vq;
+ struct zxdh_virtqueue *vq = rxq_get_vq(rxvq);
struct zxdh_hw *hw = vq->hw;
struct rte_mbuf *rxm = NULL;
struct rte_mbuf *prev = NULL;
@@ -852,7 +891,6 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t len = 0;
uint32_t seg_num = 0;
uint32_t seg_res = 0;
- uint32_t error = 0;
uint16_t hdr_size = 0;
uint16_t nb_rx = 0;
uint16_t i;
@@ -873,7 +911,8 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_pkts[nb_rx] = rxm;
prev = rxm;
len = lens[i];
- header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+ header = (struct zxdh_net_hdr_ul *)((char *)
+ rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
seg_num = header->type_hdr.num_buffers;
@@ -886,7 +925,7 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rxvq->stats.invalid_hdr_len_err++;
continue;
}
- rxm->data_off += hdr_size;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
rxm->nb_segs = seg_num;
rxm->ol_flags = 0;
rcvd_pkt_len = len - hdr_size;
@@ -902,18 +941,19 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
len = lens[i];
rxm = rcv_pkts[i];
rxm->data_len = len;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM;
rcvd_pkt_len += len;
prev->next = rxm;
prev = rxm;
rxm->next = NULL;
- seg_res -= 1;
+ seg_res--;
}
if (!seg_res) {
if (rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len) {
PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
- zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+ rte_pktmbuf_free(rx_pkts[nb_rx]);
rxvq->stats.errors++;
rxvq->stats.truncated_err++;
continue;
@@ -942,14 +982,14 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
prev->next = rxm;
prev = rxm;
rxm->next = NULL;
- extra_idx += 1;
+ extra_idx++;
}
seg_res -= rcv_cnt;
if (!seg_res) {
if (unlikely(rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len)) {
PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
- zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+ rte_pktmbuf_free(rx_pkts[nb_rx]);
rxvq->stats.errors++;
rxvq->stats.truncated_err++;
continue;
@@ -961,26 +1001,91 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rxvq->stats.packets += nb_rx;
refill:
- /* Allocate new mbuf for the used descriptor */
- if (likely(!zxdh_queue_full(vq))) {
- struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
- /* free_cnt may include mrg descs */
- uint16_t free_cnt = RTE_MIN(vq->vq_free_cnt, ZXDH_MBUF_BURST_SZ);
-
- if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
- error = zxdh_enqueue_recv_refill_packed(vq, new_pkts, free_cnt);
- if (unlikely(error)) {
- for (i = 0; i < free_cnt; i++)
- rte_pktmbuf_free(new_pkts[i]);
- }
+ if (vq->vq_free_cnt > 0) {
+ struct rte_eth_dev *dev = hw->eth_dev;
+ refill_que_descs(vq, dev);
+ zxdh_queue_notify(vq);
+ }
- if (unlikely(zxdh_queue_kick_prepare_packed(vq)))
- zxdh_queue_notify(vq);
- } else {
- struct rte_eth_dev *dev = hw->eth_dev;
+ return nb_rx;
+}
+
+static inline int zxdh_init_mbuf(struct rte_mbuf *rxm, uint16_t len,
+ struct zxdh_hw *hw, struct zxdh_virtnet_rx *rxvq)
+{
+ uint16_t hdr_size = 0;
+ struct zxdh_net_hdr_ul *header;
+
+ header = (struct zxdh_net_hdr_ul *)((char *)
+ rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
+ rxm->ol_flags = 0;
+ rxm->vlan_tci = 0;
+ rxm->vlan_tci_outer = 0;
+
+ hdr_size = header->type_hdr.pd_len << 1;
+ if (unlikely(header->type_hdr.num_buffers != 1)) {
+ PMD_RX_LOG(DEBUG, "hdr_size:%u nb_segs %d is invalid",
+ hdr_size, header->type_hdr.num_buffers);
+ rte_pktmbuf_free(rxm);
+ rxvq->stats.invalid_hdr_len_err++;
+ return -1;
+ }
+ zxdh_rx_update_mbuf(hw, rxm, header);
+
+ rxm->nb_segs = 1;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
+ rxm->data_len = len - hdr_size;
+ rxm->port = hw->port_id;
+
+ if (rxm->data_len != rxm->pkt_len) {
+ PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d bufaddr %p.",
+ rxm->data_len, rxm->pkt_len, rxm->buf_addr);
+ rte_pktmbuf_free(rxm);
+ rxvq->stats.truncated_err++;
+ rxvq->stats.errors++;
+ return -1;
+ }
+ return 0;
+}
+
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts)
+{
+ struct zxdh_virtnet_rx *rxvq = rx_queue;
+ struct zxdh_virtqueue *vq = rxq_get_vq(rxvq);
+ struct zxdh_hw *hw = vq->hw;
+ struct rte_mbuf *rxm;
+ uint32_t lens[ZXDH_MBUF_BURST_SZ];
+ uint16_t len = 0;
+ uint16_t nb_rx = 0;
+ uint16_t num;
+ uint16_t i = 0;
- dev->data->rx_mbuf_alloc_failed += free_cnt;
+ num = nb_pkts;
+ if (unlikely(num > ZXDH_MBUF_BURST_SZ))
+ num = ZXDH_MBUF_BURST_SZ;
+ num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, num);
+ if (num == 0) {
+ rxvq->stats.idle++;
+ goto refill;
+ }
+
+ for (i = 0; i < num; i++) {
+ rxm = rcv_pkts[i];
+ len = lens[i];
+ if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0)) {
+ rte_pktmbuf_free(rxm);
+ continue;
}
+ zxdh_update_packet_stats(&rxvq->stats, rxm);
+ nb_rx++;
+ }
+ rxvq->stats.packets += nb_rx;
+
+refill:
+ if (vq->vq_free_cnt > 0) {
+ struct rte_eth_dev *dev = hw->eth_dev;
+ refill_que_descs(vq, dev);
+ zxdh_queue_notify(vq);
}
return nb_rx;
}
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index 424048607e..dba9567414 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -36,29 +36,22 @@ struct zxdh_virtnet_stats {
uint64_t bytes;
uint64_t errors;
uint64_t idle;
- uint64_t full;
- uint64_t norefill;
- uint64_t multicast;
- uint64_t broadcast;
uint64_t truncated_err;
uint64_t offload_cfg_err;
uint64_t invalid_hdr_len_err;
uint64_t no_segs_err;
+ uint64_t no_free_tx_desc_err;
uint64_t size_bins[8];
};
struct __rte_cache_aligned zxdh_virtnet_rx {
struct zxdh_virtqueue *vq;
-
- uint64_t mbuf_initializer; /* value to init mbufs. */
struct rte_mempool *mpool; /* mempool for mbuf allocation */
- uint16_t queue_id; /* DPDK queue index. */
- uint16_t port_id; /* Device port identifier. */
struct zxdh_virtnet_stats stats;
const struct rte_memzone *mz; /* mem zone to populate RX ring. */
-
- /* dummy mbuf, for wraparound when processing RX ring. */
- struct rte_mbuf fake_mbuf;
+ uint64_t offloads;
+ uint16_t queue_id; /* DPDK queue index. */
+ uint16_t port_id; /* Device port identifier. */
};
struct __rte_cache_aligned zxdh_virtnet_tx {
@@ -75,5 +68,6 @@ struct __rte_cache_aligned zxdh_virtnet_tx {
uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
#endif /* ZXDH_RXTX_H */
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 40588 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v2 3/3] net/zxdh: optimize Tx xmit pkts performance
2026-04-23 1:18 ` [PATCH v2 0/3] " Junlong Wang
2026-04-23 1:18 ` [PATCH v2 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
2026-04-23 1:18 ` [PATCH v2 2/3] net/zxdh: optimize Rx recv pkts performance Junlong Wang
@ 2026-04-23 1:18 ` Junlong Wang
2026-04-23 19:23 ` [PATCH v2 0/3] net/zxdh: optimize Rx/Tx path performance Stephen Hemminger
2026-05-09 6:29 ` [PATCH v3 " Junlong Wang
4 siblings, 0 replies; 23+ messages in thread
From: Junlong Wang @ 2026-04-23 1:18 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 17705 bytes --]
Add a simple Tx burst function (zxdh_xmit_pkts_simple) for transmitting
single-segment packets.
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 7 +-
drivers/net/zxdh/zxdh_rxtx.c | 329 ++++++++++++++++++++++++---------
drivers/net/zxdh/zxdh_rxtx.h | 11 +-
3 files changed, 252 insertions(+), 95 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index 7363d4c3d3..4d98680784 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -1298,10 +1298,15 @@ static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
static int32_t
zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
{
+ uint64_t tx_offloads = eth_dev->data->dev_conf.txmode.offloads;
+
eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
- eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
+ if (!(tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
+ eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_simple;
+ else
+ eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
if (eth_dev->data->scattered_rx)
eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index cc266e8659..8e13847fc8 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -114,7 +114,21 @@
RTE_MBUF_F_TX_SEC_OFFLOAD | \
RTE_MBUF_F_TX_UDP_SEG)
+#if RTE_CACHE_LINE_SIZE == 128
+#define NEXT_CACHELINE_OFF_16B 8
+#define NEXT_CACHELINE_OFF_8B 16
+#elif RTE_CACHE_LINE_SIZE == 64
+#define NEXT_CACHELINE_OFF_16B 4
+#define NEXT_CACHELINE_OFF_8B 8
+#else
+#define NEXT_CACHELINE_OFF_16B (RTE_CACHE_LINE_SIZE / 16)
+#define NEXT_CACHELINE_OFF_8B (RTE_CACHE_LINE_SIZE / 8)
+#endif
+#define N_PER_LOOP NEXT_CACHELINE_OFF_8B
+#define N_PER_LOOP_MASK (N_PER_LOOP - 1)
+
#define rxq_get_vq(q) ((q)->vq)
+#define txq_get_vq(q) ((q)->vq)
uint32_t zxdh_outer_l2_type[16] = {
0,
@@ -203,43 +217,6 @@ uint32_t zxdh_inner_l4_type[16] = {
0,
};
-static void
-zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
-{
- uint16_t used_idx = 0;
- uint16_t id = 0;
- uint16_t curr_id = 0;
- uint16_t free_cnt = 0;
- uint16_t size = vq->vq_nentries;
- struct zxdh_vring_packed_desc *desc = vq->vq_packed.ring.desc;
- struct zxdh_vq_desc_extra *dxp = NULL;
-
- used_idx = vq->vq_used_cons_idx;
- /* desc_is_used has a load-acquire or rte_io_rmb inside
- * and wait for used desc in virtqueue.
- */
- while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
- id = desc[used_idx].id;
- do {
- curr_id = used_idx;
- dxp = &vq->vq_descx[used_idx];
- used_idx += dxp->ndescs;
- free_cnt += dxp->ndescs;
- num -= dxp->ndescs;
- if (used_idx >= size) {
- used_idx -= size;
- vq->used_wrap_counter ^= 1;
- }
- if (dxp->cookie != NULL) {
- rte_pktmbuf_free(dxp->cookie);
- dxp->cookie = NULL;
- }
- } while (curr_id != id);
- }
- vq->vq_used_cons_idx = used_idx;
- vq->vq_free_cnt += free_cnt;
-}
-
static inline uint16_t
zxdh_get_mtu(struct zxdh_virtqueue *vq)
{
@@ -336,7 +313,7 @@ zxdh_xmit_fill_net_hdr(struct zxdh_virtqueue *vq, struct rte_mbuf *cookie,
}
static inline void
-zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_push(struct zxdh_virtnet_tx *txvq,
struct rte_mbuf *cookie)
{
struct zxdh_virtqueue *vq = txvq->vq;
@@ -347,7 +324,6 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
- dxp->ndescs = 1;
dxp->cookie = cookie;
hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -hdr_len);
zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -364,52 +340,49 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
}
static inline void
-zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_append(struct zxdh_virtnet_tx *txvq,
struct rte_mbuf *cookie,
uint16_t needed)
{
struct zxdh_tx_region *txr = txvq->zxdh_net_hdr_mz->addr;
struct zxdh_virtqueue *vq = txvq->vq;
- uint16_t id = vq->vq_avail_idx;
- struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+ struct zxdh_vq_desc_extra *dep = &vq->vq_descx[0];
uint16_t head_idx = vq->vq_avail_idx;
uint16_t idx = head_idx;
struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
struct zxdh_vring_packed_desc *head_dp = &vq->vq_packed.ring.desc[idx];
struct zxdh_net_hdr_dl *hdr = NULL;
-
- uint16_t head_flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+ uint16_t id = vq->vq_avail_idx;
+ struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
+ uint16_t head_flags = 0;
- dxp->ndescs = needed;
- dxp->cookie = cookie;
- head_flags |= vq->cached_flags;
+ dxp->cookie = NULL;
+ /* setup first tx ring slot to point to header stored in reserved region. */
start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
start_dp[idx].len = hdr_len;
- head_flags |= ZXDH_VRING_DESC_F_NEXT;
+ start_dp[idx].id = idx;
+ head_flags |= vq->cached_flags | ZXDH_VRING_DESC_F_NEXT;
hdr = (void *)&txr[idx].tx_hdr;
- rte_prefetch1(hdr);
+ zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
+
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
- zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
-
do {
start_dp[idx].addr = rte_pktmbuf_iova(cookie);
start_dp[idx].len = cookie->data_len;
- start_dp[idx].id = id;
- if (likely(idx != head_idx)) {
- uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
-
- flags |= vq->cached_flags;
- start_dp[idx].flags = flags;
- }
+ start_dp[idx].id = idx;
+ dep[idx].cookie = cookie;
+ uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+ flags |= vq->cached_flags;
+ start_dp[idx].flags = flags;
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
@@ -419,7 +392,6 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
vq->vq_avail_idx = idx;
-
zxdh_queue_store_flags_packed(head_dp, head_flags);
}
@@ -458,7 +430,7 @@ zxdh_update_packet_stats(struct zxdh_virtnet_stats *stats, struct rte_mbuf *mbuf
}
static void
-zxdh_xmit_flush(struct zxdh_virtqueue *vq)
+zxdh_xmit_fast_flush(struct zxdh_virtqueue *vq)
{
uint16_t id = 0;
uint16_t curr_id = 0;
@@ -474,20 +446,22 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
* for a used descriptor in the virtqueue.
*/
while (desc_is_used(&desc[used_idx], vq)) {
+ rte_prefetch0(&desc[used_idx + NEXT_CACHELINE_OFF_16B]);
id = desc[used_idx].id;
do {
+ desc[used_idx].id = used_idx;
curr_id = used_idx;
dxp = &vq->vq_descx[used_idx];
- used_idx += dxp->ndescs;
- free_cnt += dxp->ndescs;
- if (used_idx >= size) {
- used_idx -= size;
- vq->used_wrap_counter ^= 1;
- }
if (dxp->cookie != NULL) {
- rte_pktmbuf_free(dxp->cookie);
+ rte_pktmbuf_free_seg(dxp->cookie);
dxp->cookie = NULL;
}
+ used_idx += 1;
+ free_cnt += 1;
+ if (unlikely(used_idx == size)) {
+ used_idx = 0;
+ vq->used_wrap_counter ^= 1;
+ }
} while (curr_id != id);
}
vq->vq_used_cons_idx = used_idx;
@@ -501,13 +475,12 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
struct zxdh_virtqueue *vq = txvq->vq;
uint16_t nb_tx = 0;
- zxdh_xmit_flush(vq);
+ zxdh_xmit_fast_flush(vq);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
struct rte_mbuf *txm = tx_pkts[nb_tx];
int32_t can_push = 0;
int32_t slots = 0;
- int32_t need = 0;
rte_prefetch0(txm);
/* optimize ring usage */
@@ -524,26 +497,15 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
* default => number of segments + 1
**/
slots = txm->nb_segs + !can_push;
- need = slots - vq->vq_free_cnt;
/* Positive value indicates it need free vring descriptors */
- if (unlikely(need > 0)) {
- zxdh_xmit_cleanup_inorder_packed(vq, need);
- need = slots - vq->vq_free_cnt;
- if (unlikely(need > 0)) {
- PMD_TX_LOG(ERR,
- " No enough %d free tx descriptors to transmit."
- "freecnt %d",
- need,
- vq->vq_free_cnt);
- break;
- }
- }
+ if (unlikely(slots > vq->vq_free_cnt))
+ break;
/* Enqueue Packet buffers */
if (can_push)
- zxdh_enqueue_xmit_packed_fast(txvq, txm);
+ zxdh_xmit_enqueue_push(txvq, txm);
else
- zxdh_enqueue_xmit_packed(txvq, txm, slots);
+ zxdh_xmit_enqueue_append(txvq, txm, slots);
zxdh_update_packet_stats(&txvq->stats, txm);
}
txvq->stats.packets += nb_tx;
@@ -581,11 +543,6 @@ uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts,
}
#endif
- error = rte_net_intel_cksum_prepare(m);
- if (unlikely(error)) {
- rte_errno = -error;
- break;
- }
if (m->nb_segs > ZXDH_TX_MAX_SEGS) {
PMD_TX_LOG(ERR, "%d segs dropped", m->nb_segs);
txvq->stats.truncated_err += nb_pkts - nb_tx;
@@ -1089,3 +1046,197 @@ uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint1
}
return nb_rx;
}
+
+/*
+ * Prepend the downlink net header to an mbuf and fill in the per-packet
+ * offload fields derived from the mbuf's ol_flags.
+ *
+ * cookie: mbuf about to be transmitted; hw->dl_net_hdr_len bytes are
+ *         prepended to its data area.
+ * hw:     device state providing the header template (hw->net_hdr_dl),
+ *         the header length and the has_tx_offload switch.
+ *
+ * All multi-byte header fields are stored big-endian.
+ *
+ * NOTE(review): the rte_pktmbuf_prepend() result is not checked. It returns
+ * NULL when the mbuf has less than hdr_len bytes of headroom, so callers
+ * presumably guarantee enough headroom -- confirm.
+ */
+static inline void pkt_padding(struct rte_mbuf *cookie, struct zxdh_hw *hw)
+{
+	uint16_t mtu_or_mss = 0;
+	uint16_t pkt_flag_lw16 = ZXDH_NO_IPID_UPDATE;
+	uint16_t l3_offset;
+	uint8_t pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+	uint8_t l3_ptype = ZXDH_PI_L3TYPE_NOIP;
+	struct zxdh_pi_hdr *pi_hdr;
+	struct zxdh_pd_hdr_dl *pd_hdr;
+	struct zxdh_net_hdr_dl *net_hdr_dl = hw->net_hdr_dl;
+	uint8_t hdr_len = hw->dl_net_hdr_len;
+	uint16_t ol_flag = 0;
+	struct zxdh_net_hdr_dl *hdr;
+
+	/* Start from the per-device template, then patch the per-packet fields. */
+	hdr = (struct zxdh_net_hdr_dl *)rte_pktmbuf_prepend(cookie, hdr_len);
+	rte_memcpy(hdr, net_hdr_dl, hdr_len);
+
+	if (hw->has_tx_offload) {
+		/* With Tx offloads the header uses the combined PI + PD layout. */
+		pi_hdr = &hdr->pipd_hdr_dl.pi_hdr;
+		pd_hdr = &hdr->pipd_hdr_dl.pd_hdr;
+
+		pcode = ZXDH_PCODE_IP_PKT_TYPE;
+		if (cookie->ol_flags & RTE_MBUF_F_TX_IPV6)
+			l3_ptype = ZXDH_PI_L3TYPE_IPV6;
+		else if (cookie->ol_flags & RTE_MBUF_F_TX_IPV4)
+			l3_ptype = ZXDH_PI_L3TYPE_IP;
+		else
+			pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+
+		if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+			/* TSO: use the requested MSS, but never below ZXDH_MIN_MSS. */
+			mtu_or_mss = (cookie->tso_segsz >= ZXDH_MIN_MSS) ?
+				cookie->tso_segsz : ZXDH_MIN_MSS;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+			pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT | ZXDH_TX_IP_CKSUM_CAL;
+			pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+		} else if (cookie->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
+			/* UDP segmentation: the size comes from the port MTU, same floor. */
+			mtu_or_mss = hw->eth_dev->data->mtu;
+			mtu_or_mss = (mtu_or_mss >= ZXDH_MIN_MSS) ? mtu_or_mss : ZXDH_MIN_MSS;
+			pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT | ZXDH_TX_TCPUDP_CKSUM_CAL;
+			pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+		} else {
+			/* No segmentation offload requested. */
+			pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT;
+		}
+
+		if (cookie->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
+			pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+
+		/* The UDP test must come first: its flag value is a superset of the TCP bits. */
+		if ((cookie->ol_flags & RTE_MBUF_F_TX_UDP_CKSUM) == RTE_MBUF_F_TX_UDP_CKSUM) {
+			pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+		} else if ((cookie->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) ==
+				RTE_MBUF_F_TX_TCP_CKSUM) {
+			pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+		}
+		pkt_flag_lw16 |= (mtu_or_mss >> ZXDH_MTU_MSS_UNIT_SHIFTBIT) & ZXDH_MTU_MSS_MASK;
+		pi_hdr->pkt_flag_lw16 = rte_cpu_to_be_16(pkt_flag_lw16);
+		pi_hdr->pkt_type = l3_ptype | ZXDH_PKT_FORM_CPU | pcode;
+
+		/* Offsets are measured from the start of the prepended header. */
+		l3_offset = hdr_len + cookie->l2_len;
+		l3_offset += (cookie->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
+			cookie->outer_l2_len + cookie->outer_l3_len : 0;
+		pi_hdr->l3_offset = rte_cpu_to_be_16(l3_offset);
+		pi_hdr->l4_offset = rte_cpu_to_be_16(l3_offset + cookie->l3_len);
+		if (cookie->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
+			ol_flag |= ZXDH_PD_OFFLOAD_OUTER_IPCSUM;
+	} else {
+		/* Without Tx offloads only the PD header is present. */
+		pd_hdr = &hdr->pd_hdr;
+	}
+
+	pd_hdr->dst_vfid = rte_cpu_to_be_16(cookie->port);
+
+	if (cookie->ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+		ol_flag |= ZXDH_PD_OFFLOAD_CVLAN_INSERT;
+		pd_hdr->cvlan_insert = rte_cpu_to_be_16(cookie->vlan_tci);
+		if (cookie->ol_flags & RTE_MBUF_F_TX_QINQ) {
+			ol_flag |= ZXDH_PD_OFFLOAD_SVLAN_INSERT;
+			pd_hdr->svlan_insert = rte_cpu_to_be_16(cookie->vlan_tci_outer);
+		}
+	}
+
+	pd_hdr->ol_flag = rte_cpu_to_be_16(ol_flag);
+}
+
+/*
+ * Populate N_PER_LOOP consecutive descriptors from N_PER_LOOP mbufs.
+ *
+ * Simple Tx path only: each mbuf is posted as exactly one descriptor
+ * (data_len bytes at its data IOVA); mbuf chains are not walked here.
+ *
+ * vq:   queue owning the ring; supplies the cached avail/used flags.
+ * txdp: first descriptor to fill; must point into vq->vq_packed.ring.desc.
+ * pkts: N_PER_LOOP mbufs to post.
+ *
+ * NOTE(review): no write barrier is issued between the addr/len/id stores
+ * and the flags store -- confirm this is sufficient on weakly ordered CPUs.
+ */
+static inline void
+tx_bunch(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+		struct rte_mbuf **pkts)
+{
+	uint16_t flags = vq->cached_flags;
+	/* Ring index of the first slot, recovered from the descriptor address. */
+	uint16_t id = (uint16_t)(txdp - vq->vq_packed.ring.desc);
+	int i;
+
+	for (i = 0; i < N_PER_LOOP; ++i, ++txdp, ++pkts, ++id) {
+		txdp->addr = rte_mbuf_data_iova(*pkts);
+		txdp->len = (*pkts)->data_len;
+		/*
+		 * Each slot carries its own ring index: the Tx flush uses the id
+		 * read back from the ring to decide where a completed run ends,
+		 * so a stale id would make it reclaim the wrong descriptors.
+		 */
+		txdp->id = id;
+		/* Flags are written last; they hand the slot to the device. */
+		txdp->flags = flags;
+	}
+}
+
+/*
+ * Populate one descriptor from one single-segment mbuf (simple Tx path).
+ *
+ * vq:   queue owning the ring; supplies the cached avail/used flags.
+ * txdp: descriptor to fill; must point into vq->vq_packed.ring.desc.
+ * pkts: the mbuf to post (data_len bytes at its data IOVA).
+ *
+ * NOTE(review): no write barrier is issued between the addr/len/id stores
+ * and the flags store -- confirm this is sufficient on weakly ordered CPUs.
+ */
+static inline void
+tx1(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+		struct rte_mbuf *pkts)
+{
+	uint16_t flags = vq->cached_flags;
+
+	txdp->addr = rte_mbuf_data_iova(pkts);
+	txdp->len = pkts->data_len;
+	/*
+	 * Store the slot's own ring index: the Tx flush uses the id read back
+	 * from the ring to decide where a completed run ends, so it must not
+	 * be left stale.
+	 */
+	txdp->id = (uint16_t)(txdp - vq->vq_packed.ring.desc);
+	/* Flags are written last; they hand the slot to the device. */
+	txdp->flags = flags;
+}
+
+/*
+ * Post nb_pkts single-segment mbufs on consecutive descriptors starting at
+ * vq->vq_avail_idx.
+ *
+ * For every packet the downlink header is prepended (pkt_padding), the mbuf
+ * is recorded as the slot's cookie, and the descriptor is filled. Packets
+ * are handled in groups of N_PER_LOOP first, then one at a time.
+ *
+ * This function does not wrap around the ring and does not update
+ * vq_avail_idx or vq_free_cnt; zxdh_xmit_pkts_simple() splits the burst at
+ * the ring end and does the accounting.
+ */
+static void submit_to_backend_simple(struct zxdh_virtqueue *vq,
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct zxdh_hw *hw = vq->hw;
+	uint16_t start = vq->vq_avail_idx;
+	struct zxdh_vring_packed_desc *desc = &vq->vq_packed.ring.desc[start];
+	struct zxdh_vq_desc_extra *extra = &vq->vq_descx[start];
+	int bulk = nb_pkts & ~N_PER_LOOP_MASK;	/* multiple of N_PER_LOOP */
+	int tail = nb_pkts & N_PER_LOOP_MASK;	/* remainder */
+	int base, k;
+
+	for (base = 0; base < bulk; base += N_PER_LOOP) {
+		rte_prefetch0(&extra[base]);
+		rte_prefetch0(&tx_pkts[base]);
+		for (k = 0; k < N_PER_LOOP; ++k) {
+			struct rte_mbuf *m = tx_pkts[base + k];
+
+			pkt_padding(m, hw);
+			extra[base + k].cookie = (void *)m;
+		}
+		/* Headers are in place for the whole group; fill its descriptors. */
+		tx_bunch(vq, &desc[base], &tx_pkts[base]);
+	}
+
+	if (tail <= 0)
+		return;
+
+	rte_prefetch0(&extra[bulk]);
+	rte_prefetch0(&tx_pkts[bulk]);
+
+	for (k = 0; k < tail; ++k) {
+		struct rte_mbuf *m = tx_pkts[bulk + k];
+
+		pkt_padding(m, hw);
+		extra[bulk + k].cookie = m;
+		tx1(vq, &desc[bulk + k], m);
+	}
+}
+
+/*
+ * Simple Tx burst: transmit up to nb_pkts single-segment mbufs, one
+ * descriptor per packet.
+ *
+ * tx_queue: the struct zxdh_virtnet_tx of the queue.
+ * tx_pkts:  packets to send.
+ * nb_pkts:  number of entries in tx_pkts.
+ *
+ * Returns the number of packets posted, which is nb_pkts clamped to the
+ * number of free descriptors after reclaiming completed ones. Returns 0
+ * (and bumps stats.idle) when nothing can be posted.
+ */
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct zxdh_virtnet_tx *txvq = tx_queue;
+	struct zxdh_virtqueue *vq = txq_get_vq(txvq);
+	/*
+	 * Separate cursor for submission. tx_pkts itself must stay at the
+	 * start of the burst because the stats loop below indexes from it.
+	 */
+	struct rte_mbuf **cur = tx_pkts;
+	uint16_t nb_tx = 0, nb_tx_left;
+
+	zxdh_xmit_fast_flush(vq);
+
+	nb_pkts = (uint16_t)RTE_MIN(nb_pkts, vq->vq_free_cnt);
+	if (unlikely(nb_pkts == 0)) {
+		txvq->stats.idle++;
+		return 0;
+	}
+
+	nb_tx_left = nb_pkts;
+	if ((vq->vq_avail_idx + nb_pkts) >= vq->vq_nentries) {
+		/* The burst reaches the ring end: fill up to the end, then wrap. */
+		nb_tx = vq->vq_nentries - vq->vq_avail_idx;
+		nb_tx_left = nb_pkts - nb_tx;
+		submit_to_backend_simple(vq, cur, nb_tx);
+		vq->vq_avail_idx = 0;
+		/* Wrapping flips the avail/used bits used for new descriptors. */
+		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+
+		vq->vq_free_cnt -= nb_tx;
+		cur += nb_tx;
+	}
+	if (nb_tx_left) {
+		submit_to_backend_simple(vq, cur, nb_tx_left);
+		vq->vq_avail_idx += nb_tx_left;
+		vq->vq_free_cnt -= nb_tx_left;
+	}
+
+	zxdh_queue_notify(vq);
+	txvq->stats.packets += nb_pkts;
+	/* Account every posted packet, starting from the first one of the burst. */
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++)
+		zxdh_update_packet_stats(&txvq->stats, tx_pkts[nb_tx]);
+
+	return nb_pkts;
+}
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index dba9567414..783fb456de 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -56,18 +56,19 @@ struct __rte_cache_aligned zxdh_virtnet_rx {
struct __rte_cache_aligned zxdh_virtnet_tx {
struct zxdh_virtqueue *vq;
-
- rte_iova_t zxdh_net_hdr_mem; /* hdr for each xmit packet */
- uint16_t queue_id; /* DPDK queue index. */
- uint16_t port_id; /* Device port identifier. */
+ const struct rte_memzone *zxdh_net_hdr_mz; /* memzone to populate hdr. */
+ rte_iova_t zxdh_net_hdr_mem; /* hdr for each xmit packet */
struct zxdh_virtnet_stats stats;
const struct rte_memzone *mz; /* mem zone to populate TX ring. */
- const struct rte_memzone *zxdh_net_hdr_mz; /* memzone to populate hdr. */
+ uint64_t offloads;
+ uint16_t queue_id; /* DPDK queue index. */
+ uint16_t port_id; /* Device port identifier. */
};
uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
#endif /* ZXDH_RXTX_H */
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 44675 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v2 2/3] net/zxdh: optimize Rx recv pkts performance
2026-04-23 1:18 ` [PATCH v2 2/3] net/zxdh: optimize Rx recv pkts performance Junlong Wang
@ 2026-04-23 18:54 ` Stephen Hemminger
2026-04-23 23:39 ` Stephen Hemminger
1 sibling, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2026-04-23 18:54 UTC (permalink / raw)
To: Junlong Wang; +Cc: dev
On Thu, 23 Apr 2026 09:18:17 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> +
> + PMD_DRV_LOG(DEBUG, "port %d min_rx_buf_size %d",
> + eth_dev->data->port_id, eth_dev->data->min_rx_buf_size);
Don't use %d when printing unsigned values.
+ /* If device is started, refuse mtu that requires the support of
+ * scattered packets when this feature has not been enabled before.
+ */
+ if (dev->data->dev_started &&
+ ((!dev->data->scattered_rx &&
+ ((uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) >
+ (dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM))) ||
+ (dev->data->scattered_rx &&
+ ((uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) <=
+ (dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM))))) {
+ PMD_DRV_LOG(ERR, "Stop port first.");
+ return -EINVAL;
+ }
You can use lines up to 100 characters, and break up this into multiple
if statements to avoid such a complex expression. Looks like multiple
parts are the same?
>
> +#define ZXDH_VLAN_TAG_LEN 4
Why not use RTE_VLAN_HLEN?
> +#define ZXDH_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ZXDH_VLAN_TAG_LEN * 2)
> +#define ZXDH_MTU_TO_PKTLEN(mtu) ((mtu) + ZXDH_ETH_OVERHEAD)
> +static inline int zxdh_init_mbuf(struct rte_mbuf *rxm, uint16_t len,
> + struct zxdh_hw *hw, struct zxdh_virtnet_rx *rxvq)
> +{
> + uint16_t hdr_size = 0;
> + struct zxdh_net_hdr_ul *header;
> +
> + header = (struct zxdh_net_hdr_ul *)((char *)
> + rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
Please use rte_pktmbuf_mtod instead for this.
> +uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts)
> +{
> + struct zxdh_virtnet_rx *rxvq = rx_queue;
> + struct zxdh_virtqueue *vq = rxq_get_vq(rxvq);
> + struct zxdh_hw *hw = vq->hw;
> + struct rte_mbuf *rxm;
> + uint32_t lens[ZXDH_MBUF_BURST_SZ];
> + uint16_t len = 0;
> + uint16_t nb_rx = 0;
> + uint16_t num;
> + uint16_t i = 0;
Useless initialization of i.
>
> - dev->data->rx_mbuf_alloc_failed += free_cnt;
> + num = nb_pkts;
> + if (unlikely(num > ZXDH_MBUF_BURST_SZ))
> + num = ZXDH_MBUF_BURST_SZ;
> + num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, num);
> + if (num == 0) {
> + rxvq->stats.idle++;
> + goto refill;
Since this is normal path on idle network, the counter will grow
rapidly. Do you need it?
> + }
> +
> + for (i = 0; i < num; i++) {
> + rxm = rcv_pkts[i];
> + len = lens[i];
> + if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0)) {
> + rte_pktmbuf_free(rxm);
> + continue;
> }
Better practice to make rxm and len variables scoped to the loop.
AI review noticed that there is now a double free in the error path.
Both error paths inside zxdh_init_mbuf() already call rte_pktmbuf_free(rxm) before returning -1. The caller's rte_pktmbuf_free(rxm) then frees it a second time. Remove the caller's free, or stop freeing inside zxdh_init_mbuf().
(zxdh_set_rxtx_funcs) — dropped mergeable-rxbuf feature check: The old code returned -1 with an error log when the peer did not negotiate ZXDH_NET_F_MRG_RXBUF. The new code silently drops that check. If the negotiated feature set doesn't include MRG_RXBUF, the multi-segment rx path may now be selected against a peer that doesn't support it.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2 1/3] net/zxdh: optimize queue structure to improve performance
2026-04-23 1:18 ` [PATCH v2 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
@ 2026-04-23 18:57 ` Stephen Hemminger
0 siblings, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2026-04-23 18:57 UTC (permalink / raw)
To: Junlong Wang; +Cc: dev
On Thu, 23 Apr 2026 09:18:16 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> @@ -1537,8 +1516,6 @@ static const struct eth_dev_ops zxdh_eth_dev_ops = {
> .dev_infos_get = zxdh_dev_infos_get,
> .rx_queue_setup = zxdh_dev_rx_queue_setup,
> .tx_queue_setup = zxdh_dev_tx_queue_setup,
> - .rx_queue_intr_enable = zxdh_dev_rx_queue_intr_enable,
> - .rx_queue_intr_disable = zxdh_dev_rx_queue_intr_disable,
> .rxq_info_get = zxdh_rxq_info_get,
> .txq_info_get = zxdh_txq_info_get,
> .link_update = zxdh_dev_link_update,
Dropping rx_queue interrupt support.
Not good, but if you are doing that, you need a release note and a change to the features matrix.
In zxdh_init_queue:
> + if (hdr_mz)
> + rte_memzone_free(hdr_mz);
Null check here is redundant since rte_memzone_free(NULL)
does the right thing if needed.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2 0/3] net/zxdh: optimize Rx/Tx path performance
2026-04-23 1:18 ` [PATCH v2 0/3] " Junlong Wang
` (2 preceding siblings ...)
2026-04-23 1:18 ` [PATCH v2 3/3] net/zxdh: optimize Tx xmit " Junlong Wang
@ 2026-04-23 19:23 ` Stephen Hemminger
2026-05-09 6:29 ` [PATCH v3 " Junlong Wang
4 siblings, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2026-04-23 19:23 UTC (permalink / raw)
To: Junlong Wang; +Cc: dev
On Thu, 23 Apr 2026 09:18:15 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> v2:
> - zxdh_rxtx.c, pkt_padding(): modified the return value of pkt_padding();
> - zxdh_rxtx.c, zxdh_recv_single_pkts(): modified so that when zxdh_init_mbuf() fails
> the loop frees the mbuf and does "continue";
> - zxdh_rxtx.c, refill_desc_unwrap(): Add rte_io_wmb() before writing flags
> in the refill_que_descs();
> - zxdh_queue.h, zxdh_queue_enable_intr(): Remove unnecessary function of zxdh_queue_enable_intr;
> - zxdh_ethdev.c, zxdh_init_queue(): changed the hdr_mz NULL check logic;
>
> - zxdh_rxtx.c, zxdh_xmit_pkts_simple()、zxdh_recv_single_pkts(): add stats.bytes count;
> - zxdh_rxtx.c, zxdh_init_mbuf():remove rte_pktmbuf_dump(stdout, rxm, 40);
> - zxdh_ethdev.c, zxdh_dev_free_mbufs(): using rte_pktmbuf_free() to free mbufs;
> - Splitting into separate patches, structure reorganization and sw_ring removal、
> RX recv optimize、Tx xmit optimize、Tx;
>
> v1:
> This patch optimizes the ZXDH PMD's receive and transmit path for better
> performance through several improvements:
>
> - Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
> zxdh_recv_single_pkts) for single-segment packet scenarios.
> - Remove RX software ring (sw_ring) to reduce memory allocation and
> copy.
> - Optimize descriptor management with prefetching and simplified
> cleanup.
> - Reorganize structure fields for better cache locality.
>
> These changes reduce CPU cycles and memory bandwidth consumption,
> resulting in improved packet processing throughput.
>
> Junlong Wang (3):
> net/zxdh: optimize queue structure to improve performance
> net/zxdh: optimize Rx recv pkts performance
> net/zxdh: optimize Tx xmit pkts performance
>
> drivers/net/zxdh/zxdh_ethdev.c | 88 ++---
> drivers/net/zxdh/zxdh_ethdev_ops.c | 24 +-
> drivers/net/zxdh/zxdh_ethdev_ops.h | 4 +
> drivers/net/zxdh/zxdh_pci.c | 2 +-
> drivers/net/zxdh/zxdh_queue.c | 31 +-
> drivers/net/zxdh/zxdh_queue.h | 125 +++----
> drivers/net/zxdh/zxdh_rxtx.c | 520 +++++++++++++++++++++--------
> drivers/net/zxdh/zxdh_rxtx.h | 27 +-
> 8 files changed, 527 insertions(+), 294 deletions(-)
>
Here is the full AI review of this patchset
```
Review of v2 series: net/zxdh queue/Rx/Tx optimization
Overall: needs a v3. Findings below, organized by patch.
Patch 1/3 (optimize queue structure)
------------------------------------
Warning: zxdh_queue_notify() in zxdh_queue.h now hardcodes the bit-31
avail-wrap into notify_data unconditionally, removing the
zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) gate that the previous
out-of-line version in zxdh_pci.c had. The series also adds a
vq_split member to the virtqueue union, which suggests split-ring
support is planned. Once that exists, this helper will corrupt notify
data for split rings. Either reinstate the feature gate or keep the
dispatch going through VTPCI_OPS()->notify_queue().
Warning: rx_queue_intr_enable/rx_queue_intr_disable dev_ops and the
zxdh_queue_enable_intr() helper are removed. The commit log talks
about cache locality and sw_ring removal but not this. Please split
it into its own patch with a justification, or at minimum call it out
in the commit message.
Minor: fail_q_alloc now does "if (hdr_mz) rte_memzone_free(hdr_mz);".
rte_memzone_free() accepts NULL; the guard is unnecessary.
Minor: The new "if (hdr_mz == NULL)" check inside the VTNET_TQ branch
of zxdh_init_queue() is unreachable. hdr_mz was already validated
earlier in the function.
Minor: Doxygen close "**/" used in several places where "*/" is the
correct terminator.
Patch 2/3 (optimize Rx recv pkts performance)
---------------------------------------------
Error: Double-free in zxdh_recv_single_pkts(). Both error paths
inside zxdh_init_mbuf() already call rte_pktmbuf_free(rxm), but the
caller also frees rxm on return < 0:
if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0)) {
rte_pktmbuf_free(rxm); /* already freed inside */
continue;
}
Drop either the caller's free or the callees' frees, not both.
Warning: zxdh_set_rxtx_funcs() silently drops the
ZXDH_NET_F_MRG_RXBUF negotiation check. The previous version
returned -1 if MRG_RXBUF was not advertised; the new version selects a
burst function unconditionally. The multi-seg path
zxdh_recv_pkts_packed() reads header->type_hdr.num_buffers, which is
only meaningful when MRG_RXBUF is negotiated with the peer.
Warning: xstats "full", "norefill", "multicast_packets",
"broadcast_packets" (rx) and "norefill", "multicast_packets",
"broadcast_packets" (tx) are removed from the name tables. If these
counters were never being updated, say so in the commit log. If they
were, multicast_packets/broadcast_packets in particular are
operator-facing counters and this is a user-visible regression.
Warning: zxdh_scattered_rx() reads eth_dev->data->min_rx_buf_size,
which is populated during rx_queue_setup(). Depending on when
zxdh_set_rxtx_funcs() runs, "min_rx_buf_size - RTE_PKTMBUF_HEADROOM"
can underflow (uint16_t wraps) if min_rx_buf_size is 0 at that point.
Minor: Open-coded rte_pktmbuf_mtod(). Both the new zxdh_init_mbuf()
and the modified zxdh_recv_pkts_packed() use
(char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
where rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *) expresses the
same intent. data_off equals RTE_PKTMBUF_HEADROOM here because the
refill path aligns the hardware write target to
buf_iova + RTE_PKTMBUF_HEADROOM.
Minor: zxdh_init_mbuf() zeroes rxm->ol_flags, rxm->vlan_tci, and
rxm->vlan_tci_outer. All three are already cleared by
rte_pktmbuf_reset() on alloc from the pool.
Minor: rxq_get_vq(q) is a trivial one-line macro aliasing "q->vq"
with no functional benefit. Either drop it or apply it consistently.
Patch 3/3 (optimize Tx xmit pkts performance)
---------------------------------------------
Error: zxdh_xmit_pkts_simple() does not write txdp->id. tx_bunch()
and tx1() write addr, len, and flags but leave id untouched. The new
zxdh_xmit_fast_flush() reads "id = desc[used_idx].id" as the
chain-terminator for its inner do-while loop
("while (curr_id != id)").
Descriptors submitted by the simple path therefore carry stale ids:
either 0 at cold start from memzone init, or the self-index written
by a previous flush pass. Because the flush rewrites
desc[used_idx].id = used_idx during processing, after one full warmup
cycle every desc[i].id == i and the inner do-while happens to exit
after one iteration. But on a cold ring, or any ring whose
descriptors were left with non-self ids by a preceding append-path
burst, the inner loop keeps iterating, freeing cookies and advancing
used_idx across descriptors the backend has not marked used, until
used_idx wraps back to 0. That corrupts vq_free_cnt and
vq_used_cons_idx accounting.
Fix: set txdp->id = avail_idx + i in tx_bunch/tx1 so the invariant is
explicit rather than relying on the flush's self-rewrite side effect.
Warning: zxdh_xmit_pkts_prepare() drops the
rte_net_intel_cksum_prepare() call. If the driver still advertises
L4 checksum offload, pseudo-header checksum preparation becomes the
application's responsibility. That's a user-visible contract change
and needs justification in the commit log, or should be paired with a
matching capability change.
Warning: zxdh_xmit_enqueue_append() now sets dxp->cookie = NULL for
the head slot and stores cookies per descriptor via dep[idx].cookie.
This works with the new per-descriptor free in
zxdh_xmit_fast_flush() (rte_pktmbuf_free_seg), but any residual code
path still reading vq_descx[head_id].cookie will see NULL. Worth a
comment documenting the new invariant.
Minor: Extra initialization. In pkt_padding(),
struct zxdh_net_hdr_dl *hdr = NULL;
is immediately overwritten by the rte_pktmbuf_prepend() return. In
submit_to_backend_simple(),
struct rte_mbuf *m = NULL;
is overwritten on first use inside the loop. Drop both initializers.
Minor: "mainpart = (nb_pkts & ((uint32_t)~N_PER_LOOP_MASK));" — the
uint32_t cast is pointless. nb_pkts is uint16_t and N_PER_LOOP_MASK
is a small integer constant.
Minor: submit_to_backend_simple() uses "*(tx_pkts + i + j)" where
"tx_pkts[i + j]" reads more naturally and matches style elsewhere.
Minor: tx_bunch() is named to imply a variable batch but is hardcoded
to N_PER_LOOP iterations for single-segment packets only. A one-line
comment noting that the simple path handles single-segment only
(selected when TX_OFFLOAD_MULTI_SEGS is off) would help.
```
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2 2/3] net/zxdh: optimize Rx recv pkts performance
2026-04-23 1:18 ` [PATCH v2 2/3] net/zxdh: optimize Rx recv pkts performance Junlong Wang
2026-04-23 18:54 ` Stephen Hemminger
@ 2026-04-23 23:39 ` Stephen Hemminger
1 sibling, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2026-04-23 23:39 UTC (permalink / raw)
To: Junlong Wang; +Cc: dev
On Thu, 23 Apr 2026 09:18:17 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> +
> + PMD_DRV_LOG(DEBUG, "port %d min_rx_buf_size %d",
> + eth_dev->data->port_id, eth_dev->data->min_rx_buf_size);
Don't use %d when printing unsigned values.
+ /* If device is started, refuse mtu that requires the support of
+ * scattered packets when this feature has not been enabled before.
+ */
+ if (dev->data->dev_started &&
+ ((!dev->data->scattered_rx &&
+ ((uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) >
+ (dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM))) ||
+ (dev->data->scattered_rx &&
+ ((uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) <=
+ (dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM))))) {
+ PMD_DRV_LOG(ERR, "Stop port first.");
+ return -EINVAL;
+ }
You can use lines up to 100 characters, and break up this into multiple
if statements to avoid such a complex expression. Looks like multiple
parts are the same?
>
> +#define ZXDH_VLAN_TAG_LEN 4
Why not use RTE_VLAN_HLEN?
> +#define ZXDH_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ZXDH_VLAN_TAG_LEN * 2)
> +#define ZXDH_MTU_TO_PKTLEN(mtu) ((mtu) + ZXDH_ETH_OVERHEAD)
> +static inline int zxdh_init_mbuf(struct rte_mbuf *rxm, uint16_t len,
> + struct zxdh_hw *hw, struct zxdh_virtnet_rx *rxvq)
> +{
> + uint16_t hdr_size = 0;
> + struct zxdh_net_hdr_ul *header;
> +
> + header = (struct zxdh_net_hdr_ul *)((char *)
> + rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
Please use rte_pktmbuf_mtod instead for this.
> +uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts)
> +{
> + struct zxdh_virtnet_rx *rxvq = rx_queue;
> + struct zxdh_virtqueue *vq = rxq_get_vq(rxvq);
> + struct zxdh_hw *hw = vq->hw;
> + struct rte_mbuf *rxm;
> + uint32_t lens[ZXDH_MBUF_BURST_SZ];
> + uint16_t len = 0;
> + uint16_t nb_rx = 0;
> + uint16_t num;
> + uint16_t i = 0;
Useless initialization of i.
>
> - dev->data->rx_mbuf_alloc_failed += free_cnt;
> + num = nb_pkts;
> + if (unlikely(num > ZXDH_MBUF_BURST_SZ))
> + num = ZXDH_MBUF_BURST_SZ;
> + num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, num);
> + if (num == 0) {
> + rxvq->stats.idle++;
> + goto refill;
Since this is normal path on idle network, the counter will grow
rapidly. Do you need it?
> + }
> +
> + for (i = 0; i < num; i++) {
> + rxm = rcv_pkts[i];
> + len = lens[i];
> + if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0)) {
> + rte_pktmbuf_free(rxm);
> + continue;
> }
Better practice to make rxm and len variables scoped to the loop.
AI review noticed that there is now a double free in the error path.
Both error paths inside zxdh_init_mbuf() already call rte_pktmbuf_free(rxm) before returning -1. The caller's rte_pktmbuf_free(rxm) then frees it a second time. Remove the caller's free, or stop freeing inside zxdh_init_mbuf().
(zxdh_set_rxtx_funcs) — dropped mergeable-rxbuf feature check: The old code returned -1 with an error log when the peer did not negotiate ZXDH_NET_F_MRG_RXBUF. The new code silently drops that check. If the negotiated feature set doesn't include MRG_RXBUF, the multi-segment rx path may now be selected against a peer that doesn't support it.
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v3 0/3] net/zxdh: optimize Rx/Tx path performance
2026-04-23 1:18 ` [PATCH v2 0/3] " Junlong Wang
` (3 preceding siblings ...)
2026-04-23 19:23 ` [PATCH v2 0/3] net/zxdh: optimize Rx/Tx path performance Stephen Hemminger
@ 2026-05-09 6:29 ` Junlong Wang
2026-05-09 6:29 ` [PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
` (3 more replies)
4 siblings, 4 replies; 23+ messages in thread
From: Junlong Wang @ 2026-05-09 6:29 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 2932 bytes --]
v3:
- remove unnecessary NULL check in zxdh_init_queue.
- Split Ring: Bit[31] is unused and reserved, zxdh_queue_notify(): removing the
zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) check;
- remove the unnecessary double-free in zxdh_recv_single_pkts();
- used rte_pktmbuf_mtod();
- remove rxq_get_vq(q) macro, use q->vq and apply it consistently;
- Refactoring scatter and mtu check logic in zxdh_dev_mtu_set();
- set txdp->id = avail_idx + i in tx_bunch/tx1.
- add comment documenting zxdh_xmit_enqueue_append() now sets dxp->cookie = NULL for
the head slot and stores cookies per descriptor via dep[idx].cookie.
- add one-line comment noting tx_bunch() is the simple path handles single-segment.
- remove unnecessary Extra initialization and the uint32_t cast.
v2:
- zxdh_rxtx.c, pkt_padding(): modified the return value of pkt_padding();
- zxdh_rxtx.c, zxdh_recv_single_pkts(): modified so that when zxdh_init_mbuf() fails
the loop frees the mbuf and does "continue";
- zxdh_rxtx.c, refill_desc_unwrap(): Add rte_io_wmb() before writing flags
in the refill_que_descs();
- zxdh_queue.h, zxdh_queue_enable_intr(): Remove unnecessary function of zxdh_queue_enable_intr;
- zxdh_ethdev.c, zxdh_init_queue(): changed the hdr_mz NULL check logic;
- zxdh_rxtx.c, zxdh_xmit_pkts_simple()、zxdh_recv_single_pkts(): add stats.bytes count;
- zxdh_rxtx.c, zxdh_init_mbuf():remove rte_pktmbuf_dump(stdout, rxm, 40);
- zxdh_ethdev.c, zxdh_dev_free_mbufs(): using rte_pktmbuf_free() to free mbufs;
- Splitting into separate patches, structure reorganization and sw_ring removal、
RX recv optimize、Tx xmit optimize、Tx;
v1:
This patch optimizes the ZXDH PMD's receive and transmit path for better
performance through several improvements:
- Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
zxdh_recv_single_pkts) for single-segment packet scenarios.
- Remove RX software ring (sw_ring) to reduce memory allocation and
copy.
- Optimize descriptor management with prefetching and simplified
cleanup.
- Reorganize structure fields for better cache locality.
These changes reduce CPU cycles and memory bandwidth consumption,
resulting in improved packet processing throughput.
Junlong Wang (3):
net/zxdh: optimize queue structure to improve performance
net/zxdh: optimize Rx recv pkts performance
net/zxdh: optimize Tx xmit pkts performance
drivers/net/zxdh/zxdh_ethdev.c | 81 ++---
drivers/net/zxdh/zxdh_ethdev_ops.c | 23 +-
drivers/net/zxdh/zxdh_ethdev_ops.h | 4 +
drivers/net/zxdh/zxdh_pci.c | 2 +-
drivers/net/zxdh/zxdh_queue.c | 11 +-
drivers/net/zxdh/zxdh_queue.h | 120 ++++---
drivers/net/zxdh/zxdh_rxtx.c | 524 ++++++++++++++++++++++-------
drivers/net/zxdh/zxdh_rxtx.h | 27 +-
8 files changed, 533 insertions(+), 259 deletions(-)
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 5226 bytes --]
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance
2026-05-09 6:29 ` [PATCH v3 " Junlong Wang
@ 2026-05-09 6:29 ` Junlong Wang
2026-05-18 2:20 ` Stephen Hemminger
2026-05-09 6:29 ` [PATCH v3 2/3] net/zxdh: optimize Rx recv pkts performance Junlong Wang
` (2 subsequent siblings)
3 siblings, 1 reply; 23+ messages in thread
From: Junlong Wang @ 2026-05-09 6:29 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 16714 bytes --]
Reorganize structure fields for better cache locality.
Remove RX software ring (sw_ring) to reduce memory allocation and
copy.
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 33 +--------
drivers/net/zxdh/zxdh_pci.c | 2 +-
drivers/net/zxdh/zxdh_queue.c | 11 ++-
drivers/net/zxdh/zxdh_queue.h | 120 ++++++++++++++++-----------------
drivers/net/zxdh/zxdh_rxtx.c | 22 +++---
5 files changed, 77 insertions(+), 111 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index aeb01f4652..08119e28c7 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -644,7 +644,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
struct zxdh_virtnet_tx *txvq = NULL;
struct zxdh_virtqueue *vq = NULL;
size_t sz_hdr_mz = 0;
- void *sw_ring = NULL;
int32_t queue_type = zxdh_get_queue_type(vtpci_logic_qidx);
int32_t numa_node = dev->device->numa_node;
uint16_t vtpci_phy_qidx = 0;
@@ -692,11 +691,10 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
vq->vq_queue_index = vtpci_phy_qidx;
vq->vq_nentries = vq_size;
- vq->vq_packed.used_wrap_counter = 1;
- vq->vq_packed.cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
- vq->vq_packed.event_flags_shadow = 0;
+ vq->used_wrap_counter = 1;
+ vq->cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
if (queue_type == ZXDH_VTNET_RQ)
- vq->vq_packed.cached_flags |= ZXDH_VRING_DESC_F_WRITE;
+ vq->cached_flags |= ZXDH_VRING_DESC_F_WRITE;
/*
* Reserve a memzone for vring elements
@@ -741,16 +739,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
}
if (queue_type == ZXDH_VTNET_RQ) {
- size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * sizeof(vq->sw_ring[0]);
-
- sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, RTE_CACHE_LINE_SIZE, numa_node);
- if (!sw_ring) {
- PMD_DRV_LOG(ERR, "can not allocate RX soft ring");
- ret = -ENOMEM;
- goto fail_q_alloc;
- }
-
- vq->sw_ring = sw_ring;
rxvq = &vq->rxq;
rxvq->vq = vq;
rxvq->port_id = dev->data->port_id;
@@ -764,23 +752,9 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
txvq->zxdh_net_hdr_mem = hdr_mz->iova;
}
- vq->offset = offsetof(struct rte_mbuf, buf_iova);
if (queue_type == ZXDH_VTNET_TQ) {
struct zxdh_tx_region *txr = hdr_mz->addr;
- uint32_t i;
-
memset(txr, 0, vq_size * sizeof(*txr));
- for (i = 0; i < vq_size; i++) {
- /* first indirect descriptor is always the tx header */
- struct zxdh_vring_packed_desc *start_dp = txr[i].tx_packed_indir;
-
- zxdh_vring_desc_init_indirect_packed(start_dp,
- RTE_DIM(txr[i].tx_packed_indir));
- start_dp->addr = txvq->zxdh_net_hdr_mem + i * sizeof(*txr) +
- offsetof(struct zxdh_tx_region, tx_hdr);
- /* length will be updated to actual pi hdr size when xmit pkt */
- start_dp->len = 0;
- }
}
if (ZXDH_VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
PMD_DRV_LOG(ERR, "setup_queue failed");
@@ -788,7 +762,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
}
return 0;
fail_q_alloc:
- rte_free(sw_ring);
rte_memzone_free(hdr_mz);
rte_memzone_free(mz);
rte_free(vq);
diff --git a/drivers/net/zxdh/zxdh_pci.c b/drivers/net/zxdh/zxdh_pci.c
index 4ba31905fc..0bc27ed111 100644
--- a/drivers/net/zxdh/zxdh_pci.c
+++ b/drivers/net/zxdh/zxdh_pci.c
@@ -231,7 +231,7 @@ zxdh_notify_queue(struct zxdh_hw *hw, struct zxdh_virtqueue *vq)
notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index;
if (zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) &&
- (vq->vq_packed.cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
+ (vq->cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
notify_data |= RTE_BIT32(31);
PMD_DRV_LOG(DEBUG, "queue:%d notify_data 0x%x notify_addr 0x%p",
diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c
index 7162593b16..4668cb5d13 100644
--- a/drivers/net/zxdh/zxdh_queue.c
+++ b/drivers/net/zxdh/zxdh_queue.c
@@ -407,7 +407,7 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
{
struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
struct zxdh_vq_desc_extra *dxp;
- uint16_t flags = vq->vq_packed.cached_flags;
+ uint16_t flags = vq->cached_flags;
int32_t i;
uint16_t idx;
@@ -415,7 +415,6 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
idx = vq->vq_avail_idx;
dxp = &vq->vq_descx[idx];
dxp->cookie = (void *)cookie[i];
- dxp->ndescs = 1;
/* rx pkt fill in data_off */
start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
@@ -423,8 +422,8 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
zxdh_queue_store_flags_packed(&start_dp[idx], flags);
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
- flags = vq->vq_packed.cached_flags;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ flags = vq->cached_flags;
}
}
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
@@ -467,7 +466,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
int32_t cnt = 0;
i = vq->vq_used_cons_idx;
- while (zxdh_desc_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
+ while (desc_is_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
dxp = &vq->vq_descx[descs[i].id];
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -477,7 +476,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
vq->vq_used_cons_idx++;
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
vq->vq_used_cons_idx -= vq->vq_nentries;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
i = vq->vq_used_cons_idx;
}
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 1a0c8a0d90..b079272162 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -9,6 +9,7 @@
#include <rte_common.h>
#include <rte_atomic.h>
+#include <rte_io.h>
#include "zxdh_ethdev.h"
#include "zxdh_rxtx.h"
@@ -117,7 +118,6 @@ struct zxdh_vring_packed_desc_event {
};
struct zxdh_vring_packed {
- uint32_t num;
struct zxdh_vring_packed_desc *desc;
struct zxdh_vring_packed_desc_event *driver;
struct zxdh_vring_packed_desc_event *device;
@@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra {
uint16_t next;
};
+struct zxdh_vring {
+ uint32_t num;
+ struct zxdh_vring_desc *desc;
+ struct zxdh_vring_avail *avail;
+ struct zxdh_vring_used *used;
+};
+
struct zxdh_virtqueue {
+ union {
+ struct {
+ struct zxdh_vring ring; /**< vring keeping desc, used and avail */
+ } vq_split;
+ struct __rte_packed_begin {
+ struct zxdh_vring_packed ring;
+ } __rte_packed_end vq_packed;
+ };
struct zxdh_hw *hw; /* < zxdh_hw structure pointer. */
- struct {
- /* vring keeping descs and events */
- struct zxdh_vring_packed ring;
- uint8_t used_wrap_counter;
- uint8_t rsv;
- uint16_t cached_flags; /* < cached flags for descs */
- uint16_t event_flags_shadow;
- uint16_t rsv1;
- } vq_packed;
-
- uint16_t vq_used_cons_idx; /* < last consumed descriptor */
- uint16_t vq_nentries; /* < vring desc numbers */
- uint16_t vq_free_cnt; /* < num of desc available */
- uint16_t vq_avail_idx; /* < sync until needed */
- uint16_t vq_free_thresh; /* < free threshold */
- uint16_t rsv2;
-
- void *vq_ring_virt_mem; /* < linear address of vring */
- uint32_t vq_ring_size;
+ uint16_t vq_used_cons_idx; /**< last consumed descriptor */
+ uint16_t vq_avail_idx; /**< sync until needed */
+ uint16_t vq_nentries; /**< vring desc numbers */
+ uint16_t vq_free_cnt; /**< num of desc available */
+
+ uint16_t cached_flags; /**< cached flags for descs */
+ uint8_t used_wrap_counter;
+ uint8_t rsv;
+ uint16_t vq_free_thresh; /**< free threshold */
+ uint16_t next_qidx;
+
+ void *notify_addr;
union {
struct zxdh_virtnet_rx rxq;
struct zxdh_virtnet_tx txq;
};
- /*
- * physical address of vring, or virtual address
- */
- rte_iova_t vq_ring_mem;
+ uint16_t vq_queue_index; /* PACKED: phy_idx, SPLIT: logic_idx */
+ uint16_t event_flags_shadow;
+ uint32_t vq_ring_size;
- /*
+ /**
* Head of the free chain in the descriptor table. If
* there are no free descriptors, this will be set to
* VQ_RING_DESC_CHAIN_END.
- */
+ **/
uint16_t vq_desc_head_idx;
uint16_t vq_desc_tail_idx;
- uint16_t vq_queue_index; /* < PCI queue index */
- uint16_t offset; /* < relative offset to obtain addr in mbuf */
- uint16_t *notify_addr;
- struct rte_mbuf **sw_ring; /* < RX software ring. */
+ uint32_t rsv_8B;
+
+ void *vq_ring_virt_mem; /**< linear address of vring*/
+ /* physical address of vring, or virtual address for virtio_user. */
+ rte_iova_t vq_ring_mem;
+
struct zxdh_vq_desc_extra vq_descx[];
};
@@ -296,10 +305,9 @@ static inline void
zxdh_vring_init_packed(struct zxdh_vring_packed *vr, uint8_t *p,
unsigned long align, uint32_t num)
{
- vr->num = num;
vr->desc = (struct zxdh_vring_packed_desc *)p;
vr->driver = (struct zxdh_vring_packed_desc_event *)(p +
- vr->num * sizeof(struct zxdh_vring_packed_desc));
+ num * sizeof(struct zxdh_vring_packed_desc));
vr->device = (struct zxdh_vring_packed_desc_event *)RTE_ALIGN_CEIL(((uintptr_t)vr->driver +
sizeof(struct zxdh_vring_packed_desc_event)), align);
}
@@ -331,30 +339,21 @@ zxdh_vring_desc_init_indirect_packed(struct zxdh_vring_packed_desc *dp, int32_t
static inline void
zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
{
- if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
- vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+ if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
+ vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+ vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
}
static inline void
zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
{
- if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
- vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+ if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
+ vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
+ vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
}
-static inline void
-zxdh_mb(uint8_t weak_barriers)
-{
- if (weak_barriers)
- rte_atomic_thread_fence(rte_memory_order_seq_cst);
- else
- rte_mb();
-}
-
static inline
int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
{
@@ -365,7 +364,7 @@ int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue
rte_io_rmb();
used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
- return avail == used && used == vq->vq_packed.used_wrap_counter;
+ return avail == used && used == vq->used_wrap_counter;
}
static inline int32_t
@@ -381,22 +380,17 @@ zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
dp->flags = flags;
}
-static inline int32_t
-zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
-{
- uint16_t flags;
- uint16_t used, avail;
-
- flags = desc->flags;
- rte_io_rmb();
- used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
- avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
- return avail == used && used == vq->vq_packed.used_wrap_counter;
-}
-
static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq)
{
- ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
+ /* Bit[0:15]: vq queue index
+ * Bit[16:30]: avail index
+ * Bit[31]: avail wrap counter
+ */
+ uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags &
+ ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) |
+ ((uint32_t)vq->vq_avail_idx << 16) |
+ vq->vq_queue_index;
+ rte_write32(notify_data, vq->notify_addr);
}
static inline int32_t
@@ -404,7 +398,7 @@ zxdh_queue_kick_prepare_packed(struct zxdh_virtqueue *vq)
{
uint16_t flags = 0;
- zxdh_mb(1);
+ rte_mb();
flags = vq->vq_packed.ring.device->desc_event_flags;
return (flags != ZXDH_RING_EVENT_FLAGS_DISABLE);
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index db86922aea..93506a4b49 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -216,7 +216,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
/* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
- while (num > 0 && zxdh_desc_used(&desc[used_idx], vq)) {
+ while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
id = desc[used_idx].id;
do {
curr_id = used_idx;
@@ -226,7 +226,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
num -= dxp->ndescs;
if (used_idx >= size) {
used_idx -= size;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -340,7 +340,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
struct zxdh_virtqueue *vq = txvq->vq;
uint16_t id = vq->vq_avail_idx;
struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
- uint16_t flags = vq->vq_packed.cached_flags;
+ uint16_t flags = vq->cached_flags;
struct zxdh_net_hdr_dl *hdr = NULL;
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
@@ -355,7 +355,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
dp->id = id;
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
vq->vq_free_cnt--;
zxdh_queue_store_flags_packed(dp, flags);
@@ -381,7 +381,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
dxp->ndescs = needed;
dxp->cookie = cookie;
- head_flags |= vq->vq_packed.cached_flags;
+ head_flags |= vq->cached_flags;
start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
start_dp[idx].len = hdr_len;
@@ -392,7 +392,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -404,14 +404,14 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
if (likely(idx != head_idx)) {
uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
- flags |= vq->vq_packed.cached_flags;
+ flags |= vq->cached_flags;
start_dp[idx].flags = flags;
}
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
} while ((cookie = cookie->next) != NULL);
@@ -480,7 +480,7 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
free_cnt += dxp->ndescs;
if (used_idx >= size) {
used_idx -= size;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -619,7 +619,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
- if (!zxdh_desc_used(&desc[used_idx], vq))
+ if (!desc_is_used(&desc[used_idx], vq))
return i;
len[i] = desc[used_idx].len;
id = desc[used_idx].id;
@@ -637,7 +637,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
vq->vq_used_cons_idx++;
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
vq->vq_used_cons_idx -= vq->vq_nentries;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
}
return i;
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 38617 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v3 2/3] net/zxdh: optimize Rx recv pkts performance
2026-05-09 6:29 ` [PATCH v3 " Junlong Wang
2026-05-09 6:29 ` [PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
@ 2026-05-09 6:29 ` Junlong Wang
2026-05-09 6:29 ` [PATCH v3 3/3] net/zxdh: optimize Tx xmit " Junlong Wang
2026-06-06 6:32 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Junlong Wang
3 siblings, 0 replies; 23+ messages in thread
From: Junlong Wang @ 2026-05-09 6:29 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 16207 bytes --]
- Add a simple Rx receive function (zxdh_recv_single_pkts)
for single-segment packet reception.
- Optimize the packed-ring Rx receive path.
- Remove the unnecessary ZXDH_NET_F_MRG_RXBUF negotiation check and
some unnecessary statistical counters from the xstats name tables.
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 39 +++++--
drivers/net/zxdh/zxdh_ethdev_ops.c | 23 ++--
drivers/net/zxdh/zxdh_ethdev_ops.h | 4 +
drivers/net/zxdh/zxdh_rxtx.c | 173 +++++++++++++++++++++++------
drivers/net/zxdh/zxdh_rxtx.h | 16 +--
5 files changed, 192 insertions(+), 63 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index 08119e28c7..0ab137189b 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -1263,18 +1263,43 @@ zxdh_dev_close(struct rte_eth_dev *dev)
return ret;
}
-static int32_t
-zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+/*
+ * Determine whether the current configuration requires support for scattered
+ * receive; return 1 if scattered receive is required and 0 if not.
+ */
+static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
{
- struct zxdh_hw *hw = eth_dev->data->dev_private;
+ uint16_t buf_size;
- if (!zxdh_pci_with_feature(hw, ZXDH_NET_F_MRG_RXBUF)) {
- PMD_DRV_LOG(ERR, "port %u not support rx mergeable", eth_dev->data->port_id);
- return -1;
+ if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
+ eth_dev->data->lro = 1;
+ return 1;
}
+
+ if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ return 1;
+
+ PMD_DRV_LOG(DEBUG, "port %u min_rx_buf_size %u",
+ eth_dev->data->port_id, eth_dev->data->min_rx_buf_size);
+ buf_size = eth_dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+ if (eth_dev->data->mtu + ZXDH_ETH_OVERHEAD > buf_size)
+ return 1;
+
+ return 0;
+}
+
+static int32_t
+zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+{
eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
+ eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
+
eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
- eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+
+ if (eth_dev->data->scattered_rx)
+ eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+ else
+ eth_dev->rx_pkt_burst = &zxdh_recv_single_pkts;
return 0;
}
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.c b/drivers/net/zxdh/zxdh_ethdev_ops.c
index 50247116d9..9a8e05e941 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.c
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.c
@@ -95,10 +95,6 @@ static const struct rte_zxdh_xstats_name_off zxdh_rxq_stat_strings[] = {
{"good_bytes", offsetof(struct zxdh_virtnet_rx, stats.bytes)},
{"errors", offsetof(struct zxdh_virtnet_rx, stats.errors)},
{"idle", offsetof(struct zxdh_virtnet_rx, stats.idle)},
- {"full", offsetof(struct zxdh_virtnet_rx, stats.full)},
- {"norefill", offsetof(struct zxdh_virtnet_rx, stats.norefill)},
- {"multicast_packets", offsetof(struct zxdh_virtnet_rx, stats.multicast)},
- {"broadcast_packets", offsetof(struct zxdh_virtnet_rx, stats.broadcast)},
{"truncated_err", offsetof(struct zxdh_virtnet_rx, stats.truncated_err)},
{"offload_cfg_err", offsetof(struct zxdh_virtnet_rx, stats.offload_cfg_err)},
{"invalid_hdr_len_err", offsetof(struct zxdh_virtnet_rx, stats.invalid_hdr_len_err)},
@@ -117,14 +113,12 @@ static const struct rte_zxdh_xstats_name_off zxdh_txq_stat_strings[] = {
{"good_packets", offsetof(struct zxdh_virtnet_tx, stats.packets)},
{"good_bytes", offsetof(struct zxdh_virtnet_tx, stats.bytes)},
{"errors", offsetof(struct zxdh_virtnet_tx, stats.errors)},
- {"idle", offsetof(struct zxdh_virtnet_tx, stats.idle)},
- {"norefill", offsetof(struct zxdh_virtnet_tx, stats.norefill)},
- {"multicast_packets", offsetof(struct zxdh_virtnet_tx, stats.multicast)},
- {"broadcast_packets", offsetof(struct zxdh_virtnet_tx, stats.broadcast)},
+ {"idle", offsetof(struct zxdh_virtnet_tx, stats.idle)},
{"truncated_err", offsetof(struct zxdh_virtnet_tx, stats.truncated_err)},
{"offload_cfg_err", offsetof(struct zxdh_virtnet_tx, stats.offload_cfg_err)},
{"invalid_hdr_len_err", offsetof(struct zxdh_virtnet_tx, stats.invalid_hdr_len_err)},
{"no_segs_err", offsetof(struct zxdh_virtnet_tx, stats.no_segs_err)},
+ {"no_free_tx_desc_err", offsetof(struct zxdh_virtnet_tx, stats.no_free_tx_desc_err)},
{"undersize_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[0])},
{"size_64_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[1])},
{"size_65_127_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[2])},
@@ -2026,6 +2020,19 @@ int zxdh_dev_mtu_set(struct rte_eth_dev *dev, uint16_t new_mtu)
uint16_t vfid = zxdh_vport_to_vfid(hw->vport);
int ret;
+ /* If device is started, refuse mtu that requires the support of
+ * scattered packets when this feature has not been enabled before.
+ */
+ if (dev->data->dev_started) {
+ uint32_t buf_size = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+ uint8_t need_scatter = (uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) > buf_size;
+
+ if (need_scatter != dev->data->scattered_rx) {
+ PMD_DRV_LOG(ERR, "Stop port first.");
+ return -EINVAL;
+ }
+ }
+
if (hw->is_pf) {
ret = zxdh_get_panel_attr(dev, &panel);
if (ret != 0) {
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.h b/drivers/net/zxdh/zxdh_ethdev_ops.h
index 6dfe4be473..c49d79c232 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.h
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.h
@@ -40,6 +40,10 @@
#define ZXDH_SPM_SPEED_4X_100G RTE_BIT32(10)
#define ZXDH_SPM_SPEED_4X_200G RTE_BIT32(11)
+#define ZXDH_VLAN_TAG_LEN 4
+#define ZXDH_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ZXDH_VLAN_TAG_LEN * 2)
+#define ZXDH_MTU_TO_PKTLEN(mtu) ((mtu) + ZXDH_ETH_OVERHEAD)
+
struct zxdh_np_stats_data {
uint64_t n_pkts_dropped;
uint64_t n_bytes_dropped;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index 93506a4b49..4723d4b1d2 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -613,10 +613,12 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
uint16_t i, used_idx;
uint16_t id;
+ used_idx = vq->vq_used_cons_idx;
+ rte_prefetch0(&desc[used_idx]);
+
for (i = 0; i < num; i++) {
used_idx = vq->vq_used_cons_idx;
- /**
- * desc_is_used has a load-acquire or rte_io_rmb inside
+ /* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
if (!desc_is_used(&desc[used_idx], vq))
@@ -823,17 +825,52 @@ zxdh_rx_update_mbuf(struct zxdh_hw *hw, struct rte_mbuf *m, struct zxdh_net_hdr_
}
}
-static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
+static void refill_desc_unwrap(struct zxdh_virtqueue *vq,
+ struct rte_mbuf **cookie, uint16_t nb_pkts)
{
- int32_t error = 0;
- /*
- * Requeue the discarded mbuf. This should always be
- * successful since it was just dequeued.
- */
- error = zxdh_enqueue_recv_refill_packed(vq, &m, 1);
- if (unlikely(error)) {
- PMD_RX_LOG(ERR, "cannot enqueue discarded mbuf");
- rte_pktmbuf_free(m);
+ struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
+ struct zxdh_vq_desc_extra *dxp;
+ uint16_t flags = vq->cached_flags;
+ int32_t i;
+ uint16_t idx;
+
+ idx = vq->vq_avail_idx;
+ for (i = 0; i < nb_pkts; i++) {
+ dxp = &vq->vq_descx[idx];
+ dxp->cookie = (void *)cookie[i];
+ start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
+ start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
+ zxdh_queue_store_flags_packed(&start_dp[idx], flags);
+ idx++;
+ }
+ vq->vq_avail_idx += nb_pkts;
+ vq->vq_free_cnt = vq->vq_free_cnt - nb_pkts;
+}
+
+static void refill_que_descs(struct zxdh_virtqueue *vq, struct rte_eth_dev *dev)
+{
+ /* free_cnt may include mrg descs */
+ struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
+ uint16_t free_cnt = RTE_MIN(ZXDH_MBUF_BURST_SZ, vq->vq_free_cnt);
+ struct zxdh_virtnet_rx *rxvq = &vq->rxq;
+ uint16_t unwrap_cnt, left_cnt;
+
+ if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
+ left_cnt = free_cnt;
+ unwrap_cnt = 0;
+ if ((vq->vq_avail_idx + free_cnt) >= vq->vq_nentries) {
+ unwrap_cnt = vq->vq_nentries - vq->vq_avail_idx;
+ left_cnt = free_cnt - unwrap_cnt;
+ refill_desc_unwrap(vq, new_pkts, unwrap_cnt);
+ vq->vq_avail_idx = 0;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ }
+ if (left_cnt)
+ refill_desc_unwrap(vq, new_pkts + unwrap_cnt, left_cnt);
+
+ rte_io_wmb();
+ } else {
+ dev->data->rx_mbuf_alloc_failed += free_cnt;
}
}
@@ -852,7 +889,6 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t len = 0;
uint32_t seg_num = 0;
uint32_t seg_res = 0;
- uint32_t error = 0;
uint16_t hdr_size = 0;
uint16_t nb_rx = 0;
uint16_t i;
@@ -873,7 +909,8 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_pkts[nb_rx] = rxm;
prev = rxm;
len = lens[i];
- header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+ header = (struct zxdh_net_hdr_ul *)((char *)
+ rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
seg_num = header->type_hdr.num_buffers;
@@ -886,7 +923,7 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rxvq->stats.invalid_hdr_len_err++;
continue;
}
- rxm->data_off += hdr_size;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
rxm->nb_segs = seg_num;
rxm->ol_flags = 0;
rcvd_pkt_len = len - hdr_size;
@@ -902,18 +939,19 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
len = lens[i];
rxm = rcv_pkts[i];
rxm->data_len = len;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM;
rcvd_pkt_len += len;
prev->next = rxm;
prev = rxm;
rxm->next = NULL;
- seg_res -= 1;
+ seg_res--;
}
if (!seg_res) {
if (rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len) {
PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
- zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+ rte_pktmbuf_free(rx_pkts[nb_rx]);
rxvq->stats.errors++;
rxvq->stats.truncated_err++;
continue;
@@ -942,14 +980,14 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
prev->next = rxm;
prev = rxm;
rxm->next = NULL;
- extra_idx += 1;
+ extra_idx++;
}
seg_res -= rcv_cnt;
if (!seg_res) {
if (unlikely(rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len)) {
PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
- zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+ rte_pktmbuf_free(rx_pkts[nb_rx]);
rxvq->stats.errors++;
rxvq->stats.truncated_err++;
continue;
@@ -961,26 +999,87 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rxvq->stats.packets += nb_rx;
refill:
- /* Allocate new mbuf for the used descriptor */
- if (likely(!zxdh_queue_full(vq))) {
- struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
- /* free_cnt may include mrg descs */
- uint16_t free_cnt = RTE_MIN(vq->vq_free_cnt, ZXDH_MBUF_BURST_SZ);
-
- if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
- error = zxdh_enqueue_recv_refill_packed(vq, new_pkts, free_cnt);
- if (unlikely(error)) {
- for (i = 0; i < free_cnt; i++)
- rte_pktmbuf_free(new_pkts[i]);
- }
+ if (vq->vq_free_cnt > 0) {
+ struct rte_eth_dev *dev = hw->eth_dev;
+ refill_que_descs(vq, dev);
+ zxdh_queue_notify(vq);
+ }
- if (unlikely(zxdh_queue_kick_prepare_packed(vq)))
- zxdh_queue_notify(vq);
- } else {
- struct rte_eth_dev *dev = hw->eth_dev;
+ return nb_rx;
+}
- dev->data->rx_mbuf_alloc_failed += free_cnt;
- }
+static inline int zxdh_init_mbuf(struct rte_mbuf *rxm, uint16_t len,
+ struct zxdh_hw *hw, struct zxdh_virtnet_rx *rxvq)
+{
+ uint16_t hdr_size = 0;
+ struct zxdh_net_hdr_ul *header;
+
+ header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+ rxm->ol_flags = 0;
+ rxm->vlan_tci = 0;
+ rxm->vlan_tci_outer = 0;
+
+ hdr_size = header->type_hdr.pd_len << 1;
+ if (unlikely(header->type_hdr.num_buffers != 1)) {
+ PMD_RX_LOG(DEBUG, "hdr_size:%u nb_segs %d is invalid",
+ hdr_size, header->type_hdr.num_buffers);
+ rte_pktmbuf_free(rxm);
+ rxvq->stats.invalid_hdr_len_err++;
+ return -1;
+ }
+ zxdh_rx_update_mbuf(hw, rxm, header);
+
+ rxm->nb_segs = 1;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
+ rxm->data_len = len - hdr_size;
+ rxm->port = hw->port_id;
+
+ if (rxm->data_len != rxm->pkt_len) {
+ PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d bufaddr %p.",
+ rxm->data_len, rxm->pkt_len, rxm->buf_addr);
+ rte_pktmbuf_free(rxm);
+ rxvq->stats.truncated_err++;
+ rxvq->stats.errors++;
+ return -1;
+ }
+ return 0;
+}
+
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts)
+{
+ struct zxdh_virtnet_rx *rxvq = rx_queue;
+ struct zxdh_virtqueue *vq = rxvq->vq;
+ struct zxdh_hw *hw = vq->hw;
+ uint32_t lens[ZXDH_MBUF_BURST_SZ];
+ uint16_t nb_rx = 0;
+ uint16_t num;
+ uint16_t i;
+
+ num = nb_pkts;
+ if (unlikely(num > ZXDH_MBUF_BURST_SZ))
+ num = ZXDH_MBUF_BURST_SZ;
+ num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, num);
+ if (num == 0) {
+ rxvq->stats.idle++;
+ goto refill;
+ }
+
+ for (i = 0; i < num; i++) {
+ struct rte_mbuf *rxm = rcv_pkts[i];
+ uint16_t len = lens[i];
+
+ if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0))
+ continue;
+ zxdh_update_packet_stats(&rxvq->stats, rxm);
+ nb_rx++;
+ }
+ rxvq->stats.packets += nb_rx;
+
+refill:
+ if (vq->vq_free_cnt > 0) {
+ struct rte_eth_dev *dev = hw->eth_dev;
+ refill_que_descs(vq, dev);
+ zxdh_queue_notify(vq);
}
return nb_rx;
}
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index 424048607e..dba9567414 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -36,29 +36,22 @@ struct zxdh_virtnet_stats {
uint64_t bytes;
uint64_t errors;
uint64_t idle;
- uint64_t full;
- uint64_t norefill;
- uint64_t multicast;
- uint64_t broadcast;
uint64_t truncated_err;
uint64_t offload_cfg_err;
uint64_t invalid_hdr_len_err;
uint64_t no_segs_err;
+ uint64_t no_free_tx_desc_err;
uint64_t size_bins[8];
};
struct __rte_cache_aligned zxdh_virtnet_rx {
struct zxdh_virtqueue *vq;
-
- uint64_t mbuf_initializer; /* value to init mbufs. */
struct rte_mempool *mpool; /* mempool for mbuf allocation */
- uint16_t queue_id; /* DPDK queue index. */
- uint16_t port_id; /* Device port identifier. */
struct zxdh_virtnet_stats stats;
const struct rte_memzone *mz; /* mem zone to populate RX ring. */
-
- /* dummy mbuf, for wraparound when processing RX ring. */
- struct rte_mbuf fake_mbuf;
+ uint64_t offloads;
+ uint16_t queue_id; /* DPDK queue index. */
+ uint16_t port_id; /* Device port identifier. */
};
struct __rte_cache_aligned zxdh_virtnet_tx {
@@ -75,5 +68,6 @@ struct __rte_cache_aligned zxdh_virtnet_tx {
uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
#endif /* ZXDH_RXTX_H */
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 39003 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v3 3/3] net/zxdh: optimize Tx xmit pkts performance
2026-05-09 6:29 ` [PATCH v3 " Junlong Wang
2026-05-09 6:29 ` [PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
2026-05-09 6:29 ` [PATCH v3 2/3] net/zxdh: optimize Rx recv pkts performance Junlong Wang
@ 2026-05-09 6:29 ` Junlong Wang
2026-05-18 2:22 ` Stephen Hemminger
2026-06-06 6:32 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Junlong Wang
3 siblings, 1 reply; 23+ messages in thread
From: Junlong Wang @ 2026-05-09 6:29 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 18737 bytes --]
Add a simple Tx transmit function (zxdh_xmit_pkts_simple)
for single-segment packet transmission.
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 11 +-
drivers/net/zxdh/zxdh_rxtx.c | 341 +++++++++++++++++++++++++--------
drivers/net/zxdh/zxdh_rxtx.h | 11 +-
3 files changed, 271 insertions(+), 92 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index 0ab137189b..54d43b54d9 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -490,7 +490,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
if (!vq)
continue;
while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
- rte_pktmbuf_free(buf);
+ rte_pktmbuf_free_seg(buf);
PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
"rxq", i * 2);
}
@@ -499,7 +499,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
if (!vq)
continue;
while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
- rte_pktmbuf_free(buf);
+ rte_pktmbuf_free_seg(buf);
PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
"txq", i * 2 + 1);
}
@@ -1291,10 +1291,15 @@ static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
static int32_t
zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
{
+ uint64_t tx_offloads = eth_dev->data->dev_conf.txmode.offloads;
+
eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
- eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
+ if (!(tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
+ eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_simple;
+ else
+ eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
if (eth_dev->data->scattered_rx)
eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index 4723d4b1d2..e8f1cd65b0 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -114,6 +114,22 @@
RTE_MBUF_F_TX_SEC_OFFLOAD | \
RTE_MBUF_F_TX_UDP_SEG)
+#if RTE_CACHE_LINE_SIZE == 128
+#define NEXT_CACHELINE_OFF_16B 8
+#define NEXT_CACHELINE_OFF_8B 16
+#elif RTE_CACHE_LINE_SIZE == 64
+#define NEXT_CACHELINE_OFF_16B 4
+#define NEXT_CACHELINE_OFF_8B 8
+#else
+#define NEXT_CACHELINE_OFF_16B (RTE_CACHE_LINE_SIZE / 16)
+#define NEXT_CACHELINE_OFF_8B (RTE_CACHE_LINE_SIZE / 8)
+#endif
+#define N_PER_LOOP NEXT_CACHELINE_OFF_8B
+#define N_PER_LOOP_MASK (N_PER_LOOP - 1)
+
+#define rxq_get_vq(q) ((q)->vq)
+#define txq_get_vq(q) ((q)->vq)
+
uint32_t zxdh_outer_l2_type[16] = {
0,
RTE_PTYPE_L2_ETHER,
@@ -201,43 +217,6 @@ uint32_t zxdh_inner_l4_type[16] = {
0,
};
-static void
-zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
-{
- uint16_t used_idx = 0;
- uint16_t id = 0;
- uint16_t curr_id = 0;
- uint16_t free_cnt = 0;
- uint16_t size = vq->vq_nentries;
- struct zxdh_vring_packed_desc *desc = vq->vq_packed.ring.desc;
- struct zxdh_vq_desc_extra *dxp = NULL;
-
- used_idx = vq->vq_used_cons_idx;
- /* desc_is_used has a load-acquire or rte_io_rmb inside
- * and wait for used desc in virtqueue.
- */
- while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
- id = desc[used_idx].id;
- do {
- curr_id = used_idx;
- dxp = &vq->vq_descx[used_idx];
- used_idx += dxp->ndescs;
- free_cnt += dxp->ndescs;
- num -= dxp->ndescs;
- if (used_idx >= size) {
- used_idx -= size;
- vq->used_wrap_counter ^= 1;
- }
- if (dxp->cookie != NULL) {
- rte_pktmbuf_free(dxp->cookie);
- dxp->cookie = NULL;
- }
- } while (curr_id != id);
- }
- vq->vq_used_cons_idx = used_idx;
- vq->vq_free_cnt += free_cnt;
-}
-
static inline uint16_t
zxdh_get_mtu(struct zxdh_virtqueue *vq)
{
@@ -334,7 +313,7 @@ zxdh_xmit_fill_net_hdr(struct zxdh_virtqueue *vq, struct rte_mbuf *cookie,
}
static inline void
-zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_push(struct zxdh_virtnet_tx *txvq,
struct rte_mbuf *cookie)
{
struct zxdh_virtqueue *vq = txvq->vq;
@@ -345,7 +324,6 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
- dxp->ndescs = 1;
dxp->cookie = cookie;
hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -hdr_len);
zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -362,52 +340,57 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
}
static inline void
-zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_append(struct zxdh_virtnet_tx *txvq,
struct rte_mbuf *cookie,
uint16_t needed)
{
struct zxdh_tx_region *txr = txvq->zxdh_net_hdr_mz->addr;
struct zxdh_virtqueue *vq = txvq->vq;
- uint16_t id = vq->vq_avail_idx;
- struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+ struct zxdh_vq_desc_extra *dep = &vq->vq_descx[0];
uint16_t head_idx = vq->vq_avail_idx;
uint16_t idx = head_idx;
struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
struct zxdh_vring_packed_desc *head_dp = &vq->vq_packed.ring.desc[idx];
struct zxdh_net_hdr_dl *hdr = NULL;
-
- uint16_t head_flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+ uint16_t id = vq->vq_avail_idx;
+ struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
+ uint16_t head_flags = 0;
- dxp->ndescs = needed;
- dxp->cookie = cookie;
- head_flags |= vq->cached_flags;
+ /*
+ * IMPORTANT: For multi-seg packets, we set the head descriptor's cookie to NULL
+ * and store each segment's mbuf in its corresponding vq_descx[idx].cookie.
+ * This is required for the per-descriptor mbuf free in zxdh_xmit_fast_flush()
+ * which uses rte_pktmbuf_free_seg() to free individual segments.
+ * Any code path that attempts to read vq_descx[head_id].cookie will see NULL
+ * and must handle this case appropriately.
+ */
+ dxp->cookie = NULL;
+ /* setup first tx ring slot to point to header stored in reserved region. */
start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
start_dp[idx].len = hdr_len;
- head_flags |= ZXDH_VRING_DESC_F_NEXT;
+ start_dp[idx].id = idx;
+ head_flags |= vq->cached_flags | ZXDH_VRING_DESC_F_NEXT;
hdr = (void *)&txr[idx].tx_hdr;
- rte_prefetch1(hdr);
+ zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
+
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
- zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
-
do {
start_dp[idx].addr = rte_pktmbuf_iova(cookie);
start_dp[idx].len = cookie->data_len;
- start_dp[idx].id = id;
- if (likely(idx != head_idx)) {
- uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
-
- flags |= vq->cached_flags;
- start_dp[idx].flags = flags;
- }
+ start_dp[idx].id = idx;
+ dep[idx].cookie = cookie;
+ uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+ flags |= vq->cached_flags;
+ start_dp[idx].flags = flags;
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
@@ -417,7 +400,6 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
vq->vq_avail_idx = idx;
-
zxdh_queue_store_flags_packed(head_dp, head_flags);
}
@@ -456,7 +438,7 @@ zxdh_update_packet_stats(struct zxdh_virtnet_stats *stats, struct rte_mbuf *mbuf
}
static void
-zxdh_xmit_flush(struct zxdh_virtqueue *vq)
+zxdh_xmit_fast_flush(struct zxdh_virtqueue *vq)
{
uint16_t id = 0;
uint16_t curr_id = 0;
@@ -472,20 +454,22 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
* for a used descriptor in the virtqueue.
*/
while (desc_is_used(&desc[used_idx], vq)) {
+ rte_prefetch0(&desc[used_idx + NEXT_CACHELINE_OFF_16B]);
id = desc[used_idx].id;
do {
+ desc[used_idx].id = used_idx;
curr_id = used_idx;
dxp = &vq->vq_descx[used_idx];
- used_idx += dxp->ndescs;
- free_cnt += dxp->ndescs;
- if (used_idx >= size) {
- used_idx -= size;
- vq->used_wrap_counter ^= 1;
- }
if (dxp->cookie != NULL) {
- rte_pktmbuf_free(dxp->cookie);
+ rte_pktmbuf_free_seg(dxp->cookie);
dxp->cookie = NULL;
}
+ used_idx += 1;
+ free_cnt += 1;
+ if (unlikely(used_idx == size)) {
+ used_idx = 0;
+ vq->used_wrap_counter ^= 1;
+ }
} while (curr_id != id);
}
vq->vq_used_cons_idx = used_idx;
@@ -499,13 +483,12 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
struct zxdh_virtqueue *vq = txvq->vq;
uint16_t nb_tx = 0;
- zxdh_xmit_flush(vq);
+ zxdh_xmit_fast_flush(vq);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
struct rte_mbuf *txm = tx_pkts[nb_tx];
int32_t can_push = 0;
int32_t slots = 0;
- int32_t need = 0;
rte_prefetch0(txm);
/* optimize ring usage */
@@ -522,26 +505,15 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
* default => number of segments + 1
**/
slots = txm->nb_segs + !can_push;
- need = slots - vq->vq_free_cnt;
/* Positive value indicates it need free vring descriptors */
- if (unlikely(need > 0)) {
- zxdh_xmit_cleanup_inorder_packed(vq, need);
- need = slots - vq->vq_free_cnt;
- if (unlikely(need > 0)) {
- PMD_TX_LOG(ERR,
- " No enough %d free tx descriptors to transmit."
- "freecnt %d",
- need,
- vq->vq_free_cnt);
- break;
- }
- }
+ if (unlikely(slots > vq->vq_free_cnt))
+ break;
/* Enqueue Packet buffers */
if (can_push)
- zxdh_enqueue_xmit_packed_fast(txvq, txm);
+ zxdh_xmit_enqueue_push(txvq, txm);
else
- zxdh_enqueue_xmit_packed(txvq, txm, slots);
+ zxdh_xmit_enqueue_append(txvq, txm, slots);
zxdh_update_packet_stats(&txvq->stats, txm);
}
txvq->stats.packets += nb_tx;
@@ -1083,3 +1055,204 @@ uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint1
}
return nb_rx;
}
+
+static inline void pkt_padding(struct rte_mbuf *cookie, struct zxdh_hw *hw)
+{
+ uint16_t mtu_or_mss = 0;
+ uint16_t pkt_flag_lw16 = ZXDH_NO_IPID_UPDATE;
+ uint16_t l3_offset;
+ uint8_t pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+ uint8_t l3_ptype = ZXDH_PI_L3TYPE_NOIP;
+ struct zxdh_pi_hdr *pi_hdr;
+ struct zxdh_pd_hdr_dl *pd_hdr;
+ struct zxdh_net_hdr_dl *net_hdr_dl = hw->net_hdr_dl;
+ uint8_t hdr_len = hw->dl_net_hdr_len;
+ uint16_t ol_flag = 0;
+ struct zxdh_net_hdr_dl *hdr;
+
+ hdr = (struct zxdh_net_hdr_dl *)rte_pktmbuf_prepend(cookie, hdr_len);
+ rte_memcpy(hdr, net_hdr_dl, hdr_len);
+
+ if (hw->has_tx_offload) {
+ pi_hdr = &hdr->pipd_hdr_dl.pi_hdr;
+ pd_hdr = &hdr->pipd_hdr_dl.pd_hdr;
+
+ pcode = ZXDH_PCODE_IP_PKT_TYPE;
+ if (cookie->ol_flags & RTE_MBUF_F_TX_IPV6)
+ l3_ptype = ZXDH_PI_L3TYPE_IPV6;
+ else if (cookie->ol_flags & RTE_MBUF_F_TX_IPV4)
+ l3_ptype = ZXDH_PI_L3TYPE_IP;
+ else
+ pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+
+ if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+ mtu_or_mss = (cookie->tso_segsz >= ZXDH_MIN_MSS) ?
+ cookie->tso_segsz : ZXDH_MIN_MSS;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+ pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT | ZXDH_TX_IP_CKSUM_CAL;
+ pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+ } else if (cookie->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
+ mtu_or_mss = hw->eth_dev->data->mtu;
+ mtu_or_mss = (mtu_or_mss >= ZXDH_MIN_MSS) ? mtu_or_mss : ZXDH_MIN_MSS;
+ pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT | ZXDH_TX_TCPUDP_CKSUM_CAL;
+ pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+ } else {
+ pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT;
+ }
+
+ if (cookie->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
+ pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+
+ if ((cookie->ol_flags & RTE_MBUF_F_TX_UDP_CKSUM) == RTE_MBUF_F_TX_UDP_CKSUM) {
+ pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+ } else if ((cookie->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) ==
+ RTE_MBUF_F_TX_TCP_CKSUM) {
+ pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+ pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+ }
+ pkt_flag_lw16 |= (mtu_or_mss >> ZXDH_MTU_MSS_UNIT_SHIFTBIT) & ZXDH_MTU_MSS_MASK;
+ pi_hdr->pkt_flag_lw16 = rte_be_to_cpu_16(pkt_flag_lw16);
+ pi_hdr->pkt_type = l3_ptype | ZXDH_PKT_FORM_CPU | pcode;
+
+ l3_offset = hdr_len + cookie->l2_len;
+ l3_offset += (cookie->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
+ cookie->outer_l2_len + cookie->outer_l3_len : 0;
+ pi_hdr->l3_offset = rte_be_to_cpu_16(l3_offset);
+ pi_hdr->l4_offset = rte_be_to_cpu_16(l3_offset + cookie->l3_len);
+ if (cookie->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
+ ol_flag |= ZXDH_PD_OFFLOAD_OUTER_IPCSUM;
+ } else {
+ pd_hdr = &hdr->pd_hdr;
+ }
+
+ pd_hdr->dst_vfid = rte_be_to_cpu_16(cookie->port);
+
+ if (cookie->ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+ ol_flag |= ZXDH_PD_OFFLOAD_CVLAN_INSERT;
+ pd_hdr->cvlan_insert = rte_be_to_cpu_16(cookie->vlan_tci);
+ if (cookie->ol_flags & RTE_MBUF_F_TX_QINQ) {
+ ol_flag |= ZXDH_PD_OFFLOAD_SVLAN_INSERT;
+ pd_hdr->svlan_insert = rte_be_to_cpu_16(cookie->vlan_tci_outer);
+ }
+ }
+
+ pd_hdr->ol_flag = rte_be_to_cpu_16(ol_flag);
+}
+
+/*
+ * Populate N_PER_LOOP descriptors with data from N_PER_LOOP single-segment mbufs.
+ * Note: The simple transmit path (zxdh_xmit_pkts_simple) is selected only when
+ * RTE_ETH_TX_OFFLOAD_MULTI_SEGS is disabled, so all packets handled here are
+ * guaranteed to be single-segment.
+ */
+static inline void
+tx_bunch(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+ struct rte_mbuf **pkts, uint16_t start_id)
+{
+ uint16_t flags = vq->cached_flags;
+ int i;
+ for (i = 0; i < N_PER_LOOP; ++i, ++txdp, ++pkts) {
+ /* write data to descriptor */
+ txdp->addr = rte_mbuf_data_iova(*pkts);
+ txdp->len = (*pkts)->data_len;
+ txdp->id = start_id + i;
+ txdp->flags = flags;
+ }
+}
+
+/* Populate 1 descriptor with data from 1 single-segment mbuf */
+static inline void
+tx1(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+ struct rte_mbuf *pkts, uint16_t id)
+{
+ uint16_t flags = vq->cached_flags;
+ txdp->addr = rte_mbuf_data_iova(pkts);
+ txdp->len = pkts->data_len;
+ txdp->id = id;
+ txdp->flags = flags;
+}
+
+static void submit_to_backend_simple(struct zxdh_virtqueue *vq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct zxdh_hw *hw = vq->hw;
+ struct rte_mbuf *m = NULL;
+ uint16_t id = vq->vq_avail_idx;
+ struct zxdh_vring_packed_desc *txdp = &vq->vq_packed.ring.desc[id];
+ struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+ int mainpart, leftover;
+ int i, j;
+
+ /*
+ * Process most of the packets in chunks of N pkts. Any
+ * leftover packets will get processed one at a time.
+ */
+ mainpart = (nb_pkts & ~N_PER_LOOP_MASK);
+ leftover = (nb_pkts & N_PER_LOOP_MASK);
+
+ for (i = 0; i < mainpart; i += N_PER_LOOP) {
+ rte_prefetch0(dxp + i);
+ rte_prefetch0(tx_pkts + i);
+ for (j = 0; j < N_PER_LOOP; ++j) {
+ m = *(tx_pkts + i + j);
+ pkt_padding(m, hw);
+ (dxp + i + j)->cookie = (void *)m;
+ }
+ /* write data to descriptor */
+ tx_bunch(vq, txdp + i, tx_pkts + i, id + i);
+ }
+
+ if (leftover > 0) {
+ rte_prefetch0(dxp + mainpart);
+ rte_prefetch0(tx_pkts + mainpart);
+
+ for (i = 0; i < leftover; ++i) {
+ m = *(tx_pkts + mainpart + i);
+ pkt_padding(m, hw);
+ (dxp + mainpart + i)->cookie = m;
+ tx1(vq, txdp + mainpart + i, *(tx_pkts + mainpart + i), id + mainpart + i);
+ }
+ }
+}
+
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct zxdh_virtnet_tx *txvq = tx_queue;
+ struct zxdh_virtqueue *vq = txq_get_vq(txvq);
+ uint16_t nb_tx = 0, nb_tx_left;
+
+ zxdh_xmit_fast_flush(vq);
+
+ nb_pkts = (uint16_t)RTE_MIN(nb_pkts, vq->vq_free_cnt);
+ if (unlikely(nb_pkts == 0)) {
+ txvq->stats.idle++;
+ return 0;
+ }
+
+ nb_tx_left = nb_pkts;
+ if ((vq->vq_avail_idx + nb_pkts) >= vq->vq_nentries) {
+ nb_tx = vq->vq_nentries - vq->vq_avail_idx;
+ nb_tx_left = nb_pkts - nb_tx;
+ submit_to_backend_simple(vq, tx_pkts, nb_tx);
+ vq->vq_avail_idx = 0;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+
+ vq->vq_free_cnt -= nb_tx;
+ tx_pkts += nb_tx;
+ }
+ if (nb_tx_left) {
+ submit_to_backend_simple(vq, tx_pkts, nb_tx_left);
+ vq->vq_avail_idx += nb_tx_left;
+ vq->vq_free_cnt -= nb_tx_left;
+ }
+
+ zxdh_queue_notify(vq);
+ txvq->stats.packets += nb_pkts;
+ for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++)
+ zxdh_update_packet_stats(&txvq->stats, tx_pkts[nb_tx]);
+
+ return nb_pkts;
+}
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index dba9567414..783fb456de 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -56,18 +56,19 @@ struct __rte_cache_aligned zxdh_virtnet_rx {
struct __rte_cache_aligned zxdh_virtnet_tx {
struct zxdh_virtqueue *vq;
-
- rte_iova_t zxdh_net_hdr_mem; /* hdr for each xmit packet */
- uint16_t queue_id; /* DPDK queue index. */
- uint16_t port_id; /* Device port identifier. */
+ const struct rte_memzone *zxdh_net_hdr_mz; /* memzone to populate hdr. */
+ rte_iova_t zxdh_net_hdr_mem; /* hdr for each xmit packet */
struct zxdh_virtnet_stats stats;
const struct rte_memzone *mz; /* mem zone to populate TX ring. */
- const struct rte_memzone *zxdh_net_hdr_mz; /* memzone to populate hdr. */
+ uint64_t offloads;
+ uint16_t queue_id; /* DPDK queue index. */
+ uint16_t port_id; /* Device port identifier. */
};
uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
#endif /* ZXDH_RXTX_H */
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 47065 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance
2026-05-09 6:29 ` [PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
@ 2026-05-18 2:20 ` Stephen Hemminger
0 siblings, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2026-05-18 2:20 UTC (permalink / raw)
To: Junlong Wang; +Cc: dev
On Sat, 9 May 2026 14:29:27 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> Reorganize structure fields for better cache locality.
> Remove RX software ring (sw_ring) to reduce memory allocation and
> copy.
>
> Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
> ---
Looks good.
Some AI comments:
[PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance
Warning: silent bug fix
This patch quietly fixes a real bug in zxdh_queue_enable_intr(). Upstream has:
static inline void
zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
{
if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
}
}
The function checks for DISABLE and then sets to DISABLE — interrupts are never enabled. The patch corrects both occurrences to ENABLE. That fix is not mentioned in the commit message, and it has no Fixes: tag or Cc: stable@dpdk.org. Please split it into its own patch ahead of the structure reorganization so it can be backported.
The commit message also omits other non-trivial changes: removal of zxdh_mb(), and the inlining of zxdh_queue_notify() so it no longer dispatches through ZXDH_VTPCI_OPS()->notify_queue. Worth a sentence each.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v3 3/3] net/zxdh: optimize Tx xmit pkts performance
2026-05-09 6:29 ` [PATCH v3 3/3] net/zxdh: optimize Tx xmit " Junlong Wang
@ 2026-05-18 2:22 ` Stephen Hemminger
0 siblings, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2026-05-18 2:22 UTC (permalink / raw)
To: Junlong Wang; +Cc: dev
On Sat, 9 May 2026 14:29:29 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> Add simple Tx xmit functions (zxdh_xmit_pkts_simple)
> for single-segment packet xmit.
>
> Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
> ---
Some AI review feedback.
[PATCH v3 3/3] net/zxdh: optimize Tx xmit pkts performance
Error: NULL pointer dereference in pkt_padding()
hdr = (struct zxdh_net_hdr_dl *)rte_pktmbuf_prepend(cookie, hdr_len);
rte_memcpy(hdr, net_hdr_dl, hdr_len);
rte_pktmbuf_prepend() returns NULL when headroom is insufficient. The existing zxdh_xmit_pkts_packed() path guards its push fast-path with txm->data_off >= ZXDH_DL_NET_HDR_SIZE and falls back to the indirect path when that fails. The simple Tx path has no such guard; any mbuf submitted with headroom < hw->dl_net_hdr_len will crash here.
Add a NULL check, or screen mbufs with insufficient headroom in zxdh_xmit_pkts_simple() before calling submit_to_backend_simple().
Error: out-of-bounds read in zxdh_xmit_pkts_simple() stats loop
if ((vq->vq_avail_idx + nb_pkts) >= vq->vq_nentries) {
nb_tx = vq->vq_nentries - vq->vq_avail_idx;
nb_tx_left = nb_pkts - nb_tx;
submit_to_backend_simple(vq, tx_pkts, nb_tx);
...
tx_pkts += nb_tx;
}
if (nb_tx_left) {
submit_to_backend_simple(vq, tx_pkts, nb_tx_left);
...
}
zxdh_queue_notify(vq);
txvq->stats.packets += nb_pkts;
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++)
zxdh_update_packet_stats(&txvq->stats, tx_pkts[nb_tx]);
When the ring wraps within a burst, tx_pkts is advanced past the first chunk. The stats loop then walks nb_pkts entries from the advanced pointer, reading past the end of the caller's mbuf array. The first chunk's per-packet stats are also skipped.
Use a separate cursor for the submit calls and iterate the stats loop over the original tx_pkts, or accumulate per-packet stats inside each submit step.
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance
2026-05-09 6:29 ` [PATCH v3 " Junlong Wang
` (2 preceding siblings ...)
2026-05-09 6:29 ` [PATCH v3 3/3] net/zxdh: optimize Tx xmit " Junlong Wang
@ 2026-06-06 6:32 ` Junlong Wang
2026-06-06 6:32 ` [PATCH v4 1/4] net/zxdh: optimize queue structure to improve performance Junlong Wang
` (4 more replies)
3 siblings, 5 replies; 23+ messages in thread
From: Junlong Wang @ 2026-06-06 6:32 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 3045 bytes --]
v4:
- fix some AI review issues.
- fix queue enable intr bug.
v3:
- remove unnecessary NULL check in zxdh_init_queue.
- Split Ring: Bit[31] is unused and reserved, zxdh_queue_notify(): removing the
zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) check;
- remove unnecessary double-free in zxdh_recv_single_pkts();
- used rte_pktmbuf_mtod();
- remove rxq_get_vq(q) macro, use q->vq and apply it consistently;
- Refactoring scatter and mtu check logic in zxdh_dev_mtu_set();
- set txdp->id = avail_idx + i in tx_bunch/tx1.
- add comment documenting zxdh_xmit_enqueue_append() now sets dxp->cookie = NULL for
the head slot and stores cookies per descriptor via dep[idx].cookie.
- add one-line comment noting tx_bunch() is the simple path handles single-segment.
- remove unnecessary extra initialization and the uint32_t cast.
v2:
- zxdh_rxtx.c, pkt_padding(): modified the return value of pkt_padding();
- zxdh_rxtx.c, zxdh_recv_single_pkts(): modified so that when zxdh_init_mbuf() fails
the loop does "continue" and frees the mbufs;
- zxdh_rxtx.c, refill_desc_unwrap(): Add rte_io_wmb() before writing flags
in the refill_que_descs();
- zxdh_queue.h, zxdh_queue_enable_intr(): Remove unnecessary function of zxdh_queue_enable_intr;
- zxdh_ethdev.c, zxdh_init_queue(): changed the hdr_mz NULL check logic;
- zxdh_rxtx.c, zxdh_xmit_pkts_simple(), zxdh_recv_single_pkts(): add stats.bytes count;
- zxdh_rxtx.c, zxdh_init_mbuf():remove rte_pktmbuf_dump(stdout, rxm, 40);
- zxdh_ethdev.c, zxdh_dev_free_mbufs(): using rte_pktmbuf_free() to free mbufs;
- Split into separate patches: structure reorganization and sw_ring removal,
Rx recv optimization, Tx xmit optimization;
v1:
This patch optimizes the ZXDH PMD's receive and transmit path for better
performance through several improvements:
- Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
zxdh_recv_single_pkts) for single-segment packet scenarios.
- Remove RX software ring (sw_ring) to reduce memory allocation and
copy.
- Optimize descriptor management with prefetching and simplified
cleanup.
- Reorganize structure fields for better cache locality.
These changes reduce CPU cycles and memory bandwidth consumption,
resulting in improved packet processing throughput.
Junlong Wang (4):
net/zxdh: optimize queue structure to improve performance
net/zxdh: optimize Rx recv pkts performance
net/zxdh: optimize Tx xmit pkts performance
net/zxdh: fix queue enable intr issues
drivers/net/zxdh/zxdh_ethdev.c | 81 ++---
drivers/net/zxdh/zxdh_ethdev_ops.c | 23 +-
drivers/net/zxdh/zxdh_ethdev_ops.h | 4 +
drivers/net/zxdh/zxdh_pci.c | 2 +-
drivers/net/zxdh/zxdh_queue.c | 11 +-
drivers/net/zxdh/zxdh_queue.h | 120 +++----
drivers/net/zxdh/zxdh_rxtx.c | 534 ++++++++++++++++++++++-------
drivers/net/zxdh/zxdh_rxtx.h | 27 +-
8 files changed, 543 insertions(+), 259 deletions(-)
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 5465 bytes --]
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v4 1/4] net/zxdh: optimize queue structure to improve performance
2026-06-06 6:32 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Junlong Wang
@ 2026-06-06 6:32 ` Junlong Wang
2026-06-06 6:32 ` [PATCH v4 2/4] net/zxdh: optimize Rx recv pkts performance Junlong Wang
` (3 subsequent siblings)
4 siblings, 0 replies; 23+ messages in thread
From: Junlong Wang @ 2026-06-06 6:32 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 16850 bytes --]
1. Reorganize structure fields for better cache locality.
2. Remove RX software ring (sw_ring) to reduce memory allocation and
copy.
3. Remove zxdh_mb(), use native rte_mb().
4. Optimize zxdh_queue_notify() by removing the unnecessary feature
check.
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 33 +--------
drivers/net/zxdh/zxdh_pci.c | 2 +-
drivers/net/zxdh/zxdh_queue.c | 11 ++-
drivers/net/zxdh/zxdh_queue.h | 120 ++++++++++++++++-----------------
drivers/net/zxdh/zxdh_rxtx.c | 22 +++---
5 files changed, 77 insertions(+), 111 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index aeb01f4652..08119e28c7 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -644,7 +644,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
struct zxdh_virtnet_tx *txvq = NULL;
struct zxdh_virtqueue *vq = NULL;
size_t sz_hdr_mz = 0;
- void *sw_ring = NULL;
int32_t queue_type = zxdh_get_queue_type(vtpci_logic_qidx);
int32_t numa_node = dev->device->numa_node;
uint16_t vtpci_phy_qidx = 0;
@@ -692,11 +691,10 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
vq->vq_queue_index = vtpci_phy_qidx;
vq->vq_nentries = vq_size;
- vq->vq_packed.used_wrap_counter = 1;
- vq->vq_packed.cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
- vq->vq_packed.event_flags_shadow = 0;
+ vq->used_wrap_counter = 1;
+ vq->cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
if (queue_type == ZXDH_VTNET_RQ)
- vq->vq_packed.cached_flags |= ZXDH_VRING_DESC_F_WRITE;
+ vq->cached_flags |= ZXDH_VRING_DESC_F_WRITE;
/*
* Reserve a memzone for vring elements
@@ -741,16 +739,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
}
if (queue_type == ZXDH_VTNET_RQ) {
- size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * sizeof(vq->sw_ring[0]);
-
- sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, RTE_CACHE_LINE_SIZE, numa_node);
- if (!sw_ring) {
- PMD_DRV_LOG(ERR, "can not allocate RX soft ring");
- ret = -ENOMEM;
- goto fail_q_alloc;
- }
-
- vq->sw_ring = sw_ring;
rxvq = &vq->rxq;
rxvq->vq = vq;
rxvq->port_id = dev->data->port_id;
@@ -764,23 +752,9 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
txvq->zxdh_net_hdr_mem = hdr_mz->iova;
}
- vq->offset = offsetof(struct rte_mbuf, buf_iova);
if (queue_type == ZXDH_VTNET_TQ) {
struct zxdh_tx_region *txr = hdr_mz->addr;
- uint32_t i;
-
memset(txr, 0, vq_size * sizeof(*txr));
- for (i = 0; i < vq_size; i++) {
- /* first indirect descriptor is always the tx header */
- struct zxdh_vring_packed_desc *start_dp = txr[i].tx_packed_indir;
-
- zxdh_vring_desc_init_indirect_packed(start_dp,
- RTE_DIM(txr[i].tx_packed_indir));
- start_dp->addr = txvq->zxdh_net_hdr_mem + i * sizeof(*txr) +
- offsetof(struct zxdh_tx_region, tx_hdr);
- /* length will be updated to actual pi hdr size when xmit pkt */
- start_dp->len = 0;
- }
}
if (ZXDH_VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
PMD_DRV_LOG(ERR, "setup_queue failed");
@@ -788,7 +762,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
}
return 0;
fail_q_alloc:
- rte_free(sw_ring);
rte_memzone_free(hdr_mz);
rte_memzone_free(mz);
rte_free(vq);
diff --git a/drivers/net/zxdh/zxdh_pci.c b/drivers/net/zxdh/zxdh_pci.c
index 4ba31905fc..0bc27ed111 100644
--- a/drivers/net/zxdh/zxdh_pci.c
+++ b/drivers/net/zxdh/zxdh_pci.c
@@ -231,7 +231,7 @@ zxdh_notify_queue(struct zxdh_hw *hw, struct zxdh_virtqueue *vq)
notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index;
if (zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) &&
- (vq->vq_packed.cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
+ (vq->cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
notify_data |= RTE_BIT32(31);
PMD_DRV_LOG(DEBUG, "queue:%d notify_data 0x%x notify_addr 0x%p",
diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c
index 7162593b16..4668cb5d13 100644
--- a/drivers/net/zxdh/zxdh_queue.c
+++ b/drivers/net/zxdh/zxdh_queue.c
@@ -407,7 +407,7 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
{
struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
struct zxdh_vq_desc_extra *dxp;
- uint16_t flags = vq->vq_packed.cached_flags;
+ uint16_t flags = vq->cached_flags;
int32_t i;
uint16_t idx;
@@ -415,7 +415,6 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
idx = vq->vq_avail_idx;
dxp = &vq->vq_descx[idx];
dxp->cookie = (void *)cookie[i];
- dxp->ndescs = 1;
/* rx pkt fill in data_off */
start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
@@ -423,8 +422,8 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
zxdh_queue_store_flags_packed(&start_dp[idx], flags);
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
- flags = vq->vq_packed.cached_flags;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ flags = vq->cached_flags;
}
}
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
@@ -467,7 +466,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
int32_t cnt = 0;
i = vq->vq_used_cons_idx;
- while (zxdh_desc_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
+ while (desc_is_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
dxp = &vq->vq_descx[descs[i].id];
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -477,7 +476,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
vq->vq_used_cons_idx++;
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
vq->vq_used_cons_idx -= vq->vq_nentries;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
i = vq->vq_used_cons_idx;
}
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 1a0c8a0d90..94101c8269 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -9,6 +9,7 @@
#include <rte_common.h>
#include <rte_atomic.h>
+#include <rte_io.h>
#include "zxdh_ethdev.h"
#include "zxdh_rxtx.h"
@@ -117,7 +118,6 @@ struct zxdh_vring_packed_desc_event {
};
struct zxdh_vring_packed {
- uint32_t num;
struct zxdh_vring_packed_desc *desc;
struct zxdh_vring_packed_desc_event *driver;
struct zxdh_vring_packed_desc_event *device;
@@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra {
uint16_t next;
};
+struct zxdh_vring {
+ uint32_t num;
+ struct zxdh_vring_desc *desc;
+ struct zxdh_vring_avail *avail;
+ struct zxdh_vring_used *used;
+};
+
struct zxdh_virtqueue {
+ union {
+ struct {
+ struct zxdh_vring ring; /**< vring keeping desc, used and avail */
+ } vq_split;
+ struct __rte_packed_begin {
+ struct zxdh_vring_packed ring;
+ } __rte_packed_end vq_packed;
+ };
struct zxdh_hw *hw; /* < zxdh_hw structure pointer. */
- struct {
- /* vring keeping descs and events */
- struct zxdh_vring_packed ring;
- uint8_t used_wrap_counter;
- uint8_t rsv;
- uint16_t cached_flags; /* < cached flags for descs */
- uint16_t event_flags_shadow;
- uint16_t rsv1;
- } vq_packed;
-
- uint16_t vq_used_cons_idx; /* < last consumed descriptor */
- uint16_t vq_nentries; /* < vring desc numbers */
- uint16_t vq_free_cnt; /* < num of desc available */
- uint16_t vq_avail_idx; /* < sync until needed */
- uint16_t vq_free_thresh; /* < free threshold */
- uint16_t rsv2;
-
- void *vq_ring_virt_mem; /* < linear address of vring */
- uint32_t vq_ring_size;
+ uint16_t vq_used_cons_idx; /**< last consumed descriptor */
+ uint16_t vq_avail_idx; /**< sync until needed */
+ uint16_t vq_nentries; /**< vring desc numbers */
+ uint16_t vq_free_cnt; /**< num of desc available */
+
+ uint16_t cached_flags; /**< cached flags for descs */
+ uint8_t used_wrap_counter;
+ uint8_t rsv;
+ uint16_t vq_free_thresh; /**< free threshold */
+ uint16_t next_qidx;
+
+ void *notify_addr;
union {
struct zxdh_virtnet_rx rxq;
struct zxdh_virtnet_tx txq;
};
- /*
- * physical address of vring, or virtual address
- */
- rte_iova_t vq_ring_mem;
+ uint16_t vq_queue_index; /* PACKED: phy_idx, SPLIT: logic_idx */
+ uint16_t event_flags_shadow;
+ uint32_t vq_ring_size;
- /*
+ /**
* Head of the free chain in the descriptor table. If
* there are no free descriptors, this will be set to
* VQ_RING_DESC_CHAIN_END.
- */
+ **/
uint16_t vq_desc_head_idx;
uint16_t vq_desc_tail_idx;
- uint16_t vq_queue_index; /* < PCI queue index */
- uint16_t offset; /* < relative offset to obtain addr in mbuf */
- uint16_t *notify_addr;
- struct rte_mbuf **sw_ring; /* < RX software ring. */
+ uint32_t rsv_8B;
+
+ void *vq_ring_virt_mem; /**< linear address of vring*/
+ /* physical address of vring, or virtual address for virtio_user. */
+ rte_iova_t vq_ring_mem;
+
struct zxdh_vq_desc_extra vq_descx[];
};
@@ -296,10 +305,9 @@ static inline void
zxdh_vring_init_packed(struct zxdh_vring_packed *vr, uint8_t *p,
unsigned long align, uint32_t num)
{
- vr->num = num;
vr->desc = (struct zxdh_vring_packed_desc *)p;
vr->driver = (struct zxdh_vring_packed_desc_event *)(p +
- vr->num * sizeof(struct zxdh_vring_packed_desc));
+ num * sizeof(struct zxdh_vring_packed_desc));
vr->device = (struct zxdh_vring_packed_desc_event *)RTE_ALIGN_CEIL(((uintptr_t)vr->driver +
sizeof(struct zxdh_vring_packed_desc_event)), align);
}
@@ -331,30 +339,21 @@ zxdh_vring_desc_init_indirect_packed(struct zxdh_vring_packed_desc *dp, int32_t
static inline void
zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
{
- if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
- vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+ if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
+ vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+ vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
}
static inline void
zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
{
- if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
- vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+ if (vq->event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
+ vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+ vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
}
-static inline void
-zxdh_mb(uint8_t weak_barriers)
-{
- if (weak_barriers)
- rte_atomic_thread_fence(rte_memory_order_seq_cst);
- else
- rte_mb();
-}
-
static inline
int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
{
@@ -365,7 +364,7 @@ int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue
rte_io_rmb();
used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
- return avail == used && used == vq->vq_packed.used_wrap_counter;
+ return avail == used && used == vq->used_wrap_counter;
}
static inline int32_t
@@ -381,22 +380,17 @@ zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
dp->flags = flags;
}
-static inline int32_t
-zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
-{
- uint16_t flags;
- uint16_t used, avail;
-
- flags = desc->flags;
- rte_io_rmb();
- used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
- avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
- return avail == used && used == vq->vq_packed.used_wrap_counter;
-}
-
static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq)
{
- ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
+ /* Bit[0:15]: vq queue index
+ * Bit[16:30]: avail index
+ * Bit[31]: avail wrap counter
+ */
+ uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags &
+ ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) |
+ ((uint32_t)vq->vq_avail_idx << 16) |
+ vq->vq_queue_index;
+ rte_write32(notify_data, vq->notify_addr);
}
static inline int32_t
@@ -404,7 +398,7 @@ zxdh_queue_kick_prepare_packed(struct zxdh_virtqueue *vq)
{
uint16_t flags = 0;
- zxdh_mb(1);
+ rte_mb();
flags = vq->vq_packed.ring.device->desc_event_flags;
return (flags != ZXDH_RING_EVENT_FLAGS_DISABLE);
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index db86922aea..93506a4b49 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -216,7 +216,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
/* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
- while (num > 0 && zxdh_desc_used(&desc[used_idx], vq)) {
+ while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
id = desc[used_idx].id;
do {
curr_id = used_idx;
@@ -226,7 +226,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
num -= dxp->ndescs;
if (used_idx >= size) {
used_idx -= size;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -340,7 +340,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
struct zxdh_virtqueue *vq = txvq->vq;
uint16_t id = vq->vq_avail_idx;
struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
- uint16_t flags = vq->vq_packed.cached_flags;
+ uint16_t flags = vq->cached_flags;
struct zxdh_net_hdr_dl *hdr = NULL;
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
@@ -355,7 +355,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
dp->id = id;
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
vq->vq_free_cnt--;
zxdh_queue_store_flags_packed(dp, flags);
@@ -381,7 +381,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
dxp->ndescs = needed;
dxp->cookie = cookie;
- head_flags |= vq->vq_packed.cached_flags;
+ head_flags |= vq->cached_flags;
start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
start_dp[idx].len = hdr_len;
@@ -392,7 +392,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -404,14 +404,14 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
if (likely(idx != head_idx)) {
uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
- flags |= vq->vq_packed.cached_flags;
+ flags |= vq->cached_flags;
start_dp[idx].flags = flags;
}
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
} while ((cookie = cookie->next) != NULL);
@@ -480,7 +480,7 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
free_cnt += dxp->ndescs;
if (used_idx >= size) {
used_idx -= size;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
if (dxp->cookie != NULL) {
rte_pktmbuf_free(dxp->cookie);
@@ -619,7 +619,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
- if (!zxdh_desc_used(&desc[used_idx], vq))
+ if (!desc_is_used(&desc[used_idx], vq))
return i;
len[i] = desc[used_idx].len;
id = desc[used_idx].id;
@@ -637,7 +637,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
vq->vq_used_cons_idx++;
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
vq->vq_used_cons_idx -= vq->vq_nentries;
- vq->vq_packed.used_wrap_counter ^= 1;
+ vq->used_wrap_counter ^= 1;
}
}
return i;
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 38860 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v4 2/4] net/zxdh: optimize Rx recv pkts performance
2026-06-06 6:32 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Junlong Wang
2026-06-06 6:32 ` [PATCH v4 1/4] net/zxdh: optimize queue structure to improve performance Junlong Wang
@ 2026-06-06 6:32 ` Junlong Wang
2026-06-06 6:32 ` [PATCH v4 3/4] net/zxdh: optimize Tx xmit " Junlong Wang
` (2 subsequent siblings)
4 siblings, 0 replies; 23+ messages in thread
From: Junlong Wang @ 2026-06-06 6:32 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 16212 bytes --]
1. Add a simple Rx receive function (zxdh_recv_single_pkts)
for single-segment packet reception.
2. Optimize the Rx packed receive path (zxdh_recv_pkts_packed).
3. Remove the unnecessary ZXDH_NET_F_MRG_RXBUF negotiation check and
some unnecessary statistical counters from the xstats name tables.
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 39 +++++--
drivers/net/zxdh/zxdh_ethdev_ops.c | 23 ++--
drivers/net/zxdh/zxdh_ethdev_ops.h | 4 +
drivers/net/zxdh/zxdh_rxtx.c | 173 +++++++++++++++++++++++------
drivers/net/zxdh/zxdh_rxtx.h | 16 +--
5 files changed, 192 insertions(+), 63 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index 08119e28c7..0ab137189b 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -1263,18 +1263,43 @@ zxdh_dev_close(struct rte_eth_dev *dev)
return ret;
}
-static int32_t
-zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+/*
+ * Determine whether the current configuration requires support for scattered
+ * receive; return 1 if scattered receive is required and 0 if not.
+ */
+static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
{
- struct zxdh_hw *hw = eth_dev->data->dev_private;
+ uint16_t buf_size;
- if (!zxdh_pci_with_feature(hw, ZXDH_NET_F_MRG_RXBUF)) {
- PMD_DRV_LOG(ERR, "port %u not support rx mergeable", eth_dev->data->port_id);
- return -1;
+ if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
+ eth_dev->data->lro = 1;
+ return 1;
}
+
+ if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+ return 1;
+
+ PMD_DRV_LOG(DEBUG, "port %u min_rx_buf_size %u",
+ eth_dev->data->port_id, eth_dev->data->min_rx_buf_size);
+ buf_size = eth_dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+ if (eth_dev->data->mtu + ZXDH_ETH_OVERHEAD > buf_size)
+ return 1;
+
+ return 0;
+}
+
+static int32_t
+zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+{
eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
+ eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
+
eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
- eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+
+ if (eth_dev->data->scattered_rx)
+ eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+ else
+ eth_dev->rx_pkt_burst = &zxdh_recv_single_pkts;
return 0;
}
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.c b/drivers/net/zxdh/zxdh_ethdev_ops.c
index 50247116d9..9a8e05e941 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.c
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.c
@@ -95,10 +95,6 @@ static const struct rte_zxdh_xstats_name_off zxdh_rxq_stat_strings[] = {
{"good_bytes", offsetof(struct zxdh_virtnet_rx, stats.bytes)},
{"errors", offsetof(struct zxdh_virtnet_rx, stats.errors)},
{"idle", offsetof(struct zxdh_virtnet_rx, stats.idle)},
- {"full", offsetof(struct zxdh_virtnet_rx, stats.full)},
- {"norefill", offsetof(struct zxdh_virtnet_rx, stats.norefill)},
- {"multicast_packets", offsetof(struct zxdh_virtnet_rx, stats.multicast)},
- {"broadcast_packets", offsetof(struct zxdh_virtnet_rx, stats.broadcast)},
{"truncated_err", offsetof(struct zxdh_virtnet_rx, stats.truncated_err)},
{"offload_cfg_err", offsetof(struct zxdh_virtnet_rx, stats.offload_cfg_err)},
{"invalid_hdr_len_err", offsetof(struct zxdh_virtnet_rx, stats.invalid_hdr_len_err)},
@@ -117,14 +113,12 @@ static const struct rte_zxdh_xstats_name_off zxdh_txq_stat_strings[] = {
{"good_packets", offsetof(struct zxdh_virtnet_tx, stats.packets)},
{"good_bytes", offsetof(struct zxdh_virtnet_tx, stats.bytes)},
{"errors", offsetof(struct zxdh_virtnet_tx, stats.errors)},
- {"idle", offsetof(struct zxdh_virtnet_tx, stats.idle)},
- {"norefill", offsetof(struct zxdh_virtnet_tx, stats.norefill)},
- {"multicast_packets", offsetof(struct zxdh_virtnet_tx, stats.multicast)},
- {"broadcast_packets", offsetof(struct zxdh_virtnet_tx, stats.broadcast)},
+ {"idle", offsetof(struct zxdh_virtnet_tx, stats.idle)},
{"truncated_err", offsetof(struct zxdh_virtnet_tx, stats.truncated_err)},
{"offload_cfg_err", offsetof(struct zxdh_virtnet_tx, stats.offload_cfg_err)},
{"invalid_hdr_len_err", offsetof(struct zxdh_virtnet_tx, stats.invalid_hdr_len_err)},
{"no_segs_err", offsetof(struct zxdh_virtnet_tx, stats.no_segs_err)},
+ {"no_free_tx_desc_err", offsetof(struct zxdh_virtnet_tx, stats.no_free_tx_desc_err)},
{"undersize_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[0])},
{"size_64_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[1])},
{"size_65_127_packets", offsetof(struct zxdh_virtnet_tx, stats.size_bins[2])},
@@ -2026,6 +2020,19 @@ int zxdh_dev_mtu_set(struct rte_eth_dev *dev, uint16_t new_mtu)
uint16_t vfid = zxdh_vport_to_vfid(hw->vport);
int ret;
+ /* If device is started, refuse mtu that requires the support of
+ * scattered packets when this feature has not been enabled before.
+ */
+ if (dev->data->dev_started) {
+ uint32_t buf_size = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+ uint8_t need_scatter = (uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) > buf_size;
+
+ if (need_scatter != dev->data->scattered_rx) {
+ PMD_DRV_LOG(ERR, "Stop port first.");
+ return -EINVAL;
+ }
+ }
+
if (hw->is_pf) {
ret = zxdh_get_panel_attr(dev, &panel);
if (ret != 0) {
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.h b/drivers/net/zxdh/zxdh_ethdev_ops.h
index 6dfe4be473..c49d79c232 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.h
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.h
@@ -40,6 +40,10 @@
#define ZXDH_SPM_SPEED_4X_100G RTE_BIT32(10)
#define ZXDH_SPM_SPEED_4X_200G RTE_BIT32(11)
+#define ZXDH_VLAN_TAG_LEN 4
+#define ZXDH_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ZXDH_VLAN_TAG_LEN * 2)
+#define ZXDH_MTU_TO_PKTLEN(mtu) ((mtu) + ZXDH_ETH_OVERHEAD)
+
struct zxdh_np_stats_data {
uint64_t n_pkts_dropped;
uint64_t n_bytes_dropped;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index 93506a4b49..4723d4b1d2 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -613,10 +613,12 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
uint16_t i, used_idx;
uint16_t id;
+ used_idx = vq->vq_used_cons_idx;
+ rte_prefetch0(&desc[used_idx]);
+
for (i = 0; i < num; i++) {
used_idx = vq->vq_used_cons_idx;
- /**
- * desc_is_used has a load-acquire or rte_io_rmb inside
+ /* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
if (!desc_is_used(&desc[used_idx], vq))
@@ -823,17 +825,52 @@ zxdh_rx_update_mbuf(struct zxdh_hw *hw, struct rte_mbuf *m, struct zxdh_net_hdr_
}
}
-static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
+static void refill_desc_unwrap(struct zxdh_virtqueue *vq,
+ struct rte_mbuf **cookie, uint16_t nb_pkts)
{
- int32_t error = 0;
- /*
- * Requeue the discarded mbuf. This should always be
- * successful since it was just dequeued.
- */
- error = zxdh_enqueue_recv_refill_packed(vq, &m, 1);
- if (unlikely(error)) {
- PMD_RX_LOG(ERR, "cannot enqueue discarded mbuf");
- rte_pktmbuf_free(m);
+ struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
+ struct zxdh_vq_desc_extra *dxp;
+ uint16_t flags = vq->cached_flags;
+ int32_t i;
+ uint16_t idx;
+
+ idx = vq->vq_avail_idx;
+ for (i = 0; i < nb_pkts; i++) {
+ dxp = &vq->vq_descx[idx];
+ dxp->cookie = (void *)cookie[i];
+ start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
+ start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
+ zxdh_queue_store_flags_packed(&start_dp[idx], flags);
+ idx++;
+ }
+ vq->vq_avail_idx += nb_pkts;
+ vq->vq_free_cnt = vq->vq_free_cnt - nb_pkts;
+}
+
+static void refill_que_descs(struct zxdh_virtqueue *vq, struct rte_eth_dev *dev)
+{
+ /* free_cnt may include mrg descs */
+ struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
+ uint16_t free_cnt = RTE_MIN(ZXDH_MBUF_BURST_SZ, vq->vq_free_cnt);
+ struct zxdh_virtnet_rx *rxvq = &vq->rxq;
+ uint16_t unwrap_cnt, left_cnt;
+
+ if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
+ left_cnt = free_cnt;
+ unwrap_cnt = 0;
+ if ((vq->vq_avail_idx + free_cnt) >= vq->vq_nentries) {
+ unwrap_cnt = vq->vq_nentries - vq->vq_avail_idx;
+ left_cnt = free_cnt - unwrap_cnt;
+ refill_desc_unwrap(vq, new_pkts, unwrap_cnt);
+ vq->vq_avail_idx = 0;
+ vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+ }
+ if (left_cnt)
+ refill_desc_unwrap(vq, new_pkts + unwrap_cnt, left_cnt);
+
+ rte_io_wmb();
+ } else {
+ dev->data->rx_mbuf_alloc_failed += free_cnt;
}
}
@@ -852,7 +889,6 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t len = 0;
uint32_t seg_num = 0;
uint32_t seg_res = 0;
- uint32_t error = 0;
uint16_t hdr_size = 0;
uint16_t nb_rx = 0;
uint16_t i;
@@ -873,7 +909,8 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_pkts[nb_rx] = rxm;
prev = rxm;
len = lens[i];
- header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+ header = (struct zxdh_net_hdr_ul *)((char *)
+ rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
seg_num = header->type_hdr.num_buffers;
@@ -886,7 +923,7 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rxvq->stats.invalid_hdr_len_err++;
continue;
}
- rxm->data_off += hdr_size;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
rxm->nb_segs = seg_num;
rxm->ol_flags = 0;
rcvd_pkt_len = len - hdr_size;
@@ -902,18 +939,19 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
len = lens[i];
rxm = rcv_pkts[i];
rxm->data_len = len;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM;
rcvd_pkt_len += len;
prev->next = rxm;
prev = rxm;
rxm->next = NULL;
- seg_res -= 1;
+ seg_res--;
}
if (!seg_res) {
if (rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len) {
PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
- zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+ rte_pktmbuf_free(rx_pkts[nb_rx]);
rxvq->stats.errors++;
rxvq->stats.truncated_err++;
continue;
@@ -942,14 +980,14 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
prev->next = rxm;
prev = rxm;
rxm->next = NULL;
- extra_idx += 1;
+ extra_idx++;
}
seg_res -= rcv_cnt;
if (!seg_res) {
if (unlikely(rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len)) {
PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
- zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+ rte_pktmbuf_free(rx_pkts[nb_rx]);
rxvq->stats.errors++;
rxvq->stats.truncated_err++;
continue;
@@ -961,26 +999,87 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
rxvq->stats.packets += nb_rx;
refill:
- /* Allocate new mbuf for the used descriptor */
- if (likely(!zxdh_queue_full(vq))) {
- struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
- /* free_cnt may include mrg descs */
- uint16_t free_cnt = RTE_MIN(vq->vq_free_cnt, ZXDH_MBUF_BURST_SZ);
-
- if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
- error = zxdh_enqueue_recv_refill_packed(vq, new_pkts, free_cnt);
- if (unlikely(error)) {
- for (i = 0; i < free_cnt; i++)
- rte_pktmbuf_free(new_pkts[i]);
- }
+ if (vq->vq_free_cnt > 0) {
+ struct rte_eth_dev *dev = hw->eth_dev;
+ refill_que_descs(vq, dev);
+ zxdh_queue_notify(vq);
+ }
- if (unlikely(zxdh_queue_kick_prepare_packed(vq)))
- zxdh_queue_notify(vq);
- } else {
- struct rte_eth_dev *dev = hw->eth_dev;
+ return nb_rx;
+}
- dev->data->rx_mbuf_alloc_failed += free_cnt;
- }
+static inline int zxdh_init_mbuf(struct rte_mbuf *rxm, uint16_t len,
+ struct zxdh_hw *hw, struct zxdh_virtnet_rx *rxvq)
+{
+ uint16_t hdr_size = 0;
+ struct zxdh_net_hdr_ul *header;
+
+ header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+ rxm->ol_flags = 0;
+ rxm->vlan_tci = 0;
+ rxm->vlan_tci_outer = 0;
+
+ hdr_size = header->type_hdr.pd_len << 1;
+ if (unlikely(header->type_hdr.num_buffers != 1)) {
+ PMD_RX_LOG(DEBUG, "hdr_size:%u nb_segs %d is invalid",
+ hdr_size, header->type_hdr.num_buffers);
+ rte_pktmbuf_free(rxm);
+ rxvq->stats.invalid_hdr_len_err++;
+ return -1;
+ }
+ zxdh_rx_update_mbuf(hw, rxm, header);
+
+ rxm->nb_segs = 1;
+ rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
+ rxm->data_len = len - hdr_size;
+ rxm->port = hw->port_id;
+
+ if (rxm->data_len != rxm->pkt_len) {
+ PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d bufaddr %p.",
+ rxm->data_len, rxm->pkt_len, rxm->buf_addr);
+ rte_pktmbuf_free(rxm);
+ rxvq->stats.truncated_err++;
+ rxvq->stats.errors++;
+ return -1;
+ }
+ return 0;
+}
+
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts)
+{
+ struct zxdh_virtnet_rx *rxvq = rx_queue;
+ struct zxdh_virtqueue *vq = rxvq->vq;
+ struct zxdh_hw *hw = vq->hw;
+ uint32_t lens[ZXDH_MBUF_BURST_SZ];
+ uint16_t nb_rx = 0;
+ uint16_t num;
+ uint16_t i;
+
+ num = nb_pkts;
+ if (unlikely(num > ZXDH_MBUF_BURST_SZ))
+ num = ZXDH_MBUF_BURST_SZ;
+ num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, num);
+ if (num == 0) {
+ rxvq->stats.idle++;
+ goto refill;
+ }
+
+ for (i = 0; i < num; i++) {
+ struct rte_mbuf *rxm = rcv_pkts[i];
+ uint16_t len = lens[i];
+
+ if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0))
+ continue;
+ zxdh_update_packet_stats(&rxvq->stats, rxm);
+ nb_rx++;
+ }
+ rxvq->stats.packets += nb_rx;
+
+refill:
+ if (vq->vq_free_cnt > 0) {
+ struct rte_eth_dev *dev = hw->eth_dev;
+ refill_que_descs(vq, dev);
+ zxdh_queue_notify(vq);
}
return nb_rx;
}
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index 424048607e..dba9567414 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -36,29 +36,22 @@ struct zxdh_virtnet_stats {
uint64_t bytes;
uint64_t errors;
uint64_t idle;
- uint64_t full;
- uint64_t norefill;
- uint64_t multicast;
- uint64_t broadcast;
uint64_t truncated_err;
uint64_t offload_cfg_err;
uint64_t invalid_hdr_len_err;
uint64_t no_segs_err;
+ uint64_t no_free_tx_desc_err;
uint64_t size_bins[8];
};
struct __rte_cache_aligned zxdh_virtnet_rx {
struct zxdh_virtqueue *vq;
-
- uint64_t mbuf_initializer; /* value to init mbufs. */
struct rte_mempool *mpool; /* mempool for mbuf allocation */
- uint16_t queue_id; /* DPDK queue index. */
- uint16_t port_id; /* Device port identifier. */
struct zxdh_virtnet_stats stats;
const struct rte_memzone *mz; /* mem zone to populate RX ring. */
-
- /* dummy mbuf, for wraparound when processing RX ring. */
- struct rte_mbuf fake_mbuf;
+ uint64_t offloads;
+ uint16_t queue_id; /* DPDK queue index. */
+ uint16_t port_id; /* Device port identifier. */
};
struct __rte_cache_aligned zxdh_virtnet_tx {
@@ -75,5 +68,6 @@ struct __rte_cache_aligned zxdh_virtnet_tx {
uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
#endif /* ZXDH_RXTX_H */
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 39018 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v4 3/4] net/zxdh: optimize Tx xmit pkts performance
2026-06-06 6:32 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Junlong Wang
2026-06-06 6:32 ` [PATCH v4 1/4] net/zxdh: optimize queue structure to improve performance Junlong Wang
2026-06-06 6:32 ` [PATCH v4 2/4] net/zxdh: optimize Rx recv pkts performance Junlong Wang
@ 2026-06-06 6:32 ` Junlong Wang
2026-06-06 6:32 ` [PATCH v4 4/4] net/zxdh: fix queue enable intr issues Junlong Wang
2026-06-07 18:00 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Stephen Hemminger
4 siblings, 0 replies; 23+ messages in thread
From: Junlong Wang @ 2026-06-06 6:32 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang
[-- Attachment #1.1.1: Type: text/plain, Size: 18937 bytes --]
Add a simple Tx transmit function (zxdh_xmit_pkts_simple)
for single-segment packet transmission.
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_ethdev.c | 11 +-
drivers/net/zxdh/zxdh_rxtx.c | 351 +++++++++++++++++++++++++--------
drivers/net/zxdh/zxdh_rxtx.h | 11 +-
3 files changed, 281 insertions(+), 92 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index 0ab137189b..54d43b54d9 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -490,7 +490,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
if (!vq)
continue;
while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
- rte_pktmbuf_free(buf);
+ rte_pktmbuf_free_seg(buf);
PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
"rxq", i * 2);
}
@@ -499,7 +499,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
if (!vq)
continue;
while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
- rte_pktmbuf_free(buf);
+ rte_pktmbuf_free_seg(buf);
PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
"txq", i * 2 + 1);
}
@@ -1291,10 +1291,15 @@ static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
static int32_t
zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
{
+ uint64_t tx_offloads = eth_dev->data->dev_conf.txmode.offloads;
+
eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
- eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
+ if (!(tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
+ eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_simple;
+ else
+ eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
if (eth_dev->data->scattered_rx)
eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index 4723d4b1d2..f1f574caf6 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -114,6 +114,22 @@
RTE_MBUF_F_TX_SEC_OFFLOAD | \
RTE_MBUF_F_TX_UDP_SEG)
+#if RTE_CACHE_LINE_SIZE == 128
+#define NEXT_CACHELINE_OFF_16B 8
+#define NEXT_CACHELINE_OFF_8B 16
+#elif RTE_CACHE_LINE_SIZE == 64
+#define NEXT_CACHELINE_OFF_16B 4
+#define NEXT_CACHELINE_OFF_8B 8
+#else
+#define NEXT_CACHELINE_OFF_16B (RTE_CACHE_LINE_SIZE / 16)
+#define NEXT_CACHELINE_OFF_8B (RTE_CACHE_LINE_SIZE / 8)
+#endif
+#define N_PER_LOOP NEXT_CACHELINE_OFF_8B
+#define N_PER_LOOP_MASK (N_PER_LOOP - 1)
+
+#define rxq_get_vq(q) ((q)->vq)
+#define txq_get_vq(q) ((q)->vq)
+
uint32_t zxdh_outer_l2_type[16] = {
0,
RTE_PTYPE_L2_ETHER,
@@ -201,43 +217,6 @@ uint32_t zxdh_inner_l4_type[16] = {
0,
};
-static void
-zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
-{
- uint16_t used_idx = 0;
- uint16_t id = 0;
- uint16_t curr_id = 0;
- uint16_t free_cnt = 0;
- uint16_t size = vq->vq_nentries;
- struct zxdh_vring_packed_desc *desc = vq->vq_packed.ring.desc;
- struct zxdh_vq_desc_extra *dxp = NULL;
-
- used_idx = vq->vq_used_cons_idx;
- /* desc_is_used has a load-acquire or rte_io_rmb inside
- * and wait for used desc in virtqueue.
- */
- while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
- id = desc[used_idx].id;
- do {
- curr_id = used_idx;
- dxp = &vq->vq_descx[used_idx];
- used_idx += dxp->ndescs;
- free_cnt += dxp->ndescs;
- num -= dxp->ndescs;
- if (used_idx >= size) {
- used_idx -= size;
- vq->used_wrap_counter ^= 1;
- }
- if (dxp->cookie != NULL) {
- rte_pktmbuf_free(dxp->cookie);
- dxp->cookie = NULL;
- }
- } while (curr_id != id);
- }
- vq->vq_used_cons_idx = used_idx;
- vq->vq_free_cnt += free_cnt;
-}
-
static inline uint16_t
zxdh_get_mtu(struct zxdh_virtqueue *vq)
{
@@ -334,7 +313,7 @@ zxdh_xmit_fill_net_hdr(struct zxdh_virtqueue *vq, struct rte_mbuf *cookie,
}
static inline void
-zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_push(struct zxdh_virtnet_tx *txvq,
struct rte_mbuf *cookie)
{
struct zxdh_virtqueue *vq = txvq->vq;
@@ -345,7 +324,6 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
- dxp->ndescs = 1;
dxp->cookie = cookie;
hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -hdr_len);
zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -362,52 +340,57 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
}
static inline void
-zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_append(struct zxdh_virtnet_tx *txvq,
struct rte_mbuf *cookie,
uint16_t needed)
{
struct zxdh_tx_region *txr = txvq->zxdh_net_hdr_mz->addr;
struct zxdh_virtqueue *vq = txvq->vq;
- uint16_t id = vq->vq_avail_idx;
- struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+ struct zxdh_vq_desc_extra *dep = &vq->vq_descx[0];
uint16_t head_idx = vq->vq_avail_idx;
uint16_t idx = head_idx;
struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
struct zxdh_vring_packed_desc *head_dp = &vq->vq_packed.ring.desc[idx];
struct zxdh_net_hdr_dl *hdr = NULL;
-
- uint16_t head_flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+ uint16_t id = vq->vq_avail_idx;
+ struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
uint8_t hdr_len = vq->hw->dl_net_hdr_len;
+ uint16_t head_flags = 0;
- dxp->ndescs = needed;
- dxp->cookie = cookie;
- head_flags |= vq->cached_flags;
+ /*
+ * IMPORTANT: For multi-seg packets, we set the head descriptor's cookie to NULL
+ * and store each segment's mbuf in its corresponding vq_descx[idx].cookie.
+ * This is required for the per-descriptor mbuf free in zxdh_xmit_fast_flush()
+ * which uses rte_pktmbuf_free_seg() to free individual segments.
+ * Any code path that attempts to read vq_descx[head_id].cookie will see NULL
+ * and must handle this case appropriately.
+ */
+ dxp->cookie = NULL;
+ /* setup first tx ring slot to point to header stored in reserved region. */
start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
start_dp[idx].len = hdr_len;
- head_flags |= ZXDH_VRING_DESC_F_NEXT;
+ start_dp[idx].id = idx;
+ head_flags |= vq->cached_flags | ZXDH_VRING_DESC_F_NEXT;
hdr = (void *)&txr[idx].tx_hdr;
- rte_prefetch1(hdr);
+ zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
+
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
}
- zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
-
do {
start_dp[idx].addr = rte_pktmbuf_iova(cookie);
start_dp[idx].len = cookie->data_len;
- start_dp[idx].id = id;
- if (likely(idx != head_idx)) {
- uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
-
- flags |= vq->cached_flags;
- start_dp[idx].flags = flags;
- }
+ start_dp[idx].id = idx;
+ dep[idx].cookie = cookie;
+ uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+ flags |= vq->cached_flags;
+ start_dp[idx].flags = flags;
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
@@ -417,7 +400,6 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
vq->vq_avail_idx = idx;
-
zxdh_queue_store_flags_packed(head_dp, head_flags);
}
@@ -456,7 +438,7 @@ zxdh_update_packet_stats(struct zxdh_virtnet_stats *stats, struct rte_mbuf *mbuf
}
static void
-zxdh_xmit_flush(struct zxdh_virtqueue *vq)
+zxdh_xmit_fast_flush(struct zxdh_virtqueue *vq)
{
uint16_t id = 0;
uint16_t curr_id = 0;
@@ -472,20 +454,22 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
* for a used descriptor in the virtqueue.
*/
while (desc_is_used(&desc[used_idx], vq)) {
+ rte_prefetch0(&desc[used_idx + NEXT_CACHELINE_OFF_16B]);
id = desc[used_idx].id;
do {
+ desc[used_idx].id = used_idx;
curr_id = used_idx;
dxp = &vq->vq_descx[used_idx];
- used_idx += dxp->ndescs;
- free_cnt += dxp->ndescs;
- if (used_idx >= size) {
- used_idx -= size;
- vq->used_wrap_counter ^= 1;
- }
if (dxp->cookie != NULL) {
- rte_pktmbuf_free(dxp->cookie);
+ rte_pktmbuf_free_seg(dxp->cookie);
dxp->cookie = NULL;
}
+ used_idx += 1;
+ free_cnt += 1;
+ if (unlikely(used_idx == size)) {
+ used_idx = 0;
+ vq->used_wrap_counter ^= 1;
+ }
} while (curr_id != id);
}
vq->vq_used_cons_idx = used_idx;
@@ -499,13 +483,12 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
struct zxdh_virtqueue *vq = txvq->vq;
uint16_t nb_tx = 0;
- zxdh_xmit_flush(vq);
+ zxdh_xmit_fast_flush(vq);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
struct rte_mbuf *txm = tx_pkts[nb_tx];
int32_t can_push = 0;
int32_t slots = 0;
- int32_t need = 0;
rte_prefetch0(txm);
/* optimize ring usage */
@@ -522,26 +505,15 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
* default => number of segments + 1
**/
slots = txm->nb_segs + !can_push;
- need = slots - vq->vq_free_cnt;
/* Positive value indicates it need free vring descriptors */
- if (unlikely(need > 0)) {
- zxdh_xmit_cleanup_inorder_packed(vq, need);
- need = slots - vq->vq_free_cnt;
- if (unlikely(need > 0)) {
- PMD_TX_LOG(ERR,
- " No enough %d free tx descriptors to transmit."
- "freecnt %d",
- need,
- vq->vq_free_cnt);
- break;
- }
- }
+ if (unlikely(slots > vq->vq_free_cnt))
+ break;
/* Enqueue Packet buffers */
if (can_push)
- zxdh_enqueue_xmit_packed_fast(txvq, txm);
+ zxdh_xmit_enqueue_push(txvq, txm);
else
- zxdh_enqueue_xmit_packed(txvq, txm, slots);
+ zxdh_xmit_enqueue_append(txvq, txm, slots);
zxdh_update_packet_stats(&txvq->stats, txm);
}
txvq->stats.packets += nb_tx;
@@ -1083,3 +1055,214 @@ uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint1
}
return nb_rx;
}
+
+/*
+ * Prepend the downlink net header to @cookie and fill it in.
+ *
+ * The template hw->net_hdr_dl (hw->dl_net_hdr_len bytes) is copied into the
+ * prepended area. When hw->has_tx_offload is set, the PI header is derived
+ * from the mbuf's Tx offload flags and the PD header inside pipd_hdr_dl is
+ * used; otherwise only the stand-alone PD header is touched.
+ *
+ * Returns 0 on success, or -1 when rte_pktmbuf_prepend() returns NULL.
+ */
+static inline int pkt_padding(struct rte_mbuf *cookie, struct zxdh_hw *hw)
+{
+	const uint64_t tx_flags = cookie->ol_flags;
+	uint8_t net_hdr_len = hw->dl_net_hdr_len;
+	struct zxdh_net_hdr_dl *hdr;
+	struct zxdh_pd_hdr_dl *pd_hdr;
+	uint16_t pd_ol_flag = 0;
+
+	hdr = (struct zxdh_net_hdr_dl *)rte_pktmbuf_prepend(cookie, net_hdr_len);
+	if (unlikely(hdr == NULL))
+		return -1;
+	rte_memcpy(hdr, hw->net_hdr_dl, net_hdr_len);
+
+	if (!hw->has_tx_offload) {
+		pd_hdr = &hdr->pd_hdr;
+	} else {
+		struct zxdh_pi_hdr *pi_hdr = &hdr->pipd_hdr_dl.pi_hdr;
+		uint16_t flag_lw16 = ZXDH_NO_IPID_UPDATE;
+		uint16_t mss = 0;
+		uint16_t l3_off;
+		uint8_t l3_type = ZXDH_PI_L3TYPE_NOIP;
+		uint8_t pcode;
+
+		pd_hdr = &hdr->pipd_hdr_dl.pd_hdr;
+
+		/* L3 type; the packet code starts out as "IP" or "no IP". */
+		if (tx_flags & RTE_MBUF_F_TX_IPV6) {
+			l3_type = ZXDH_PI_L3TYPE_IPV6;
+			pcode = ZXDH_PCODE_IP_PKT_TYPE;
+		} else if (tx_flags & RTE_MBUF_F_TX_IPV4) {
+			l3_type = ZXDH_PI_L3TYPE_IP;
+			pcode = ZXDH_PCODE_IP_PKT_TYPE;
+		} else {
+			pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+		}
+
+		/* Segmentation offload: MSS/MTU is clamped to ZXDH_MIN_MSS. */
+		if (tx_flags & RTE_MBUF_F_TX_TCP_SEG) {
+			mss = cookie->tso_segsz;
+			if (mss < ZXDH_MIN_MSS)
+				mss = ZXDH_MIN_MSS;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+			flag_lw16 |= ZXDH_NO_IP_FRAGMENT | ZXDH_TX_IP_CKSUM_CAL;
+			pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+		} else if (tx_flags & RTE_MBUF_F_TX_UDP_SEG) {
+			mss = hw->eth_dev->data->mtu;
+			if (mss < ZXDH_MIN_MSS)
+				mss = ZXDH_MIN_MSS;
+			flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT | ZXDH_TX_TCPUDP_CKSUM_CAL;
+			pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+		} else {
+			flag_lw16 |= ZXDH_NO_IP_FRAGMENT;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT;
+		}
+
+		/* Checksum offload requests. */
+		if (tx_flags & RTE_MBUF_F_TX_IP_CKSUM)
+			flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+
+		if ((tx_flags & RTE_MBUF_F_TX_UDP_CKSUM) == RTE_MBUF_F_TX_UDP_CKSUM) {
+			pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+		} else if ((tx_flags & RTE_MBUF_F_TX_TCP_CKSUM) ==
+				RTE_MBUF_F_TX_TCP_CKSUM) {
+			pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+		}
+
+		flag_lw16 |= (mss >> ZXDH_MTU_MSS_UNIT_SHIFTBIT) & ZXDH_MTU_MSS_MASK;
+		pi_hdr->pkt_flag_lw16 = rte_be_to_cpu_16(flag_lw16);
+		pi_hdr->pkt_type = l3_type | ZXDH_PKT_FORM_CPU | pcode;
+
+		/* Offsets are counted from the start of the prepended header. */
+		l3_off = net_hdr_len + cookie->l2_len;
+		if (tx_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
+			l3_off += cookie->outer_l2_len + cookie->outer_l3_len;
+		pi_hdr->l3_offset = rte_be_to_cpu_16(l3_off);
+		pi_hdr->l4_offset = rte_be_to_cpu_16(l3_off + cookie->l3_len);
+
+		if (tx_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
+			pd_ol_flag |= ZXDH_PD_OFFLOAD_OUTER_IPCSUM;
+	}
+
+	pd_hdr->dst_vfid = rte_be_to_cpu_16(cookie->port);
+
+	/* VLAN insertion: C-VLAN for VLAN or QinQ, S-VLAN additionally for QinQ. */
+	if (tx_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+		pd_ol_flag |= ZXDH_PD_OFFLOAD_CVLAN_INSERT;
+		pd_hdr->cvlan_insert = rte_be_to_cpu_16(cookie->vlan_tci);
+		if (tx_flags & RTE_MBUF_F_TX_QINQ) {
+			pd_ol_flag |= ZXDH_PD_OFFLOAD_SVLAN_INSERT;
+			pd_hdr->svlan_insert = rte_be_to_cpu_16(cookie->vlan_tci_outer);
+		}
+	}
+
+	pd_hdr->ol_flag = rte_be_to_cpu_16(pd_ol_flag);
+
+	return 0;
+}
+
+/*
+ * Populate N_PER_LOOP descriptors with data from N_PER_LOOP single-segment mbufs.
+ * Note: The simple transmit path (zxdh_xmit_pkts_simple) is selected only when
+ * RTE_ETH_TX_OFFLOAD_MULTI_SEGS is disabled, so all packets handled here are
+ * guaranteed to be single-segment.
+ *
+ * @vq:       virtqueue whose cached_flags supply the descriptor flags word
+ * @txdp:     first of N_PER_LOOP consecutive descriptors to fill
+ * @pkts:     N_PER_LOOP mbufs, one per descriptor
+ * @start_id: id written to the first descriptor; following ones get +1 each
+ *
+ * The flags word is what hands a descriptor to the device, so addr/len/id of
+ * the whole batch are written first, followed by one rte_io_wmb(), and only
+ * then the flags. "volatile" alone does not order the stores on
+ * weakly-ordered CPUs.
+ */
+static inline void
+tx_bunch(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+		struct rte_mbuf **pkts, uint16_t start_id)
+{
+	uint16_t flags = vq->cached_flags;
+	int i;
+
+	/* write data to descriptors, everything except the flags word */
+	for (i = 0; i < N_PER_LOOP; ++i) {
+		txdp[i].addr = rte_mbuf_data_iova(pkts[i]);
+		txdp[i].len = pkts[i]->data_len;
+		txdp[i].id = start_id + i;
+	}
+
+	/* descriptor contents must be visible before the flags are */
+	rte_io_wmb();
+
+	for (i = 0; i < N_PER_LOOP; ++i)
+		txdp[i].flags = flags;
+}
+
+/*
+ * Populate 1 descriptor with data from 1 single-segment mbuf.
+ *
+ * @vq:   virtqueue whose cached_flags supply the descriptor flags word
+ * @txdp: descriptor to fill
+ * @pkts: mbuf to transmit
+ * @id:   id written to the descriptor
+ *
+ * The flags word is what hands the descriptor to the device, so it is stored
+ * last, after a write barrier that makes addr/len/id visible first.
+ */
+static inline void
+tx1(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+		struct rte_mbuf *pkts, uint16_t id)
+{
+	uint16_t flags = vq->cached_flags;
+
+	txdp->addr = rte_mbuf_data_iova(pkts);
+	txdp->len = pkts->data_len;
+	txdp->id = id;
+	/* descriptor contents must be visible before the flags are */
+	rte_io_wmb();
+	txdp->flags = flags;
+}
+
+/*
+ * Fill descriptors for @nb_pkts single-segment mbufs, starting at
+ * vq->vq_avail_idx, and record every mbuf in vq_descx[].cookie.
+ *
+ * Preconditions (guaranteed by zxdh_xmit_pkts_simple()):
+ *  - the downlink header has already been prepended to every mbuf
+ *    (pkt_padding() succeeded), so every slot written here is valid;
+ *  - the run [vq_avail_idx, vq_avail_idx + nb_pkts) does not cross the end
+ *    of the ring.
+ *
+ * vq_avail_idx and vq_free_cnt are not modified here; the caller updates them.
+ */
+static void submit_to_backend_simple(struct zxdh_virtqueue *vq,
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	uint16_t id = vq->vq_avail_idx;
+	struct zxdh_vring_packed_desc *txdp = &vq->vq_packed.ring.desc[id];
+	struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+	struct rte_mbuf *m;
+	int mainpart, leftover;
+	int i, j;
+
+	/*
+	 * Process most of the packets in chunks of N pkts. Any
+	 * leftover packets will get processed one at a time.
+	 */
+	mainpart = (nb_pkts & ~N_PER_LOOP_MASK);
+	leftover = (nb_pkts & N_PER_LOOP_MASK);
+
+	for (i = 0; i < mainpart; i += N_PER_LOOP) {
+		rte_prefetch0(dxp + i);
+		rte_prefetch0(tx_pkts + i);
+		for (j = 0; j < N_PER_LOOP; ++j) {
+			m = tx_pkts[i + j];
+			dxp[i + j].cookie = (void *)m;
+			zxdh_update_packet_stats(&vq->txq.stats, m);
+		}
+		/* write data to descriptor */
+		tx_bunch(vq, txdp + i, tx_pkts + i, id + i);
+	}
+
+	if (leftover > 0) {
+		rte_prefetch0(dxp + mainpart);
+		rte_prefetch0(tx_pkts + mainpart);
+
+		for (i = mainpart; i < nb_pkts; ++i) {
+			m = tx_pkts[i];
+			dxp[i].cookie = m;
+			tx1(vq, txdp + i, m, id + i);
+			zxdh_update_packet_stats(&vq->txq.stats, m);
+		}
+	}
+}
+
+/*
+ * Tx burst for the simple (single-segment) path.
+ *
+ * Reclaims completed descriptors, limits the burst to the free descriptor
+ * count, prepends the downlink header to each packet and then publishes the
+ * descriptors in at most two runs (up to the end of the ring, then from
+ * index 0 with the cached avail/used flags toggled).
+ *
+ * If pkt_padding() fails for a packet, the burst is cut short right before
+ * it: no ring slot is consumed, nothing is published for that packet, and it
+ * is not counted in the return value, so the mbuf stays with the caller
+ * like any other unsent packet of a short burst.
+ *
+ * Returns the number of packets handed to the device.
+ */
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct zxdh_virtnet_tx *txvq = tx_queue;
+	struct zxdh_virtqueue *vq = txq_get_vq(txvq);
+	struct zxdh_hw *hw = vq->hw;
+	uint16_t nb_tx = 0, nb_tx_left;
+	uint16_t nb_ready;
+
+	zxdh_xmit_fast_flush(vq);
+
+	nb_pkts = (uint16_t)RTE_MIN(nb_pkts, vq->vq_free_cnt);
+	if (unlikely(nb_pkts == 0)) {
+		txvq->stats.idle++;
+		return 0;
+	}
+
+	/*
+	 * Prepend the headers before touching the ring, so that a failure
+	 * can never leave an unfilled or header-less descriptor behind.
+	 */
+	for (nb_ready = 0; nb_ready < nb_pkts; nb_ready++) {
+		if (unlikely(pkt_padding(tx_pkts[nb_ready], hw) < 0)) {
+			vq->txq.stats.errors++;
+			break;
+		}
+	}
+	nb_pkts = nb_ready;
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	nb_tx_left = nb_pkts;
+	if ((vq->vq_avail_idx + nb_pkts) >= vq->vq_nentries) {
+		/* first run: up to the end of the ring, then wrap */
+		nb_tx = vq->vq_nentries - vq->vq_avail_idx;
+		nb_tx_left = nb_pkts - nb_tx;
+		submit_to_backend_simple(vq, tx_pkts, nb_tx);
+		vq->vq_avail_idx = 0;
+		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+
+		vq->vq_free_cnt -= nb_tx;
+		tx_pkts += nb_tx;
+	}
+	if (nb_tx_left) {
+		submit_to_backend_simple(vq, tx_pkts, nb_tx_left);
+		vq->vq_avail_idx += nb_tx_left;
+		vq->vq_free_cnt -= nb_tx_left;
+	}
+
+	zxdh_queue_notify(vq);
+	txvq->stats.packets += nb_pkts;
+
+	return nb_pkts;
+}
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index dba9567414..783fb456de 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -56,18 +56,19 @@ struct __rte_cache_aligned zxdh_virtnet_rx {
struct __rte_cache_aligned zxdh_virtnet_tx {
struct zxdh_virtqueue *vq; /* virtqueue backing this Tx queue */
-
- rte_iova_t zxdh_net_hdr_mem; /* hdr for each xmit packet */
- uint16_t queue_id; /* DPDK queue index. */
- uint16_t port_id; /* Device port identifier. */
+ const struct rte_memzone *zxdh_net_hdr_mz; /* memzone to populate hdr. */
+ rte_iova_t zxdh_net_hdr_mem; /* hdr for each xmit packet */
struct zxdh_virtnet_stats stats;
const struct rte_memzone *mz; /* mem zone to populate TX ring. */
- const struct rte_memzone *zxdh_net_hdr_mz; /* memzone to populate hdr. */
+ uint64_t offloads; /* NOTE(review): presumably the Tx offload flags of this queue - confirm */
+ uint16_t queue_id; /* DPDK queue index. */
+ uint16_t port_id; /* Device port identifier. */
};
uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
#endif /* ZXDH_RXTX_H */
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 48018 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v4 4/4] net/zxdh: fix queue enable intr issues
2026-06-06 6:32 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Junlong Wang
` (2 preceding siblings ...)
2026-06-06 6:32 ` [PATCH v4 3/4] net/zxdh: optimize Tx xmit " Junlong Wang
@ 2026-06-06 6:32 ` Junlong Wang
2026-06-07 18:00 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Stephen Hemminger
4 siblings, 0 replies; 23+ messages in thread
From: Junlong Wang @ 2026-06-06 6:32 UTC (permalink / raw)
To: stephen; +Cc: dev, Junlong Wang, stable
[-- Attachment #1.1.1: Type: text/plain, Size: 1146 bytes --]
Fix incorrect condition check in zxdh_queue_enable_intr.
Change "==" to "!=", consistent with zxdh_queue_disable_intr logic,
to properly enable interrupts when event_flags_shadow is not
already set to ENABLE state.
Fixes: 7677f3871ef3 ("net/zxdh: setup Rx/Tx queues and interrupt")
Cc: stable@dpdk.org
Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
drivers/net/zxdh/zxdh_queue.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 94101c8269..46d441e933 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -348,8 +348,8 @@ zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
static inline void
zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
{
- if (vq->event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+ if (vq->event_flags_shadow == ZXDH_RING_EVENT_FLAGS_ENABLE) {
+ vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
}
--
2.27.0
[-- Attachment #1.1.2: Type: text/html , Size: 1952 bytes --]
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance
2026-06-06 6:32 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Junlong Wang
` (3 preceding siblings ...)
2026-06-06 6:32 ` [PATCH v4 4/4] net/zxdh: fix queue enable intr issues Junlong Wang
@ 2026-06-07 18:00 ` Stephen Hemminger
4 siblings, 0 replies; 23+ messages in thread
From: Stephen Hemminger @ 2026-06-07 18:00 UTC (permalink / raw)
To: Junlong Wang; +Cc: dev
On Sat, 6 Jun 2026 14:32:21 +0800
Junlong Wang <wang.junlong1@zte.com.cn> wrote:
> v4:
> - fix some AI review issues.
> - fix queue enable intr bug.
>
> v3:
> - remove unnecessary NULL check in zxdh_init_queue.
> - Split Ring: Bit[31] is unused and reserved, zxdh_queue_notify(): removing the
> zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) check;
> - remove unnecessary double-free in in zxdh_recv_single_pkts();
> - used rte_pktmbuf_mtod();
> - remove rxq_get_vq(q) macro, use q->vq and apply it consistently;
> - Refactoring scatter and mtu check logic in zxdh_dev_mtu_set();
> - set txdp->id = avail_idx + i in tx_bunch/tx1.
> - add comment documenting zxdh_xmit_enqueue_append() now sets dxp->cookie = NULL for
> the head slot and stores cookies per descriptor via dep[idx].cookie.
> - add one-line comment noting tx_bunch() is the simple path handles single-segment.
> - remove unnecessary Extra initialization and the uint32_t cast.
>
> v2:
> - zxdh_rxtx.c, pkt_padding(): modifyed the return value of pkt_padding();
> - zxdh_rxtx.c, zxdh_recv_single_pkts(): modifyed When zxdh_init_mbuf() fails
> the loop does "continue" and free mbufs;
> - zxdh_rxtx.c, refill_desc_unwrap(): Add rte_io_wmb() before writing flags
> in the refill_que_descs();
> - zxdh_queue.h, zxdh_queue_enable_intr(): Remove unnecessary function of zxdh_queue_enable_intr;
> - zxdh_ethdev.c, zxdh_init_queue(): changed the hdr_mz NULL check logic;
>
> - zxdh_rxtx.c, zxdh_xmit_pkts_simple()、zxdh_recv_single_pkts(): add stats.bytes count;
> - zxdh_rxtx.c, zxdh_init_mbuf():remove rte_pktmbuf_dump(stdout, rxm, 40);
> - zxdh_ethdev.c, zxdh_dev_free_mbufs(): using rte_pktmbuf_free() to free mbufs;
> - Splitting into separate patches, structure reorganization and sw_ring removal、
> RX recv optimize、Tx xmit optimize、Tx;
>
> v1:
> This patch optimizes the ZXDH PMD's receive and transmit path for better
> performance through several improvements:
>
> - Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
> zxdh_recv_single_pkts) for single-segment packet scenarios.
> - Remove RX software ring (sw_ring) to reduce memory allocation and
> copy.
> - Optimize descriptor management with prefetching and simplified
> cleanup.
> - Reorganize structure fields for better cache locality.
>
> These changes reduce CPU cycles and memory bandwidth consumption,
> resulting in improved packet processing throughput.
>
> Junlong Wang (4):
> net/zxdh: optimize queue structure to improve performance
> net/zxdh: optimize Rx recv pkts performance
> net/zxdh: optimize Tx xmit pkts performance
> net/zxdh: fix queue enable intr issues
>
> drivers/net/zxdh/zxdh_ethdev.c | 81 ++---
> drivers/net/zxdh/zxdh_ethdev_ops.c | 23 +-
> drivers/net/zxdh/zxdh_ethdev_ops.h | 4 +
> drivers/net/zxdh/zxdh_pci.c | 2 +-
> drivers/net/zxdh/zxdh_queue.c | 11 +-
> drivers/net/zxdh/zxdh_queue.h | 120 +++----
> drivers/net/zxdh/zxdh_rxtx.c | 534 ++++++++++++++++++++++-------
> drivers/net/zxdh/zxdh_rxtx.h | 27 +-
> 8 files changed, 543 insertions(+), 259 deletions(-)
>
Multiple issues were reported by AI review. I ran it in Claude to get a better summary.
That way it can look at progress across revisions in the series.
Series review: net/zxdh Rx/Tx optimization (v4)
The v3 issues are addressed: pkt_padding() now checks the
rte_pktmbuf_prepend() return for NULL, the simple-path statistics no
longer read past the end of tx_pkts on a ring wrap, and the
zxdh_queue_enable_intr() fix has been split into its own patch 4/4
with a Fixes: tag and Cc: stable. Remaining issues below.
[PATCH v4 2/4] net/zxdh: optimize Rx recv pkts performance
Error: zxdh_recv_single_pkts() does not compact rcv_pkts[] on a
mid-burst failure, so a freed mbuf can be returned to the application
and a valid mbuf can be leaked.
for (i = 0; i < num; i++) {
struct rte_mbuf *rxm = rcv_pkts[i];
uint16_t len = lens[i];
if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0))
continue;
zxdh_update_packet_stats(&rxvq->stats, rxm);
nb_rx++;
}
zxdh_init_mbuf() frees rxm on failure (num_buffers != 1, or
data_len != pkt_len). The loop then skips it via continue but never
writes the surviving mbufs down to rcv_pkts[nb_rx]. If packet i fails
and a later packet j > i succeeds, the valid mbuf stays at rcv_pkts[j]
while the caller is told only nb_rx packets are valid. The caller reads
rcv_pkts[0..nb_rx-1], which now includes the freed slot at index i
(use-after-free) and never sees the valid mbuf at index j (leak).
The companion zxdh_recv_pkts_packed() in this same file does it
correctly with rcv_pkts[nb_rx] = rxm. Mirror that here:
if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0))
continue;
rcv_pkts[nb_rx] = rxm;
zxdh_update_packet_stats(&rxvq->stats, rxm);
nb_rx++;
[PATCH v4 3/4] net/zxdh: optimize Tx xmit pkts performance
Error: tx_bunch() and tx1() publish the descriptor AVAIL flag with no
store barrier, so the device may observe the AVAIL bit before addr/len
are visible and DMA from a stale address.
for (i = 0; i < N_PER_LOOP; ++i, ++txdp, ++pkts) {
txdp->addr = rte_mbuf_data_iova(*pkts);
txdp->len = (*pkts)->data_len;
txdp->id = start_id + i;
txdp->flags = flags; /* AVAIL bit, no rte_io_wmb */
}
The packed ring requires the addr/len/id stores to be globally visible
before the flags store that sets the AVAIL bit. The rest of the driver
does this through zxdh_queue_store_flags_packed(), which issues
rte_io_wmb() before writing flags (used by the original
zxdh_enqueue_xmit_packed_fast() that this path replaces, and by
refill_que_descs() in patch 2/4). The direct txdp->flags = flags writes
here drop that barrier. volatile prevents only compiler reordering, not
hardware reordering on weakly-ordered architectures (arm, ppc), so this
is a real data race against the device.
Write addr/len/id for the batch, then a single rte_io_wmb(), then the
flags; or publish each flag through zxdh_queue_store_flags_packed().
Error: in submit_to_backend_simple(), a packet that fails pkt_padding()
is skipped for cookie tracking but its descriptor is still written and
published, and the mbuf leaks.
for (j = 0; j < N_PER_LOOP; ++j) {
m = *(tx_pkts + i + j);
if (unlikely(pkt_padding(m, hw) < 0)) {
vq->txq.stats.errors++;
continue; /* cookie not set, m not freed */
}
(dxp + i + j)->cookie = (void *)m;
zxdh_update_packet_stats(&vq->txq.stats, m);
}
/* writes ALL N_PER_LOOP descriptors, including the failed one */
tx_bunch(vq, txdp + i, tx_pkts + i, id + i);
When pkt_padding() returns < 0 the mbuf has no prepended downlink
header, but tx_bunch() still writes its iova/len into the descriptor and
sets the AVAIL bit, so a header-less packet is handed to the device.
The matching vq_descx[].cookie is left unset, so the mbuf is never
freed (leak) and, if the slot holds a stale cookie from a prior use,
the next flush double-frees it. vq_avail_idx and the returned count are
also advanced by the full nb_pkts, counting the dropped packet as sent.
The leftover loop handles this correctly (it calls tx1() per packet,
inside the padding check). The main loop needs the same structure:
either build the descriptor per packet after a successful pkt_padding(),
or track which slots succeeded and only publish those.
[PATCH v4 4/4] net/zxdh: fix queue enable intr issues
Error: the fix does not enable interrupts; the condition is inverted
relative to the commit message.
- if (vq->event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
- vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+ if (vq->event_flags_shadow == ZXDH_RING_EVENT_FLAGS_ENABLE) {
+ vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
ENABLE is 0x0 and DISABLE is 0x1. After a disable the shadow holds
DISABLE, which is the state from which the application re-arms the
interrupt via zxdh_dev_rx_queue_intr_enable(). With "== ENABLE" the
body runs only when interrupts are already enabled; when the shadow is
DISABLE the test is false and nothing happens, so interrupts are still
never enabled. The body now writes ENABLE, but the guard prevents it
from ever running in the case that matters.
The commit message says "Change == to !=", which is the correct fix,
but the diff changed the constant and kept ==. Mirror
zxdh_queue_disable_intr() (which uses != DISABLE):
if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
}
(Keeping the original "== DISABLE" guard and only changing the body
assignment to ENABLE is also correct and is the minimal fix for the
original bug.)
Warning: this Cc: stable fix depends on patch 1/4, which moved
event_flags_shadow out of vq_packed. The diff context here is
vq->event_flags_shadow, but the stable branches still have
vq->vq_packed.event_flags_shadow, so this patch will not cherry-pick
cleanly. Consider making the fix standalone against the current field
name and placing it first in the series so it backports without the
reorganization.
^ permalink raw reply [flat|nested] 23+ messages in thread
end of thread, other threads:[~2026-06-07 18:00 UTC | newest]
Thread overview: 23+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-26 2:28 [PATCH v1] net/zxdh: optimize Rx/Tx path performance Junlong Wang
2026-03-26 3:27 ` Stephen Hemminger
2026-04-06 4:26 ` Stephen Hemminger
2026-04-23 1:18 ` [PATCH v2 0/3] " Junlong Wang
2026-04-23 1:18 ` [PATCH v2 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
2026-04-23 18:57 ` Stephen Hemminger
2026-04-23 1:18 ` [PATCH v2 2/3] net/zxdh: optimize Rx recv pkts performance Junlong Wang
2026-04-23 18:54 ` Stephen Hemminger
2026-04-23 23:39 ` Stephen Hemminger
2026-04-23 1:18 ` [PATCH v2 3/3] net/zxdh: optimize Tx xmit " Junlong Wang
2026-04-23 19:23 ` [PATCH v2 0/3] net/zxdh: optimize Rx/Tx path performance Stephen Hemminger
2026-05-09 6:29 ` [PATCH v3 " Junlong Wang
2026-05-09 6:29 ` [PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance Junlong Wang
2026-05-18 2:20 ` Stephen Hemminger
2026-05-09 6:29 ` [PATCH v3 2/3] net/zxdh: optimize Rx recv pkts performance Junlong Wang
2026-05-09 6:29 ` [PATCH v3 3/3] net/zxdh: optimize Tx xmit " Junlong Wang
2026-05-18 2:22 ` Stephen Hemminger
2026-06-06 6:32 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Junlong Wang
2026-06-06 6:32 ` [PATCH v4 1/4] net/zxdh: optimize queue structure to improve performance Junlong Wang
2026-06-06 6:32 ` [PATCH v4 2/4] net/zxdh: optimize Rx recv pkts performance Junlong Wang
2026-06-06 6:32 ` [PATCH v4 3/4] net/zxdh: optimize Tx xmit " Junlong Wang
2026-06-06 6:32 ` [PATCH v4 4/4] net/zxdh: fix queue enable intr issues Junlong Wang
2026-06-07 18:00 ` [PATCH v4 0/4] net/zxdh: optimize Rx/Tx path performance Stephen Hemminger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox