DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: liujie5@linkdatatechnology.com
To: stephen@networkplumber.org
Cc: dev@dpdk.org, Jie Liu <liujie5@linkdatatechnology.com>
Subject: [PATCH v6 01/20] net/sxe2: support AVX512 vectorized path for Rx and Tx
Date: Tue,  2 Jun 2026 23:52:20 +0800	[thread overview]
Message-ID: <20260602155240.1002602-2-liujie5@linkdatatechnology.com> (raw)
In-Reply-To: <20260602155240.1002602-1-liujie5@linkdatatechnology.com>

From: Jie Liu <liujie5@linkdatatechnology.com>

Add AVX512 vector data path for Rx and Tx burst functions.
The decision to use AVX512 is based on:
1. CPU hardware flags (AVX512F, AVX512BW).
2. Compiler support (CC_AVX512_SUPPORT).
3. Max SIMD bitwidth configuration.

Performance shows approximately X% improvement in small packet
forwarding scenarios.

Signed-off-by: Jie Liu <liujie5@linkdatatechnology.com>
---
 drivers/net/sxe2/meson.build            |  24 +
 drivers/net/sxe2/sxe2_drv_cmd.h         |  80 +--
 drivers/net/sxe2/sxe2_ethdev.c          |   2 +-
 drivers/net/sxe2/sxe2_txrx.c            |  92 ++-
 drivers/net/sxe2/sxe2_txrx_vec.c        |  46 +-
 drivers/net/sxe2/sxe2_txrx_vec.h        |  18 +-
 drivers/net/sxe2/sxe2_txrx_vec_avx512.c | 897 ++++++++++++++++++++++++
 7 files changed, 1099 insertions(+), 60 deletions(-)
 create mode 100644 drivers/net/sxe2/sxe2_txrx_vec_avx512.c

diff --git a/drivers/net/sxe2/meson.build b/drivers/net/sxe2/meson.build
index 6b2eb75b0e..7bd0d8120c 100644
--- a/drivers/net/sxe2/meson.build
+++ b/drivers/net/sxe2/meson.build
@@ -15,6 +15,30 @@ includes += include_directories('../../common/sxe2')
 
 if arch_subdir == 'x86'
         sources += files('sxe2_txrx_vec_sse.c')
+
+        sxe2_avx512_cpu_support =(
+                cc.get_define('__AVX512F__', args: machine_args) != '' and
+                cc.get_define('__AVX512BW__', args: machine_args) != '')
+
+        sxe2_avx512_cc_support = (
+                not machine_args.contains('-mno-avx512f') and
+                cc.has_argument('-mavx512f') and
+                cc.has_argument('-mavx512bw'))
+
+        if sxe2_avx512_cpu_support == true or sxe2_avx512_cc_support == true
+                cflags += ['-DCC_AVX512_SUPPORT']
+                avx512_args = [cflags, '-mavx512f', '-mavx512bw']
+                if cc.has_argument('-march=skylake-avx512')
+                        avx512_args += '-march=skylake-avx512'
+                endif
+                sxe2_avx512_lib = static_library('sxe2_avx512_lib', 'sxe2_txrx_vec_avx512.c',
+                        dependencies: [static_rte_ethdev,
+                        static_rte_kvargs, static_rte_hash,
+                        static_rte_security, static_rte_cryptodev, static_rte_bus_pci],
+                        include_directories: includes,
+                        c_args: avx512_args)
+                objs += sxe2_avx512_lib.extract_objects('sxe2_txrx_vec_avx512.c')
+        endif
 endif
 
 sources += files(
diff --git a/drivers/net/sxe2/sxe2_drv_cmd.h b/drivers/net/sxe2/sxe2_drv_cmd.h
index bba6476c2e..ccc9c20ef4 100644
--- a/drivers/net/sxe2/sxe2_drv_cmd.h
+++ b/drivers/net/sxe2/sxe2_drv_cmd.h
@@ -67,20 +67,20 @@ enum sxe2_dev_type {
 	SXE2_DEV_T_MAX,
 };
 
-struct sxe2_drv_queue_caps {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_queue_caps {
 	uint16_t queues_cnt;
 	uint16_t base_idx_in_pf;
-};
+} __rte_packed_end;
 
-struct sxe2_drv_msix_caps {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_msix_caps {
 	uint16_t msix_vectors_cnt;
 	uint16_t base_idx_in_func;
-};
+} __rte_packed_end;
 
-struct sxe2_drv_rss_hash_caps {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_rss_hash_caps {
 	uint16_t hash_key_size;
 	uint16_t lut_key_size;
-};
+} __rte_packed_end;
 
 enum sxe2_vf_vsi_valid {
 	SXE2_VF_VSI_BOTH = 0,
@@ -89,18 +89,18 @@ enum sxe2_vf_vsi_valid {
 	SXE2_VF_VSI_MAX,
 };
 
-struct sxe2_drv_vsi_caps {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_vsi_caps {
 	uint16_t func_id;
 	uint16_t dpdk_vsi_id;
 	uint16_t kernel_vsi_id;
 	uint16_t vsi_type;
-};
+} __rte_packed_end;
 
-struct sxe2_drv_representor_caps {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_representor_caps {
 	uint16_t cnt_repr_vf;
 	uint8_t rsv[2];
 	struct sxe2_drv_vsi_caps repr_vf_id[256];
-};
+} __rte_packed_end;
 
 enum sxe2_phys_port_name_type {
 	SXE2_PHYS_PORT_NAME_TYPE_NOTSET = 0,
@@ -111,25 +111,25 @@ enum sxe2_phys_port_name_type {
 	SXE2_PHYS_PORT_NAME_TYPE_UNKNOWN,
 };
 
-struct sxe2_switchdev_mode_info {
+struct __rte_aligned(4) __rte_packed_begin sxe2_switchdev_mode_info {
 	uint8_t pf_id;
 	uint8_t is_switchdev;
 	uint8_t rsv[2];
-};
+} __rte_packed_end;
 
-struct sxe2_switchdev_cpvsi_info {
+struct __rte_aligned(4) __rte_packed_begin sxe2_switchdev_cpvsi_info {
 	uint16_t cp_vsi_id;
 	uint8_t rsv[2];
-};
+} __rte_packed_end;
 
-struct sxe2_txsch_caps {
+struct __rte_aligned(4) __rte_packed_begin sxe2_txsch_caps {
 	uint8_t layer_cap;
 	uint8_t tm_mid_node_num;
 	uint8_t prio_num;
 	uint8_t rev;
-};
+} __rte_packed_end;
 
-struct sxe2_drv_dev_caps_resp {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_dev_caps_resp {
 	struct sxe2_drv_queue_caps queue_caps;
 	struct sxe2_drv_msix_caps msix_caps;
 	struct sxe2_drv_rss_hash_caps rss_hash_caps;
@@ -141,24 +141,24 @@ struct sxe2_drv_dev_caps_resp {
 	uint8_t dev_type;
 	uint8_t rev;
 	uint32_t cap_flags;
-};
+} __rte_packed_end;
 
-struct sxe2_drv_dev_info_resp {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_dev_info_resp {
 	uint64_t dsn;
 	uint16_t vsi_id;
 	uint8_t rsv[2];
 	uint8_t mac_addr[SXE2_ETH_ALEN];
 	uint8_t rsv2[2];
-};
+} __rte_packed_end;
 
-struct sxe2_drv_dev_fw_info_resp {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_dev_fw_info_resp {
 	uint8_t main_version_id;
 	uint8_t sub_version_id;
 	uint8_t fix_version_id;
 	uint8_t build_id;
-};
+} __rte_packed_end;
 
-struct sxe2_drv_rxq_ctxt {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_rxq_ctxt {
 	uint64_t dma_addr;
 	uint32_t max_lro_size;
 	uint32_t split_type_mask;
@@ -170,62 +170,62 @@ struct sxe2_drv_rxq_ctxt {
 	uint8_t keep_crc_en;
 	uint8_t split_en;
 	uint8_t desc_size;
-};
+} __rte_packed_end;
 
-struct sxe2_drv_rxq_cfg_req {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_rxq_cfg_req {
 	uint16_t q_cnt;
 	uint16_t vsi_id;
 	uint16_t max_frame_size;
 	uint8_t rsv[2];
 	struct sxe2_drv_rxq_ctxt cfg[];
-};
+} __rte_packed_end;
 
-struct sxe2_drv_txq_ctxt {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_txq_ctxt {
 	uint64_t dma_addr;
 	uint32_t sched_mode;
 	uint16_t queue_id;
 	uint16_t depth;
 	uint16_t vsi_id;
 	uint8_t rsv[2];
-};
+} __rte_packed_end;
 
-struct sxe2_drv_txq_cfg_req {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_txq_cfg_req {
 	uint16_t q_cnt;
 	uint16_t vsi_id;
 	struct sxe2_drv_txq_ctxt cfg[];
-};
+} __rte_packed_end;
 
-struct sxe2_drv_q_switch_req {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_q_switch_req {
 	uint16_t q_idx;
 	uint16_t vsi_id;
 	uint8_t is_enable;
 	uint8_t sched_mode;
 	uint8_t rsv[2];
-};
+} __rte_packed_end;
 
-struct sxe2_drv_vsi_create_req_resp {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_vsi_create_req_resp {
 	uint16_t vsi_id;
 	uint16_t vsi_type;
 	struct sxe2_drv_queue_caps used_queues;
 	struct sxe2_drv_msix_caps used_msix;
-};
+} __rte_packed_end;
 
-struct sxe2_drv_vsi_free_req {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_vsi_free_req {
 	uint16_t vsi_id;
 	uint8_t rsv[2];
-};
+} __rte_packed_end;
 
-struct sxe2_drv_vsi_info_get_req {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_vsi_info_get_req {
 	uint16_t vsi_id;
 	uint8_t rsv[2];
-};
+} __rte_packed_end;
 
-struct sxe2_drv_vsi_info_get_resp {
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_vsi_info_get_resp {
 	uint16_t vsi_id;
 	uint16_t vsi_type;
 	struct sxe2_drv_queue_caps used_queues;
 	struct sxe2_drv_msix_caps used_msix;
-};
+} __rte_packed_end;
 
 enum sxe2_drv_cmd_module {
 	SXE2_DRV_CMD_MODULE_HANDSHAKE = 0,
diff --git a/drivers/net/sxe2/sxe2_ethdev.c b/drivers/net/sxe2/sxe2_ethdev.c
index 8d66e5d8c5..e0f7002138 100644
--- a/drivers/net/sxe2/sxe2_ethdev.c
+++ b/drivers/net/sxe2/sxe2_ethdev.c
@@ -891,7 +891,7 @@ static int32_t sxe2_eth_pmd_probe_pf(struct sxe2_common_device *cdev,
 static int32_t sxe2_parse_eth_devargs(struct rte_device *dev,
 			  struct rte_eth_devargs *eth_da)
 {
-	int ret = 0;
+	int32_t ret = 0;
 
 	if (dev->devargs == NULL)
 		return 0;
diff --git a/drivers/net/sxe2/sxe2_txrx.c b/drivers/net/sxe2/sxe2_txrx.c
index 8d17535301..aa1c474088 100644
--- a/drivers/net/sxe2/sxe2_txrx.c
+++ b/drivers/net/sxe2/sxe2_txrx.c
@@ -157,6 +157,19 @@ void sxe2_tx_mode_func_set(struct rte_eth_dev *dev)
 		if (ret == 0 &&
 		    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
 			tx_mode_flags = vec_flags;
+#ifdef RTE_ARCH_X86
+			if ((rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) &&
+			    (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+			    (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)) {
+#ifdef CC_AVX512_SUPPORT
+				tx_mode_flags |= SXE2_TX_MODE_VEC_AVX512;
+#else
+				PMD_LOG_INFO(TX, "AVX512 is not supported in build env.");
+#endif
+			}
+			if ((tx_mode_flags & SXE2_TX_MODE_VEC_SET_MASK) == 0)
+				tx_mode_flags |= SXE2_TX_MODE_VEC_SSE;
+#endif
 			if (tx_mode_flags & SXE2_TX_MODE_VEC_SET_MASK) {
 				ret = sxe2_tx_queues_vec_prepare(dev);
 				if (ret != 0)
@@ -172,14 +185,25 @@ void sxe2_tx_mode_func_set(struct rte_eth_dev *dev)
 		tx_mode_flags = adapter->q_ctxt.tx_mode_flags;
 	}
 
-#ifdef RTE_ARCH_X86
 	if (tx_mode_flags & SXE2_TX_MODE_VEC_SET_MASK) {
-		if (tx_mode_flags & SXE2_TX_MODE_VEC_OFFLOAD) {
-			dev->tx_pkt_prepare = sxe2_tx_pkts_prepare;
-			dev->tx_pkt_burst = sxe2_tx_pkts_vec_sse;
+		dev->tx_pkt_prepare = NULL;
+#ifdef RTE_ARCH_X86
+		if (tx_mode_flags & SXE2_TX_MODE_VEC_AVX512) {
+#ifdef CC_AVX512_SUPPORT
+			if (tx_mode_flags & SXE2_TX_MODE_VEC_OFFLOAD) {
+				dev->tx_pkt_prepare = sxe2_tx_pkts_prepare;
+				dev->tx_pkt_burst = sxe2_tx_pkts_vec_avx512;
+			} else {
+				dev->tx_pkt_burst = sxe2_tx_pkts_vec_avx512_simple;
+			}
+#endif
 		} else {
-			dev->tx_pkt_prepare = NULL;
-			dev->tx_pkt_burst = sxe2_tx_pkts_vec_sse_simple;
+			if (tx_mode_flags & SXE2_TX_MODE_VEC_OFFLOAD) {
+				dev->tx_pkt_prepare = sxe2_tx_pkts_prepare;
+				dev->tx_pkt_burst = sxe2_tx_pkts_vec_sse;
+			} else {
+				dev->tx_pkt_burst = sxe2_tx_pkts_vec_sse_simple;
+			}
 		}
 	} else {
 #endif
@@ -201,8 +225,16 @@ static const struct {
 } sxe2_tx_burst_infos[] = {
 	{ sxe2_tx_pkts,   "Scalar" },
 #ifdef RTE_ARCH_X86
-	{ sxe2_tx_pkts_vec_sse,        "Vector SSE" },
-	{ sxe2_tx_pkts_vec_sse_simple, "Vector SSE Simple" },
+#ifdef CC_AVX512_SUPPORT
+	{ sxe2_tx_pkts_vec_avx512,
+	      "Vector AVX512" },
+	{ sxe2_tx_pkts_vec_avx512_simple,
+	      "Vector AVX512 Simple" },
+#endif
+	{ sxe2_tx_pkts_vec_sse,
+	      "Vector SSE" },
+	{ sxe2_tx_pkts_vec_sse_simple,
+	      "Vector SSE Simple" },
 #endif
 };
 
@@ -288,6 +320,20 @@ void sxe2_rx_mode_func_set(struct rte_eth_dev *dev)
 		if (ret == 0 &&
 		    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
 			rx_mode_flags = vec_flags;
+#ifdef RTE_ARCH_X86
+			if ((rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) &&
+				(rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				(rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)) {
+#ifdef CC_AVX512_SUPPORT
+				rx_mode_flags |= SXE2_RX_MODE_VEC_AVX512;
+#else
+				PMD_LOG_INFO(RX, "AVX512 support detected but not enabled");
+#endif
+			}
+			if ((rx_mode_flags & SXE2_RX_MODE_VEC_SET_MASK) == 0 &&
+				rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
+				rx_mode_flags |= SXE2_RX_MODE_VEC_SSE;
+#endif
 			if ((rx_mode_flags & SXE2_RX_MODE_VEC_SET_MASK) != 0) {
 				ret = sxe2_rx_queues_vec_prepare(dev);
 				if (ret != 0)
@@ -301,7 +347,16 @@ void sxe2_rx_mode_func_set(struct rte_eth_dev *dev)
 
 #ifdef RTE_ARCH_X86
 	if (rx_mode_flags & SXE2_RX_MODE_VEC_SET_MASK) {
-		dev->rx_pkt_burst = sxe2_rx_pkts_scattered_vec_sse_offload;
+		if (rx_mode_flags & SXE2_RX_MODE_VEC_AVX512) {
+#ifdef CC_AVX512_SUPPORT
+			if (rx_mode_flags & SXE2_RX_MODE_VEC_OFFLOAD)
+				dev->rx_pkt_burst = sxe2_rx_pkts_scattered_vec_avx512_offload;
+			else
+				dev->rx_pkt_burst = sxe2_rx_pkts_scattered_vec_avx512;
+#endif
+		} else {
+			dev->rx_pkt_burst = sxe2_rx_pkts_scattered_vec_sse_offload;
+		}
 		return;
 	}
 #endif
@@ -315,19 +370,30 @@ static const struct {
 	eth_rx_burst_t rx_burst;
 	const char *info;
 } sxe2_rx_burst_infos[] = {
-	{ sxe2_rx_pkts_scattered,          "Scalar Scattered" },
-	{ sxe2_rx_pkts_scattered_split,          "Scalar Scattered split" },
+	{ sxe2_rx_pkts_scattered,
+	      "Scalar Scattered" },
+	{ sxe2_rx_pkts_scattered_split,
+	      "Scalar Scattered split" },
 #ifdef RTE_ARCH_X86
-	{ sxe2_rx_pkts_scattered_vec_sse_offload,      "Vector SSE Scattered" },
+#ifdef CC_AVX512_SUPPORT
+	{ sxe2_rx_pkts_scattered_vec_avx512,
+	      "Vector AVX512 Scattered" },
+	{ sxe2_rx_pkts_scattered_vec_avx512_offload,
+	      "Offload Vector AVX512 Scattered" },
+#endif
+	{ sxe2_rx_pkts_scattered_vec_sse_offload,
+	      "Vector SSE Scattered" },
 #endif
 };
 
 int32_t sxe2_rx_burst_mode_get(struct rte_eth_dev *dev,
-			__rte_unused uint16_t queue_id, struct rte_eth_burst_mode *mode)
+			       __rte_unused uint16_t queue_id,
+			       struct rte_eth_burst_mode *mode)
 {
 	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
 	int32_t ret = -EINVAL;
 	uint32_t i, size;
+
 	size = RTE_DIM(sxe2_rx_burst_infos);
 	for (i = 0; i < size; ++i) {
 		if (pkt_burst == sxe2_rx_burst_infos[i].rx_burst) {
diff --git a/drivers/net/sxe2/sxe2_txrx_vec.c b/drivers/net/sxe2/sxe2_txrx_vec.c
index 8df4954d86..cf004f5eb2 100644
--- a/drivers/net/sxe2/sxe2_txrx_vec.c
+++ b/drivers/net/sxe2/sxe2_txrx_vec.c
@@ -165,16 +165,54 @@ static void sxe2_tx_queue_mbufs_release_vec(struct sxe2_tx_queue *txq)
 		return;
 	}
 	i = txq->next_dd - (txq->rs_thresh - 1);
-	buffer = txq->buffer_ring;
-	if (txq->next_use < i) {
-		for ( ; i < txq->ring_depth; ++i) {
+#ifdef CC_AVX512_SUPPORT
+	struct rte_eth_dev *dev;
+	struct sxe2_tx_buffer_vec *buffer_vec;
+
+	dev = &rte_eth_devices[txq->port_id];
+
+	if (dev->tx_pkt_burst == sxe2_tx_pkts_vec_avx512 ||
+		dev->tx_pkt_burst == sxe2_tx_pkts_vec_avx512_simple) {
+		buffer_vec = (struct sxe2_tx_buffer_vec *)txq->buffer_ring;
+
+		if (txq->next_use < i) {
+			for ( ; i < txq->ring_depth; ++i) {
+				if (buffer_vec[i].mbuf != NULL) {
+					rte_pktmbuf_free_seg(buffer_vec[i].mbuf);
+					buffer_vec[i].mbuf = NULL;
+				}
+			}
+			i = 0;
+		}
+		for ( ; i < txq->next_use; ++i) {
+			if (buffer_vec[i].mbuf != NULL) {
+				rte_pktmbuf_free_seg(buffer_vec[i].mbuf);
+				buffer_vec[i].mbuf = NULL;
+			}
+		}
+	} else {
+#endif
+		buffer = txq->buffer_ring;
+		buffer = txq->buffer_ring;
+		if (txq->next_use < i) {
+			for ( ; i < txq->ring_depth; ++i) {
+				if (buffer[i].mbuf != NULL) {
+					rte_pktmbuf_free_seg(buffer[i].mbuf);
+					buffer[i].mbuf = NULL;
+				}
+			}
+			i = 0;
+		}
+		for (; i < txq->next_use; ++i) {
 			if (buffer[i].mbuf != NULL) {
 				rte_pktmbuf_free_seg(buffer[i].mbuf);
 				buffer[i].mbuf = NULL;
 			}
 		}
-		i = 0;
+#ifdef CC_AVX512_SUPPORT
 	}
+#endif
+
 	for (; i < txq->next_use; ++i) {
 		if (buffer[i].mbuf != NULL) {
 			rte_pktmbuf_free_seg(buffer[i].mbuf);
diff --git a/drivers/net/sxe2/sxe2_txrx_vec.h b/drivers/net/sxe2/sxe2_txrx_vec.h
index 04ff4d96a5..af7c8d12b2 100644
--- a/drivers/net/sxe2/sxe2_txrx_vec.h
+++ b/drivers/net/sxe2/sxe2_txrx_vec.h
@@ -11,15 +11,19 @@
 #define SXE2_RX_MODE_VEC_SIMPLE    RTE_BIT32(0)
 #define SXE2_RX_MODE_VEC_OFFLOAD   RTE_BIT32(1)
 #define SXE2_RX_MODE_VEC_SSE       RTE_BIT32(2)
+#define SXE2_RX_MODE_VEC_AVX512    RTE_BIT32(4)
 #define SXE2_RX_MODE_BATCH_ALLOC   RTE_BIT32(10)
 #define SXE2_RX_MODE_VEC_SET_MASK	(SXE2_RX_MODE_VEC_SIMPLE | \
-			SXE2_RX_MODE_VEC_OFFLOAD | SXE2_RX_MODE_VEC_SSE)
+			SXE2_RX_MODE_VEC_OFFLOAD | SXE2_RX_MODE_VEC_SSE | \
+			SXE2_RX_MODE_VEC_AVX512)
 #define SXE2_TX_MODE_VEC_SIMPLE   RTE_BIT32(0)
 #define SXE2_TX_MODE_VEC_OFFLOAD  RTE_BIT32(1)
 #define SXE2_TX_MODE_VEC_SSE      RTE_BIT32(2)
+#define SXE2_TX_MODE_VEC_AVX512   RTE_BIT32(4)
 #define SXE2_TX_MODE_SIMPLE_BATCH RTE_BIT32(10)
 #define SXE2_TX_MODE_VEC_SET_MASK	(SXE2_TX_MODE_VEC_SIMPLE | \
-			SXE2_TX_MODE_VEC_OFFLOAD | SXE2_TX_MODE_VEC_SSE)
+			SXE2_TX_MODE_VEC_OFFLOAD | SXE2_TX_MODE_VEC_SSE | \
+			SXE2_TX_MODE_VEC_AVX512)
 #define SXE2_TX_VEC_NO_SUPPORT_OFFLOAD (		  \
 			RTE_ETH_TX_OFFLOAD_MULTI_SEGS |		  \
 			RTE_ETH_TX_OFFLOAD_QINQ_INSERT |	  \
@@ -54,6 +58,16 @@ uint16_t sxe2_tx_pkts_vec_sse(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_
 uint16_t sxe2_tx_pkts_vec_sse_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 uint16_t sxe2_rx_pkts_scattered_vec_sse_offload(void *rx_queue,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t sxe2_tx_pkts_vec_avx512_simple(void *tx_queue,
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+uint16_t sxe2_tx_pkts_vec_avx512(void *tx_queue,
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+uint16_t sxe2_tx_pkts_vec_avx512_ctx_offload(void *tx_queue,
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+uint16_t sxe2_rx_pkts_scattered_vec_avx512(void *rx_queue,
+		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t sxe2_rx_pkts_scattered_vec_avx512_offload(void *rx_queue,
+		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 #endif
 int32_t __rte_cold sxe2_tx_vec_support_check(struct rte_eth_dev *dev, uint32_t *vec_flags);
 int32_t __rte_cold sxe2_tx_queues_vec_prepare(struct rte_eth_dev *dev);
diff --git a/drivers/net/sxe2/sxe2_txrx_vec_avx512.c b/drivers/net/sxe2/sxe2_txrx_vec_avx512.c
new file mode 100644
index 0000000000..2aec8037dd
--- /dev/null
+++ b/drivers/net/sxe2/sxe2_txrx_vec_avx512.c
@@ -0,0 +1,897 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C), 2025, Wuxi Stars Micro System Technologies Co., Ltd.
+ */
+
+#ifndef SXE2_TEST
+#include <rte_vect.h>
+
+#include "sxe2_ethdev.h"
+#include "sxe2_common_log.h"
+#include "sxe2_queue.h"
+#include "sxe2_txrx_vec.h"
+#include "sxe2_txrx_vec_common.h"
+#include "sxe2_vsi.h"
+
+static __rte_always_inline int32_t sxe2_tx_bufs_free_vec_avx512(struct sxe2_tx_queue *txq)
+{
+	struct sxe2_tx_buffer_vec *buffer;
+	struct rte_mbuf *mbuf;
+	struct rte_mbuf *mbuf_free_arr[SXE2_TX_FREE_BUFFER_SIZE_MAX_VEC];
+	struct rte_mempool *mp;
+	struct rte_mempool_cache *cache;
+	void **cache_objs;
+	uint32_t copied;
+	uint32_t i;
+	int32_t ret;
+	uint16_t rs_thresh;
+	uint16_t free_num;
+
+	if (rte_cpu_to_le_64(SXE2_TX_DESC_DTYPE_DESC_DONE) !=
+		(txq->desc_ring[txq->next_dd].wb.dd &
+			rte_cpu_to_le_64(SXE2_TX_DESC_DTYPE_MASK))) {
+		ret = 0;
+		goto l_end;
+	}
+
+	rs_thresh = txq->rs_thresh;
+
+	buffer = (struct sxe2_tx_buffer_vec *)txq->buffer_ring;
+	buffer += txq->next_dd - (rs_thresh - 1);
+
+	if ((txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) &&
+			(rs_thresh & 31) == 0) {
+		mp = buffer[0].mbuf->pool;
+		cache = rte_mempool_default_cache(mp, rte_lcore_id());
+
+		if (cache == NULL || cache->len)
+			goto normal;
+
+		if (rs_thresh > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+			(void)rte_mempool_ops_enqueue_bulk(mp, (void *)buffer, rs_thresh);
+			goto done;
+		}
+		cache_objs = &cache->objs[cache->len];
+
+		copied = 0;
+		while (copied < rs_thresh) {
+			const __m512i objs0 = _mm512_loadu_si512(&buffer[copied]);
+			const __m512i objs1 = _mm512_loadu_si512(&buffer[copied + 8]);
+			const __m512i objs2 = _mm512_loadu_si512(&buffer[copied + 16]);
+			const __m512i objs3 = _mm512_loadu_si512(&buffer[copied + 24]);
+
+			_mm512_storeu_si512(&cache_objs[copied], objs0);
+			_mm512_storeu_si512(&cache_objs[copied + 8], objs1);
+			_mm512_storeu_si512(&cache_objs[copied + 16], objs2);
+			_mm512_storeu_si512(&cache_objs[copied + 24], objs3);
+			copied += 32;
+		}
+		cache->len += rs_thresh;
+
+		if (cache->len >= cache->flushthresh) {
+			(void)rte_mempool_ops_enqueue_bulk(mp,
+					&cache->objs[cache->size], cache->len - cache->size);
+			cache->len = cache->size;
+		}
+		goto done;
+	}
+
+normal:
+	mbuf = rte_pktmbuf_prefree_seg(buffer[0].mbuf);
+
+	if (likely(mbuf)) {
+		mbuf_free_arr[0] = mbuf;
+		free_num = 1;
+
+		for (i = 1; i < rs_thresh; ++i) {
+			mbuf = rte_pktmbuf_prefree_seg(buffer[i].mbuf);
+
+			if (likely(mbuf)) {
+				if (likely(mbuf->pool == mbuf_free_arr[0]->pool)) {
+					mbuf_free_arr[free_num] = mbuf;
+					free_num++;
+				} else {
+					rte_mempool_put_bulk(mbuf_free_arr[0]->pool,
+						(void *)mbuf_free_arr, free_num);
+
+				mbuf_free_arr[0] = mbuf;
+				free_num = 1;
+			}
+			}
+		}
+
+		rte_mempool_put_bulk(mbuf_free_arr[0]->pool,
+						(void *)mbuf_free_arr, free_num);
+	} else {
+		for (i = 1; i < rs_thresh; ++i) {
+			mbuf = rte_pktmbuf_prefree_seg(buffer[i].mbuf);
+			if (mbuf != NULL)
+				rte_mempool_put(mbuf->pool, mbuf);
+		}
+	}
+
+done:
+	txq->desc_free_num += txq->rs_thresh;
+	txq->next_dd       += txq->rs_thresh;
+	if (txq->next_dd >= txq->ring_depth)
+		txq->next_dd = txq->rs_thresh - 1;
+	ret = rs_thresh;
+
+l_end:
+	return ret;
+}
+
+static __rte_always_inline void
+sxe2_tx_desc_fill_one_avx512(volatile union sxe2_tx_data_desc *desc, struct rte_mbuf *pkt,
+	uint64_t desc_cmd, bool with_offloads)
+{
+	__m128i data_desc;
+	uint64_t desc_qw1;
+	uint32_t desc_offset;
+
+	desc_qw1 = (SXE2_TX_DESC_DTYPE_DATA |
+				((uint64_t)desc_cmd) << SXE2_TX_DATA_DESC_CMD_SHIFT |
+				((uint64_t)pkt->data_len) << SXE2_TX_DATA_DESC_BUF_SZ_SHIFT);
+	desc_offset = SXE2_TX_DATA_DESC_MACLEN_VAL(pkt->l2_len);
+	desc_qw1 |= ((uint64_t)desc_offset) << SXE2_TX_DATA_DESC_OFFSET_SHIFT;
+	if (with_offloads)
+		sxe2_tx_desc_fill_offloads(pkt, &desc_qw1);
+
+	data_desc = _mm_set_epi64x(desc_qw1, rte_pktmbuf_iova(pkt));
+
+	_mm_store_si128(RTE_CAST_PTR(__m128i *, desc), data_desc);
+}
+
+static __rte_always_inline
+void sxe2_tx_desc_fill_avx512(volatile union sxe2_tx_data_desc *desc, struct rte_mbuf **pkts,
+	uint16_t pkts_num, uint64_t desc_cmd, bool with_offloads)
+{
+	__m512i desc_group;
+	uint64_t desc0_qw1;
+	uint64_t desc1_qw1;
+	uint64_t desc2_qw1;
+	uint64_t desc3_qw1;
+
+	const uint64_t desc_qw1_com = (SXE2_TX_DESC_DTYPE_DATA |
+					((uint64_t)desc_cmd) << SXE2_TX_DATA_DESC_CMD_SHIFT);
+	uint32_t desc_offset[4] = {0};
+
+	while (pkts_num > 3) {
+		desc3_qw1 = desc_qw1_com |
+				((uint64_t)pkts[3]->data_len) << SXE2_TX_DATA_DESC_BUF_SZ_SHIFT;
+
+		desc_offset[3] = SXE2_TX_DATA_DESC_MACLEN_VAL(pkts[3]->l2_len);
+		desc3_qw1 |= ((uint64_t)desc_offset[3]) << SXE2_TX_DATA_DESC_OFFSET_SHIFT;
+		if (with_offloads)
+			sxe2_tx_desc_fill_offloads(pkts[3], &desc3_qw1);
+
+		desc2_qw1 = desc_qw1_com |
+				((uint64_t)pkts[2]->data_len) << SXE2_TX_DATA_DESC_BUF_SZ_SHIFT;
+		desc_offset[2] = SXE2_TX_DATA_DESC_MACLEN_VAL(pkts[2]->l2_len);
+		desc2_qw1 |= ((uint64_t)desc_offset[2]) << SXE2_TX_DATA_DESC_OFFSET_SHIFT;
+		if (with_offloads)
+			sxe2_tx_desc_fill_offloads(pkts[2], &desc2_qw1);
+
+		desc1_qw1 = (desc_qw1_com |
+				((uint64_t)pkts[1]->data_len) << SXE2_TX_DATA_DESC_BUF_SZ_SHIFT);
+		desc_offset[1] = SXE2_TX_DATA_DESC_MACLEN_VAL(pkts[1]->l2_len);
+		desc1_qw1 |= ((uint64_t)desc_offset[1]) << SXE2_TX_DATA_DESC_OFFSET_SHIFT;
+		if (with_offloads)
+			sxe2_tx_desc_fill_offloads(pkts[1], &desc1_qw1);
+
+		desc0_qw1 = (desc_qw1_com |
+				((uint64_t)pkts[0]->data_len) << SXE2_TX_DATA_DESC_BUF_SZ_SHIFT);
+		desc_offset[0] = SXE2_TX_DATA_DESC_MACLEN_VAL(pkts[0]->l2_len);
+		desc0_qw1 |= ((uint64_t)desc_offset[0]) << SXE2_TX_DATA_DESC_OFFSET_SHIFT;
+		if (with_offloads)
+			sxe2_tx_desc_fill_offloads(pkts[0], &desc0_qw1);
+
+		desc_group =
+			_mm512_set_epi64(desc3_qw1, rte_pktmbuf_iova(pkts[3]),
+					 desc2_qw1, rte_pktmbuf_iova(pkts[2]),
+					 desc1_qw1, rte_pktmbuf_iova(pkts[1]),
+					 desc0_qw1, rte_pktmbuf_iova(pkts[0]));
+
+		_mm512_storeu_si512(RTE_CAST_PTR(void *, desc), desc_group);
+
+		pkts_num -= 4;
+		desc     += 4;
+		pkts     += 4;
+	}
+
+	while (pkts_num) {
+		sxe2_tx_desc_fill_one_avx512(desc, *pkts, desc_cmd, with_offloads);
+
+		pkts_num--;
+		desc++;
+		pkts++;
+	}
+}
+
+static __rte_always_inline void
+sxe2_tx_pkts_mbuf_fill_avx512(struct sxe2_tx_buffer_vec *buffer,
+	struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	uint16_t i;
+
+	for (i = 0; i < nb_pkts; ++i)
+		buffer[i].mbuf = tx_pkts[i];
+}
+
+static __rte_always_inline uint16_t
+sxe2_tx_pkts_vec_avx512_batch(struct sxe2_tx_queue *txq, struct rte_mbuf **tx_pkts,
+	uint16_t nb_pkts, bool with_offloads)
+{
+	volatile union sxe2_tx_data_desc *desc;
+	struct sxe2_tx_buffer_vec *buffer;
+	uint16_t next_use;
+	uint16_t res_num;
+	uint16_t tx_num;
+
+	if (txq->desc_free_num < txq->free_thresh)
+		(void)sxe2_tx_bufs_free_vec_avx512(txq);
+
+	nb_pkts = RTE_MIN(txq->desc_free_num, nb_pkts);
+	if (unlikely(nb_pkts == 0)) {
+		PMD_LOG_DEBUG(TX, "Tx pkts avx512 batch: may not enough free desc, "
+				"free_desc=%u, need_tx_pkts=%u",
+				txq->desc_free_num, nb_pkts);
+		goto l_end;
+	}
+	tx_num = nb_pkts;
+
+	next_use = txq->next_use;
+	desc     = &txq->desc_ring[next_use];
+	buffer   = (struct sxe2_tx_buffer_vec *)txq->buffer_ring;
+	buffer  += next_use;
+
+	txq->desc_free_num -= nb_pkts;
+
+	res_num = txq->ring_depth - txq->next_use;
+
+	if (tx_num >= res_num) {
+		sxe2_tx_pkts_mbuf_fill_avx512(buffer, tx_pkts, res_num);
+
+		sxe2_tx_desc_fill_avx512(desc, tx_pkts, res_num,
+					SXE2_TX_DATA_DESC_CMD_EOP, with_offloads);
+		tx_pkts += (res_num - 1);
+		desc    += (res_num - 1);
+
+		sxe2_tx_desc_fill_one_avx512(desc, *tx_pkts++,
+					(SXE2_TX_DATA_DESC_CMD_EOP | SXE2_TX_DATA_DESC_CMD_RS),
+					with_offloads);
+
+		tx_num -= res_num;
+
+		next_use     = 0;
+		txq->next_rs = txq->rs_thresh - 1;
+		desc         = txq->desc_ring;
+		buffer       = (struct sxe2_tx_buffer_vec *)txq->buffer_ring;
+	}
+
+	sxe2_tx_pkts_mbuf_fill_avx512(buffer, tx_pkts, tx_num);
+
+	sxe2_tx_desc_fill_avx512(desc, tx_pkts, tx_num,
+			SXE2_TX_DATA_DESC_CMD_EOP, with_offloads);
+
+	next_use += tx_num;
+	if (next_use > txq->next_rs) {
+		txq->desc_ring[txq->next_rs].read.type_cmd_off_bsz_l2t |=
+			rte_cpu_to_le_64(SXE2_TX_DATA_DESC_CMD_RS_MASK);
+
+		txq->next_rs += txq->rs_thresh;
+	}
+	txq->next_use = next_use;
+
+	SXE2_PCI_REG_WRITE_WC(txq->tdt_reg_addr, next_use);
+	PMD_LOG_DEBUG(TX, "port_id=%u queue_id=%u next_use=%u send_pkts=%u",
+			txq->port_id, txq->queue_id, next_use, nb_pkts);
+l_end:
+	return nb_pkts;
+}
+
+static __rte_always_inline uint16_t
+sxe2_tx_pkts_vec_avx512_common(struct sxe2_tx_queue *txq, struct rte_mbuf **tx_pkts,
+	uint16_t nb_pkts, bool with_offloads)
+{
+	uint16_t tx_done_num = 0;
+	uint16_t tx_once_num;
+	uint16_t tx_need_num;
+
+	while (nb_pkts) {
+		tx_need_num = RTE_MIN(nb_pkts, txq->rs_thresh);
+		tx_once_num =
+			sxe2_tx_pkts_vec_avx512_batch(txq, tx_pkts + tx_done_num,
+					     tx_need_num, with_offloads);
+		nb_pkts     -= tx_once_num;
+		tx_done_num += tx_once_num;
+		if (tx_once_num < tx_need_num)
+			break;
+	}
+
+	return tx_done_num;
+}
+
+uint16_t sxe2_tx_pkts_vec_avx512_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	return sxe2_tx_pkts_vec_avx512_common((struct sxe2_tx_queue *)tx_queue,
+					      tx_pkts, nb_pkts, false);
+}
+
+uint16_t sxe2_tx_pkts_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	return sxe2_tx_pkts_vec_avx512_common((struct sxe2_tx_queue *)tx_queue,
+					      tx_pkts, nb_pkts, true);
+}
+
+static inline void sxe2_rx_queue_rearm_avx512(struct sxe2_rx_queue *rxq)
+{
+	volatile union sxe2_rx_desc *desc;
+	struct rte_mbuf **buffer;
+	struct rte_mbuf *mbuf0, *mbuf1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i virt_addr0, virt_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, RTE_PKTMBUF_HEADROOM);
+	int32_t ret;
+	uint16_t i;
+	uint16_t new_tail;
+
+	buffer = &rxq->buffer_ring[rxq->realloc_start];
+	desc   = &rxq->desc_ring[rxq->realloc_start];
+
+	ret = rte_mempool_get_bulk(rxq->mb_pool, (void *)buffer, SXE2_RX_REARM_THRESH_VEC);
+	if (ret != 0) {
+		if ((rxq->realloc_num + SXE2_RX_REARM_THRESH_VEC) >= rxq->ring_depth) {
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < SXE2_RX_NUM_PER_LOOP_AVX; ++i) {
+				buffer[i] = &rxq->fake_mbuf;
+				_mm_store_si128(RTE_CAST_PTR(__m128i *, &desc[i].read),
+						dma_addr0);
+			}
+		}
+
+		rxq->vsi->adapter->dev_info.dev_data->rx_mbuf_alloc_failed +=
+			SXE2_RX_REARM_THRESH_VEC;
+		goto l_end;
+	}
+
+	for (i = 0; i < SXE2_RX_REARM_THRESH_VEC; i += 2, buffer += 2) {
+		mbuf0 = buffer[0];
+		mbuf1 = buffer[1];
+
+#if RTE_IOVA_IN_MBUF
+
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				 offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		virt_addr0 = _mm_loadu_si128((__m128i *)&mbuf0->buf_addr);
+		virt_addr1 = _mm_loadu_si128((__m128i *)&mbuf1->buf_addr);
+
+#if RTE_IOVA_IN_MBUF
+
+		dma_addr0 = _mm_unpackhi_epi64(virt_addr0, virt_addr0);
+		dma_addr1 = _mm_unpackhi_epi64(virt_addr1, virt_addr1);
+#else
+
+		dma_addr0 = _mm_unpacklo_epi64(virt_addr0, virt_addr0);
+		dma_addr1 = _mm_unpacklo_epi64(virt_addr1, virt_addr1);
+#endif
+
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, &desc++->read), dma_addr0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, &desc++->read), dma_addr1);
+	}
+
+	rxq->realloc_start += SXE2_RX_REARM_THRESH_VEC;
+	if (rxq->realloc_start >= rxq->ring_depth)
+		rxq->realloc_start = 0;
+	rxq->realloc_num -= SXE2_RX_REARM_THRESH_VEC;
+
+	new_tail = (rxq->realloc_start == 0) ? (rxq->ring_depth - 1) :
+		(rxq->realloc_start - 1);
+
+	SXE2_PCI_REG_WRITE_WC(rxq->rdt_reg_addr, new_tail);
+
+l_end:
+	return;
+}
+
+static __rte_always_inline uint16_t
+sxe2_rx_pkts_common_vec_avx512(struct sxe2_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+	uint16_t nb_pkts, uint8_t *split_rxe_flags,
+	uint8_t *umbcast_flags, bool do_offload)
+{
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0, rxq->mbuf_init_value);
+	struct rte_mbuf **buffer;
+	volatile union sxe2_rx_desc *desc;
+	__m512i mbufs4_7;
+	__m512i mbufs0_3;
+	__m256i mbufs6_7;
+	__m256i mbufs4_5;
+	__m256i mbufs2_3;
+	__m256i mbufs0_1;
+	uint32_t bit_num  = 0;
+	uint16_t done_num = 0;
+	uint16_t i = 0;
+	uint16_t j = 0;
+
+	buffer   = &rxq->buffer_ring[rxq->processing_idx];
+	desc     = &rxq->desc_ring[rxq->processing_idx];
+
+	rte_prefetch0(desc);
+
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, SXE2_RX_NUM_PER_LOOP_AVX);
+
+	if (rxq->realloc_num > SXE2_RX_REARM_THRESH_VEC)
+		sxe2_rx_queue_rearm_avx512(rxq);
+
+	if (0 == (rte_le_to_cpu_64(desc->wb.status_err_ptype_len) & SXE2_RX_DESC_STATUS_DD_MASK))
+		goto l_end;
+
+	const __m512i crc_adjust =
+			_mm512_set4_epi32(0, -rxq->crc_len, -rxq->crc_len, 0);
+
+	const __m256i dd_mask = _mm256_set1_epi32(1);
+
+	const __m512i rvp_shuf_mask =
+			_mm512_set4_epi32((7 << 24) | (6 << 16) | (5 << 8) | 4,
+					  (3 << 24) | (2 << 16) | (13 << 8) | 12,
+					  (0xFFU << 24) | (0xFF << 16) | (13 << 8) | 12,
+					  0xFFFFFFFF);
+
+	const __m128i eop_shuf_mask =
+		_mm_set_epi8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+			     0xFF, 0xFF, 8, 0, 10, 2, 12, 4, 14, 6);
+
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	for (i = 0; i < nb_pkts; i += SXE2_RX_NUM_PER_LOOP_AVX,
+				desc += SXE2_RX_NUM_PER_LOOP_AVX) {
+		_mm256_storeu_si256((void *)&rx_pkts[i],
+			_mm256_loadu_si256((void *)&buffer[i]));
+#ifdef RTE_ARCH_X86_64
+		_mm256_storeu_si256((void *)&rx_pkts[i + 4],
+			_mm256_loadu_si256((void *)&buffer[i + 4]));
+#endif
+
+		const __m128i desc7 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, desc + 7));
+		rte_compiler_barrier();
+		const __m128i desc6 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, desc + 6));
+		rte_compiler_barrier();
+		const __m128i desc5 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, desc + 5));
+		rte_compiler_barrier();
+		const __m128i desc4 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, desc + 4));
+		rte_compiler_barrier();
+		const __m128i desc3 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, desc + 3));
+		rte_compiler_barrier();
+		const __m128i desc2 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, desc + 2));
+		rte_compiler_barrier();
+		const __m128i desc1 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, desc + 1));
+		rte_compiler_barrier();
+		const __m128i desc0 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, desc + 0));
+
+		const __m256i descs6_7 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(desc6), desc7, 1);
+		const __m256i descs4_5 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(desc4), desc5, 1);
+		const __m256i descs2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(desc2), desc3, 1);
+		const __m256i descs0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(desc0), desc1, 1);
+
+		const __m512i descs4_7 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(descs4_5), descs6_7, 1);
+		const __m512i descs0_3 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(descs0_1), descs2_3, 1);
+
+		if (split_rxe_flags != NULL) {
+			for (j = 0; j < SXE2_RX_NUM_PER_LOOP_AVX; j++)
+				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
+		}
+
+		mbufs4_7 = _mm512_shuffle_epi8(descs4_7, rvp_shuf_mask);
+		mbufs0_3 = _mm512_shuffle_epi8(descs0_3, rvp_shuf_mask);
+
+		mbufs4_7 = _mm512_add_epi32(mbufs4_7, crc_adjust);
+		mbufs0_3 = _mm512_add_epi32(mbufs0_3, crc_adjust);
+
+		const __m512i ptype_mask = _mm512_set1_epi64(SXE2_RX_FLEX_DESC_PTYPE_M <<
+					SXE2_RX_FLEX_DESC_PTYPE_S);
+
+		__m512i ptypes4_7 = _mm512_and_si512(descs4_7, ptype_mask);
+		__m512i ptypes0_3 = _mm512_and_si512(descs0_3, ptype_mask);
+
+		const __m256i ptypes6_7 = _mm512_extracti64x4_epi64(ptypes4_7, 1);
+		const __m256i ptypes4_5 = _mm512_extracti64x4_epi64(ptypes4_7, 0);
+		const __m256i ptypes2_3 = _mm512_extracti64x4_epi64(ptypes0_3, 1);
+		const __m256i ptypes0_1 = _mm512_extracti64x4_epi64(ptypes0_3, 0);
+
+		const uint16_t ptype7 = _mm256_extract_epi16(ptypes6_7, 13);
+		const uint16_t ptype6 = _mm256_extract_epi16(ptypes6_7, 5);
+		const uint16_t ptype5 = _mm256_extract_epi16(ptypes4_5, 13);
+		const uint16_t ptype4 = _mm256_extract_epi16(ptypes4_5, 5);
+		const uint16_t ptype3 = _mm256_extract_epi16(ptypes2_3, 13);
+		const uint16_t ptype2 = _mm256_extract_epi16(ptypes2_3, 5);
+		const uint16_t ptype1 = _mm256_extract_epi16(ptypes0_1, 13);
+		const uint16_t ptype0 = _mm256_extract_epi16(ptypes0_1, 5);
+
+		const __m512i ptype_mask4_7 =
+				_mm512_set_epi32(0, 0, 0, ptype_tbl[ptype7],
+						 0, 0, 0, ptype_tbl[ptype6],
+						 0, 0, 0, ptype_tbl[ptype5],
+						 0, 0, 0, ptype_tbl[ptype4]);
+		const __m512i ptype_mask0_3 =
+				_mm512_set_epi32(0, 0, 0, ptype_tbl[ptype3],
+						 0, 0, 0, ptype_tbl[ptype2],
+						 0, 0, 0, ptype_tbl[ptype1],
+						 0, 0, 0, ptype_tbl[ptype0]);
+
+		mbufs4_7 = _mm512_or_si512(mbufs4_7, ptype_mask4_7);
+		mbufs0_3 = _mm512_or_si512(mbufs0_3, ptype_mask0_3);
+
+		mbufs6_7 = _mm512_extracti64x4_epi64(mbufs4_7, 1);
+		mbufs4_5 = _mm512_extracti64x4_epi64(mbufs4_7, 0);
+		mbufs2_3 = _mm512_extracti64x4_epi64(mbufs0_3, 1);
+		mbufs0_1 = _mm512_extracti64x4_epi64(mbufs0_3, 0);
+
+		const __m512i staterr_per_mask =
+			_mm512_set_epi32(0x17, 0x1F, 0x07, 0x0F,
+					 0x13, 0x1B, 0x03, 0x0B,
+					 0x16, 0x1E, 0x06, 0x0E,
+					 0x12, 0x1A, 0x02, 0x0A);
+		__m512i qw1_0_7 = _mm512_permutex2var_epi32(descs4_7,
+							    staterr_per_mask,
+							    descs0_3);
+
+		__m256i staterrs0_7 = _mm512_extracti64x4_epi64(qw1_0_7, 0);
+
+		__m256i stu_len0_7 = _mm512_extracti64x4_epi64(qw1_0_7, 1);
+		__m256i mbuf_flags = _mm256_setzero_si256();
+
+		if (do_offload) {
+			const __m256i desc_flags_mask = _mm256_set1_epi32(0xC0001C04);
+			const __m256i desc_flags_rss_mask = _mm256_set1_epi32(0x20000000);
+			const __m256i vlan_flags =
+				_mm256_set_epi8(0, 0, 0, 0,
+					0, 0, 0, 0,
+					0, 0, 0, RTE_MBUF_F_RX_VLAN |
+						RTE_MBUF_F_RX_VLAN_STRIPPED,
+					0, 0, 0, 0,
+					0, 0, 0, 0,
+					0, 0, 0, 0,
+					0, 0, 0, RTE_MBUF_F_RX_VLAN |
+						RTE_MBUF_F_RX_VLAN_STRIPPED,
+					0, 0, 0, 0);
+
+			const __m256i rss_flags =
+				_mm256_set_epi8(0, 0, 0, 0,
+					0, 0, 0, 0,
+					0, 0, 0, RTE_MBUF_F_RX_RSS_HASH,
+					0, 0, 0, 0,
+					0, 0, 0, 0,
+					0, 0, 0, 0,
+					0, 0, 0, RTE_MBUF_F_RX_RSS_HASH,
+					0, 0, 0, 0);
+
+			const __m256i cksum_flags =
+			_mm256_set_epi8(0, 0, 0, 0, 0, 0, 0,
+			0,
+			((RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+				RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1),
+			((RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+				RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1),
+			((RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+				RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1),
+			((RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+				RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1),
+			((RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1),
+			((RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1),
+			((RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1),
+			((RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1),
+			0, 0, 0, 0, 0, 0, 0, 0,
+			((RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+				RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1),
+			((RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+				RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1),
+			((RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+				RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1),
+			((RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+				RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1),
+			((RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1),
+			((RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1),
+			((RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1),
+			((RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1));
+
+			const __m256i cksum_mask =
+				_mm256_set1_epi32(RTE_MBUF_F_RX_IP_CKSUM_MASK |
+						  RTE_MBUF_F_RX_L4_CKSUM_MASK |
+						  RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK |
+						  RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD);
+
+			const __m256i vlan_mask =
+				_mm256_set1_epi32(RTE_MBUF_F_RX_VLAN |
+						  RTE_MBUF_F_RX_VLAN_STRIPPED);
+
+			__m256i tmp_flags;
+			__m256i descs_flags = _mm256_and_si256(staterrs0_7, desc_flags_mask);
+			stu_len0_7 = _mm256_and_si256(stu_len0_7, desc_flags_rss_mask);
+
+			tmp_flags = _mm256_shuffle_epi8(vlan_flags, descs_flags);
+			mbuf_flags = _mm256_and_si256(tmp_flags, vlan_mask);
+
+			descs_flags = _mm256_srli_epi32(descs_flags, 10);
+			tmp_flags   = _mm256_shuffle_epi8(cksum_flags, descs_flags);
+			tmp_flags   = _mm256_slli_epi32(tmp_flags, 1);
+			tmp_flags   = _mm256_and_si256(tmp_flags, cksum_mask);
+			mbuf_flags  = _mm256_or_si256(mbuf_flags, tmp_flags);
+
+			descs_flags = _mm256_srli_epi32(stu_len0_7, 27);
+			tmp_flags   = _mm256_shuffle_epi8(rss_flags, descs_flags);
+			mbuf_flags  = _mm256_or_si256(mbuf_flags, tmp_flags);
+
+#ifndef RTE_LIBRTE_SXE2_16BYTE_RX_DESC
+			if (rxq->fnav_enable) {
+				__m256i fnav_vld0_3, fnav_vld4_7;
+				__m256i fnav_vld0_7;
+				__m256i v_zeros, v_ffff, v_u32_one;
+				const __m256i fdir_flags =
+					_mm256_set1_epi32(RTE_MBUF_F_RX_FDIR |
+							  RTE_MBUF_F_RX_FDIR_ID);
+				fnav_vld0_3 = _mm256_unpacklo_epi32(descs2_3, descs0_1);
+				fnav_vld4_7 = _mm256_unpacklo_epi32(descs6_7, descs4_5);
+
+				fnav_vld0_7 = _mm256_unpacklo_epi64(fnav_vld4_7, fnav_vld0_3);
+
+				fnav_vld0_7 = _mm256_slli_epi32(fnav_vld0_7, 26);
+				fnav_vld0_7 = _mm256_srli_epi32(fnav_vld0_7, 31);
+
+				v_zeros = _mm256_setzero_si256();
+				v_ffff = _mm256_cmpeq_epi32(v_zeros, v_zeros);
+				v_u32_one = _mm256_srli_epi32(v_ffff, 31);
+
+				tmp_flags = _mm256_cmpeq_epi32(fnav_vld0_7, v_u32_one);
+
+				tmp_flags = _mm256_and_si256(tmp_flags, fdir_flags);
+
+				mbuf_flags = _mm256_or_si256(mbuf_flags, tmp_flags);
+
+				rx_pkts[i + 0]->hash.fdir.hi = desc[0].wb.fd_filter_id;
+				rx_pkts[i + 1]->hash.fdir.hi = desc[1].wb.fd_filter_id;
+				rx_pkts[i + 2]->hash.fdir.hi = desc[2].wb.fd_filter_id;
+				rx_pkts[i + 3]->hash.fdir.hi = desc[3].wb.fd_filter_id;
+				rx_pkts[i + 4]->hash.fdir.hi = desc[4].wb.fd_filter_id;
+				rx_pkts[i + 5]->hash.fdir.hi = desc[5].wb.fd_filter_id;
+				rx_pkts[i + 6]->hash.fdir.hi = desc[6].wb.fd_filter_id;
+				rx_pkts[i + 7]->hash.fdir.hi = desc[7].wb.fd_filter_id;
+			}
+#endif
+		}
+
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+				offsetof(struct rte_mbuf, rearm_data) + 8);
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rx_descriptor_fields1) !=
+				offsetof(struct rte_mbuf, rearm_data) + 16);
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+				RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+
+		__m256i rearm_arr[8];
+
+		rearm_arr[6] = _mm256_blend_epi32(mbuf_init,
+					_mm256_slli_si256(mbuf_flags, 8), 0x04);
+		rearm_arr[4] = _mm256_blend_epi32(mbuf_init,
+					_mm256_slli_si256(mbuf_flags, 4), 0x04);
+		rearm_arr[2] = _mm256_blend_epi32(mbuf_init, mbuf_flags, 0x04);
+		rearm_arr[0] = _mm256_blend_epi32(mbuf_init,
+					_mm256_srli_si256(mbuf_flags, 4), 0x04);
+
+		rearm_arr[6] = _mm256_permute2f128_si256(rearm_arr[6], mbufs6_7, 0x20);
+		rearm_arr[4] = _mm256_permute2f128_si256(rearm_arr[4], mbufs4_5, 0x20);
+		rearm_arr[2] = _mm256_permute2f128_si256(rearm_arr[2], mbufs2_3, 0x20);
+		rearm_arr[0] = _mm256_permute2f128_si256(rearm_arr[0], mbufs0_1, 0x20);
+
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data, rearm_arr[6]);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data, rearm_arr[4]);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data, rearm_arr[2]);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data, rearm_arr[0]);
+
+		const __m256i tmp_mbuf_flags =
+				_mm256_castsi128_si256(_mm256_extracti128_si256(mbuf_flags, 1));
+
+		rearm_arr[7] = _mm256_blend_epi32(mbuf_init,
+					_mm256_slli_si256(tmp_mbuf_flags, 8), 4);
+		rearm_arr[5] = _mm256_blend_epi32(mbuf_init,
+					_mm256_slli_si256(tmp_mbuf_flags, 4), 4);
+		rearm_arr[3] = _mm256_blend_epi32(mbuf_init, tmp_mbuf_flags, 4);
+		rearm_arr[1] = _mm256_blend_epi32(mbuf_init,
+					_mm256_srli_si256(tmp_mbuf_flags, 4), 4);
+
+		rearm_arr[7] = _mm256_blend_epi32(rearm_arr[7], mbufs6_7, 0XF0);
+		rearm_arr[5] = _mm256_blend_epi32(rearm_arr[5], mbufs4_5, 0XF0);
+		rearm_arr[3] = _mm256_blend_epi32(rearm_arr[3], mbufs2_3, 0XF0);
+		rearm_arr[1] = _mm256_blend_epi32(rearm_arr[1], mbufs0_1, 0XF0);
+
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data, rearm_arr[7]);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data, rearm_arr[5]);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data, rearm_arr[3]);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data, rearm_arr[1]);
+
+		if (umbcast_flags) {
+			const __m256i umbcast_mask =
+				_mm256_set1_epi32(SXE2_RX_DESC_STATUS_UMBCAST_MASK);
+			__m256i umbcast_bits_256 =
+				_mm256_and_si256(staterrs0_7, umbcast_mask);
+
+			umbcast_bits_256 = _mm256_srli_epi32(umbcast_bits_256, 24);
+			__m128i umbcast_bits_128 =
+				_mm_packs_epi32(_mm256_castsi256_si128(umbcast_bits_256),
+						_mm256_extractf128_si256(umbcast_bits_256, 1));
+
+			umbcast_bits_128 = _mm_shuffle_epi8(umbcast_bits_128, eop_shuf_mask);
+
+			*(uint64_t *)umbcast_flags = _mm_cvtsi128_si64(umbcast_bits_128);
+			umbcast_flags += SXE2_RX_NUM_PER_LOOP_AVX;
+		}
+
+		if (split_rxe_flags) {
+			const __m256i eop_rxe_mask =
+					_mm256_set1_epi32(SXE2_RX_DESC_STATUS_EOP_MASK |
+								SXE2_RX_DESC_ERROR_RXE_MASK |
+								SXE2_RX_DESC_ERROR_OVERSIZE_MASK);
+			const __m128i eop_mask_128 =
+					_mm_set1_epi16(SXE2_RX_DESC_STATUS_EOP_MASK);
+			const __m128i rxe_mask_128 =
+					_mm_set1_epi16(SXE2_RX_DESC_ERROR_RXE_MASK |
+							SXE2_RX_DESC_ERROR_OVERSIZE_MASK);
+
+			const __m256i tmp_stats = _mm256_and_si256(staterrs0_7, eop_rxe_mask);
+
+			const __m128i eop_rxe_bits = _mm_packs_epi32
+							(_mm256_castsi256_si128(tmp_stats),
+							 _mm256_extractf128_si256(tmp_stats, 1));
+
+			__m128i not_eop_bits = _mm_andnot_si128(eop_rxe_bits, eop_mask_128);
+
+			not_eop_bits =
+				_mm_or_si128(not_eop_bits,
+					     _mm_srli_epi16(_mm_and_si128(eop_rxe_bits,
+									       rxe_mask_128),
+							      7));
+
+			not_eop_bits = _mm_shuffle_epi8(not_eop_bits, eop_shuf_mask);
+
+			*(uint64_t *)split_rxe_flags = _mm_cvtsi128_si64(not_eop_bits);
+			split_rxe_flags += SXE2_RX_NUM_PER_LOOP_AVX;
+		}
+
+		staterrs0_7 = _mm256_and_si256(staterrs0_7, dd_mask);
+
+		staterrs0_7 = _mm256_packs_epi32(staterrs0_7, _mm256_setzero_si256());
+
+		bit_num = rte_popcount64
+				(_mm_cvtsi128_si64(_mm256_extracti128_si256(staterrs0_7, 1)));
+		bit_num += rte_popcount64
+				(_mm_cvtsi128_si64(_mm256_castsi256_si128(staterrs0_7)));
+		done_num += bit_num;
+
+		if (bit_num != SXE2_RX_NUM_PER_LOOP_AVX)
+			break;
+	}
+
+	rxq->processing_idx += done_num;
+	rxq->processing_idx &= (rxq->ring_depth - 1);
+	if ((rxq->processing_idx & 1) == 1 && done_num > 1) {
+		rxq->processing_idx--;
+		done_num--;
+	}
+	rxq->realloc_num     += done_num;
+
+l_end:
+	PMD_LOG_DEBUG(RX, "port_id=%u queue_id=%u last_id=%u recv_pkts=%d",
+			rxq->port_id, rxq->queue_id, rxq->processing_idx, done_num);
+	return done_num;
+}
+
+static __rte_always_inline uint16_t
+sxe2_rx_pkts_scattered_batch_vec_avx512(struct sxe2_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+	uint16_t nb_pkts, bool do_offload)
+{
+	const uint64_t *split_rxe_flags64;
+	uint8_t split_rxe_flags[SXE2_RX_PKTS_BURST_BATCH_NUM_VEC] = {0};
+	uint8_t umbcast_flags[SXE2_RX_PKTS_BURST_BATCH_NUM_VEC] = {0};
+	uint16_t rx_done_num;
+	uint16_t rx_pkt_done_num;
+
+	rx_pkt_done_num = 0;
+
+	if (rxq->vsi->adapter->devargs.sw_stats_en) {
+		rx_done_num = sxe2_rx_pkts_common_vec_avx512(rxq, rx_pkts,
+				nb_pkts, split_rxe_flags,
+				umbcast_flags, do_offload);
+	} else {
+		rx_done_num = sxe2_rx_pkts_common_vec_avx512(rxq, rx_pkts,
+				nb_pkts, split_rxe_flags,
+			    NULL, do_offload);
+	}
+	if (rx_done_num == 0)
+		goto l_end;
+
+	if (!rxq->vsi->adapter->devargs.sw_stats_en) {
+		split_rxe_flags64 = (uint64_t *)split_rxe_flags;
+
+		if (rxq->pkt_first_seg == NULL &&
+				!split_rxe_flags64[0] && !split_rxe_flags64[1] &&
+				!split_rxe_flags64[2] && !split_rxe_flags64[3]) {
+			rx_pkt_done_num = rx_done_num;
+			goto l_end;
+		}
+
+		if (rxq->pkt_first_seg == NULL) {
+			while (rx_pkt_done_num < rx_done_num &&
+			       split_rxe_flags[rx_pkt_done_num] == 0)
+				rx_pkt_done_num++;
+
+			if (rx_pkt_done_num == rx_done_num)
+				goto l_end;
+
+			rxq->pkt_first_seg = rx_pkts[rx_pkt_done_num];
+		}
+	}
+
+	rx_pkt_done_num += sxe2_rx_pkts_refactor(rxq, &rx_pkts[rx_pkt_done_num],
+			rx_done_num - rx_pkt_done_num, &split_rxe_flags[rx_pkt_done_num],
+			&umbcast_flags[rx_pkt_done_num]);
+
+l_end:
+
+	return rx_pkt_done_num;
+}
+
+static __rte_always_inline uint16_t
+sxe2_rx_pkts_scattered_common_vec_avx512(void *rx_queue,
+	struct rte_mbuf **rx_pkts, uint16_t nb_pkts, bool offload)
+{
+	uint16_t done_num = 0;
+	uint16_t once_num  = 0;
+
+	while (nb_pkts > SXE2_RX_PKTS_BURST_BATCH_NUM) {
+		once_num = sxe2_rx_pkts_scattered_batch_vec_avx512(rx_queue, rx_pkts + done_num,
+			SXE2_RX_PKTS_BURST_BATCH_NUM, offload);
+
+		done_num  += once_num;
+		nb_pkts -= once_num;
+
+		if (once_num < SXE2_RX_PKTS_BURST_BATCH_NUM)
+			goto end;
+	}
+
+	done_num += sxe2_rx_pkts_scattered_batch_vec_avx512(rx_queue,
+		rx_pkts + done_num, nb_pkts, offload);
+
+end:
+	return done_num;
+}
+
+uint16_t sxe2_rx_pkts_scattered_vec_avx512(void *rx_queue,
+		struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	return sxe2_rx_pkts_scattered_common_vec_avx512(rx_queue,
+			rx_pkts, nb_pkts, false);
+}
+
+uint16_t sxe2_rx_pkts_scattered_vec_avx512_offload(void *rx_queue,
+		struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	return sxe2_rx_pkts_scattered_common_vec_avx512(rx_queue,
+			rx_pkts, nb_pkts, true);
+}
+
+#endif
-- 
2.52.0


  reply	other threads:[~2026-06-02 15:52 UTC|newest]

Thread overview: 200+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-30 18:46 [PATCH v1 00/20] net/sxe2: added Linkdata sxe ethernet driver liujie5
2026-05-30 18:46 ` [PATCH v1 01/20] net/sxe2: support AVX512 vectorized path for Rx and Tx liujie5
2026-05-30 18:46 ` [PATCH v1 02/20] net/sxe2: add AVX2 vector data " liujie5
2026-05-31 22:29   ` Stephen Hemminger
2026-05-30 18:46 ` [PATCH v1 03/20] drivers: add supported packet types get callback liujie5
2026-05-30 18:46 ` [PATCH v1 04/20] net/sxe2: support L2 filtering and MAC config liujie5
2026-05-30 18:46 ` [PATCH v1 05/20] drivers: support RSS feature liujie5
2026-05-30 18:46 ` [PATCH v1 06/20] net/sxe2: support TM hierarchy and shaping liujie5
2026-05-30 18:46 ` [PATCH v1 07/20] net/sxe2: support IPsec inline protocol offload liujie5
2026-05-30 18:46 ` [PATCH v1 08/20] net/sxe2: support statistics and multi-process liujie5
2026-05-30 18:46 ` [PATCH v1 09/20] drivers: interrupt handling liujie5
2026-05-30 18:46 ` [PATCH v1 10/20] net/sxe2: add NEON vec Rx/Tx burst functions liujie5
2026-05-30 18:46 ` [PATCH v1 11/20] drivers: add support for VF representors liujie5
2026-05-30 18:46 ` [PATCH v1 12/20] net/sxe2: add support for custom UDP tunnel ports liujie5
2026-05-30 18:46 ` [PATCH v1 13/20] net/sxe2: support firmware version reading liujie5
2026-05-30 18:46 ` [PATCH v1 14/20] net/sxe2: implement get monitor address liujie5
2026-05-30 18:46 ` [PATCH v1 15/20] common/sxe2: add shared SFP module definitions liujie5
2026-05-30 18:46 ` [PATCH v1 16/20] net/sxe2: support SFP module info and EEPROM access liujie5
2026-05-30 18:46 ` [PATCH v1 17/20] net/sxe2: implement private dump info liujie5
2026-05-30 18:46 ` [PATCH v1 18/20] net/sxe2: add mbuf validation in Tx debug mode liujie5
2026-05-30 18:46 ` [PATCH v1 19/20] drivers: add testpmd commands for private features liujie5
2026-05-31 22:31   ` Stephen Hemminger
2026-05-31 22:32   ` Stephen Hemminger
2026-05-30 18:46 ` [PATCH v1 20/20] net/sxe2: update sxe2 feature matrix docs liujie5
2026-06-01  6:29   ` [PATCH v2 00/20] net/sxe2: added Linkdata sxe2 ethernet driver liujie5
2026-06-01  6:29     ` [PATCH v2 01/20] net/sxe2: support AVX512 vectorized path for Rx and Tx liujie5
2026-06-01  6:29     ` [PATCH v2 02/20] net/sxe2: add AVX2 vector data " liujie5
2026-06-01  6:29     ` [PATCH v2 03/20] drivers: add supported packet types get callback liujie5
2026-06-01  6:29     ` [PATCH v2 04/20] net/sxe2: support L2 filtering and MAC config liujie5
2026-06-01  6:29     ` [PATCH v2 05/20] drivers: support RSS feature liujie5
2026-06-01  6:29     ` [PATCH v2 06/20] net/sxe2: support TM hierarchy and shaping liujie5
2026-06-01  6:29     ` [PATCH v2 07/20] net/sxe2: support IPsec inline protocol offload liujie5
2026-06-01  6:29     ` [PATCH v2 08/20] net/sxe2: support statistics and multi-process liujie5
2026-06-01  6:29     ` [PATCH v2 09/20] drivers: interrupt handling liujie5
2026-06-01  6:29     ` [PATCH v2 10/20] net/sxe2: add NEON vec Rx/Tx burst functions liujie5
2026-06-01  6:29     ` [PATCH v2 11/20] drivers: add support for VF representors liujie5
2026-06-01  6:29     ` [PATCH v2 12/20] net/sxe2: add support for custom UDP tunnel ports liujie5
2026-06-01  6:29     ` [PATCH v2 13/20] net/sxe2: support firmware version reading liujie5
2026-06-01  6:30     ` [PATCH v2 14/20] net/sxe2: implement get monitor address liujie5
2026-06-01  6:30     ` [PATCH v2 15/20] common/sxe2: add shared SFP module definitions liujie5
2026-06-01  6:30     ` [PATCH v2 16/20] net/sxe2: support SFP module info and EEPROM access liujie5
2026-06-01  6:30     ` [PATCH v2 17/20] net/sxe2: implement private dump info liujie5
2026-06-01  6:30     ` [PATCH v2 18/20] net/sxe2: add mbuf validation in Tx debug mode liujie5
2026-06-01  6:30     ` [PATCH v2 19/20] drivers: add testpmd commands for private features liujie5
2026-06-01  6:30     ` [PATCH v2 20/20] net/sxe2: update sxe2 feature matrix docs liujie5
2026-06-01  8:49       ` [PATCH v3 00/20]net/sxe2: added Linkdata sxe2 ethernet driver liujie5
2026-06-01  8:49         ` [PATCH v3 01/20] net/sxe2: support AVX512 vectorized path for Rx and Tx liujie5
2026-06-01  8:49         ` [PATCH v3 02/20] net/sxe2: add AVX2 vector data " liujie5
2026-06-01  8:49         ` [PATCH v3 03/20] drivers: add supported packet types get callback liujie5
2026-06-01  8:49         ` [PATCH v3 04/20] net/sxe2: support L2 filtering and MAC config liujie5
2026-06-01  8:49         ` [PATCH v3 05/20] drivers: support RSS feature liujie5
2026-06-01  8:49         ` [PATCH v3 06/20] net/sxe2: support TM hierarchy and shaping liujie5
2026-06-01  8:49         ` [PATCH v3 07/20] net/sxe2: support IPsec inline protocol offload liujie5
2026-06-01  8:49         ` [PATCH v3 08/20] net/sxe2: support statistics and multi-process liujie5
2026-06-01  8:49         ` [PATCH v3 09/20] drivers: interrupt handling liujie5
2026-06-01  8:49         ` [PATCH v3 10/20] net/sxe2: add NEON vec Rx/Tx burst functions liujie5
2026-06-01  8:49         ` [PATCH v3 11/20] drivers: add support for VF representors liujie5
2026-06-01  8:49         ` [PATCH v3 12/20] net/sxe2: add support for custom UDP tunnel ports liujie5
2026-06-01  8:49         ` [PATCH v3 13/20] net/sxe2: support firmware version reading liujie5
2026-06-01  8:49         ` [PATCH v3 14/20] net/sxe2: implement get monitor address liujie5
2026-06-01  8:49         ` [PATCH v3 15/20] common/sxe2: add shared SFP module definitions liujie5
2026-06-01  8:49         ` [PATCH v3 16/20] net/sxe2: support SFP module info and EEPROM access liujie5
2026-06-01  8:49         ` [PATCH v3 17/20] net/sxe2: implement private dump info liujie5
2026-06-01  8:49         ` [PATCH v3 18/20] net/sxe2: add mbuf validation in Tx debug mode liujie5
2026-06-01  8:49         ` [PATCH v3 19/20] drivers: add testpmd commands for private features liujie5
2026-06-01  8:49         ` [PATCH v3 20/20] net/sxe2: update sxe2 feature matrix docs liujie5
2026-06-02  3:16           ` [PATCH v4 00/20] net/sxe2: added Linkdata sxe2 ethernet driver liujie5
2026-06-02  3:16             ` [PATCH v4 01/20] net/sxe2: support AVX512 vectorized path for Rx and Tx liujie5
2026-06-02  3:16             ` [PATCH v4 02/20] net/sxe2: add AVX2 vector data " liujie5
2026-06-02  3:16             ` [PATCH v4 03/20] drivers: add supported packet types get callback liujie5
2026-06-02  3:16             ` [PATCH v4 04/20] net/sxe2: support L2 filtering and MAC config liujie5
2026-06-02  3:16             ` [PATCH v4 05/20] drivers: support RSS feature liujie5
2026-06-02  3:16             ` [PATCH v4 06/20] net/sxe2: support TM hierarchy and shaping liujie5
2026-06-02  3:16             ` [PATCH v4 07/20] net/sxe2: support IPsec inline protocol offload liujie5
2026-06-02  3:16             ` [PATCH v4 08/20] net/sxe2: support statistics and multi-process liujie5
2026-06-02  3:16             ` [PATCH v4 09/20] drivers: interrupt handling liujie5
2026-06-02  3:16             ` [PATCH v4 10/20] net/sxe2: add NEON vec Rx/Tx burst functions liujie5
2026-06-02  3:16             ` [PATCH v4 11/20] drivers: add support for VF representors liujie5
2026-06-02  3:16             ` [PATCH v4 12/20] net/sxe2: add support for custom UDP tunnel ports liujie5
2026-06-02  3:16             ` [PATCH v4 13/20] net/sxe2: support firmware version reading liujie5
2026-06-02  3:17             ` [PATCH v4 14/20] net/sxe2: implement get monitor address liujie5
2026-06-02  3:17             ` [PATCH v4 15/20] common/sxe2: add shared SFP module definitions liujie5
2026-06-02  3:17             ` [PATCH v4 16/20] net/sxe2: support SFP module info and EEPROM access liujie5
2026-06-02  3:17             ` [PATCH v4 17/20] net/sxe2: implement private dump info liujie5
2026-06-02  3:17             ` [PATCH v4 18/20] net/sxe2: add mbuf validation in Tx debug mode liujie5
2026-06-02  3:17             ` [PATCH v4 19/20] drivers: add testpmd commands for private features liujie5
2026-06-02  3:17             ` [PATCH v4 20/20] net/sxe2: update sxe2 feature matrix docs liujie5
2026-06-02  5:53               ` [PATCH v5 00/20] net/sxe2: added Linkdata sxe2 ethernet driver liujie5
2026-06-02  5:53                 ` [PATCH v5 01/20] net/sxe2: support AVX512 vectorized path for Rx and Tx liujie5
2026-06-02  5:53                 ` [PATCH v5 02/20] net/sxe2: add AVX2 vector data " liujie5
2026-06-02  5:53                 ` [PATCH v5 03/20] drivers: add supported packet types get callback liujie5
2026-06-02  5:53                 ` [PATCH v5 04/20] net/sxe2: support L2 filtering and MAC config liujie5
2026-06-02  5:53                 ` [PATCH v5 05/20] drivers: support RSS feature liujie5
2026-06-02  5:53                 ` [PATCH v5 06/20] net/sxe2: support TM hierarchy and shaping liujie5
2026-06-02  5:54                 ` [PATCH v5 07/20] net/sxe2: support IPsec inline protocol offload liujie5
2026-06-02  5:54                 ` [PATCH v5 08/20] net/sxe2: support statistics and multi-process liujie5
2026-06-02  5:54                 ` [PATCH v5 09/20] drivers: interrupt handling liujie5
2026-06-02  5:54                 ` [PATCH v5 10/20] net/sxe2: add NEON vec Rx/Tx burst functions liujie5
2026-06-02  5:54                 ` [PATCH v5 11/20] drivers: add support for VF representors liujie5
2026-06-02  5:54                 ` [PATCH v5 12/20] net/sxe2: add support for custom UDP tunnel ports liujie5
2026-06-02  5:54                 ` [PATCH v5 13/20] net/sxe2: support firmware version reading liujie5
2026-06-02  5:54                 ` [PATCH v5 14/20] net/sxe2: implement get monitor address liujie5
2026-06-02  5:54                 ` [PATCH v5 15/20] common/sxe2: add shared SFP module definitions liujie5
2026-06-02  5:54                 ` [PATCH v5 16/20] net/sxe2: support SFP module info and EEPROM access liujie5
2026-06-02  5:54                 ` [PATCH v5 17/20] net/sxe2: implement private dump info liujie5
2026-06-02  5:54                 ` [PATCH v5 18/20] net/sxe2: add mbuf validation in Tx debug mode liujie5
2026-06-02  5:54                 ` [PATCH v5 19/20] drivers: add testpmd commands for private features liujie5
2026-06-02  5:54                 ` [PATCH v5 20/20] net/sxe2: update sxe2 feature matrix docs liujie5
2026-06-02 15:52                   ` [PATCH v6 00/20] net/sxe2: added Linkdata sxe2 ethernet driver liujie5
2026-06-02 15:52                     ` liujie5 [this message]
2026-06-02 15:52                     ` [PATCH v6 02/20] net/sxe2: add AVX2 vector data path for Rx and Tx liujie5
2026-06-02 15:52                     ` [PATCH v6 03/20] drivers: add supported packet types get callback liujie5
2026-06-02 15:52                     ` [PATCH v6 04/20] net/sxe2: support L2 filtering and MAC config liujie5
2026-06-02 15:52                     ` [PATCH v6 05/20] drivers: support RSS feature liujie5
2026-06-02 15:52                     ` [PATCH v6 06/20] net/sxe2: support TM hierarchy and shaping liujie5
2026-06-02 15:52                     ` [PATCH v6 07/20] net/sxe2: support IPsec inline protocol offload liujie5
2026-06-02 15:52                     ` [PATCH v6 08/20] net/sxe2: support statistics and multi-process liujie5
2026-06-02 15:52                     ` [PATCH v6 09/20] drivers: interrupt handling liujie5
2026-06-02 15:52                     ` [PATCH v6 10/20] net/sxe2: add NEON vec Rx/Tx burst functions liujie5
2026-06-02 15:52                     ` [PATCH v6 11/20] drivers: add support for VF representors liujie5
2026-06-02 15:52                     ` [PATCH v6 12/20] net/sxe2: add support for custom UDP tunnel ports liujie5
2026-06-02 15:52                     ` [PATCH v6 13/20] net/sxe2: support firmware version reading liujie5
2026-06-02 15:52                     ` [PATCH v6 14/20] net/sxe2: implement get monitor address liujie5
2026-06-02 15:52                     ` [PATCH v6 15/20] common/sxe2: add shared SFP module definitions liujie5
2026-06-02 20:34                       ` Stephen Hemminger
2026-06-02 15:52                     ` [PATCH v6 16/20] net/sxe2: support SFP module info and EEPROM access liujie5
2026-06-02 15:52                     ` [PATCH v6 17/20] net/sxe2: implement private dump info liujie5
2026-06-02 15:52                     ` [PATCH v6 18/20] net/sxe2: add mbuf validation in Tx debug mode liujie5
2026-06-02 15:52                     ` [PATCH v6 19/20] drivers: add testpmd commands for private features liujie5
2026-06-02 15:52                     ` [PATCH v6 20/20] net/sxe2: update sxe2 feature matrix docs liujie5
2026-06-03  2:21                       ` [PATCH v7 00/20]net/sxe2: added Linkdata sxe2 ethernet driver liujie5
2026-06-03  2:21                         ` [PATCH v7 01/20] net/sxe2: support AVX512 vectorized path for Rx and Tx liujie5
2026-06-03  2:21                         ` [PATCH v7 02/20] net/sxe2: add AVX2 vector data " liujie5
2026-06-03  2:21                         ` [PATCH v7 03/20] drivers: add supported packet types get callback liujie5
2026-06-03  2:21                         ` [PATCH v7 04/20] net/sxe2: support L2 filtering and MAC config liujie5
2026-06-03  2:21                         ` [PATCH v7 05/20] drivers: support RSS feature liujie5
2026-06-03  2:21                         ` [PATCH v7 06/20] net/sxe2: support TM hierarchy and shaping liujie5
2026-06-03  2:21                         ` [PATCH v7 07/20] net/sxe2: support IPsec inline protocol offload liujie5
2026-06-03  2:21                         ` [PATCH v7 08/20] net/sxe2: support statistics and multi-process liujie5
2026-06-03  2:21                         ` [PATCH v7 09/20] drivers: interrupt handling liujie5
2026-06-03  2:21                         ` [PATCH v7 10/20] net/sxe2: add NEON vec Rx/Tx burst functions liujie5
2026-06-03  2:21                         ` [PATCH v7 11/20] drivers: add support for VF representors liujie5
2026-06-03  2:21                         ` [PATCH v7 12/20] net/sxe2: add support for custom UDP tunnel ports liujie5
2026-06-03  2:21                         ` [PATCH v7 13/20] net/sxe2: support firmware version reading liujie5
2026-06-03  2:21                         ` [PATCH v7 14/20] net/sxe2: implement get monitor address liujie5
2026-06-03  2:21                         ` [PATCH v7 15/20] common/sxe2: add shared SFP module definitions liujie5
2026-06-03  2:21                         ` [PATCH v7 16/20] net/sxe2: support SFP module info and EEPROM access liujie5
2026-06-03  2:21                         ` [PATCH v7 17/20] net/sxe2: implement private dump info liujie5
2026-06-03  2:21                         ` [PATCH v7 18/20] net/sxe2: add mbuf validation in Tx debug mode liujie5
2026-06-03  2:21                         ` [PATCH v7 19/20] drivers: add testpmd commands for private features liujie5
2026-06-03  2:21                         ` [PATCH v7 20/20] net/sxe2: update sxe2 feature matrix docs liujie5
2026-06-03  6:29                           ` [PATCH v8 00/20] net/sxe2: added Linkdata sxe2 ethernet driver liujie5
2026-06-03  6:29                             ` [PATCH v8 01/20] net/sxe2: support AVX512 vectorized path for Rx and Tx liujie5
2026-06-03  6:29                             ` [PATCH v8 02/20] net/sxe2: add AVX2 vector data " liujie5
2026-06-03  6:29                             ` [PATCH v8 03/20] drivers: add supported packet types get callback liujie5
2026-06-03  6:29                             ` [PATCH v8 04/20] net/sxe2: support L2 filtering and MAC config liujie5
2026-06-03 18:21                               ` Stephen Hemminger
2026-06-03  6:29                             ` [PATCH v8 05/20] drivers: support RSS feature liujie5
2026-06-03  6:29                             ` [PATCH v8 06/20] net/sxe2: support TM hierarchy and shaping liujie5
2026-06-03  6:29                             ` [PATCH v8 07/20] net/sxe2: support IPsec inline protocol offload liujie5
2026-06-03 18:17                               ` Stephen Hemminger
2026-06-03  6:29                             ` [PATCH v8 08/20] net/sxe2: support statistics and multi-process liujie5
2026-06-03  6:29                             ` [PATCH v8 09/20] drivers: interrupt handling liujie5
2026-06-03  6:29                             ` [PATCH v8 10/20] net/sxe2: add NEON vec Rx/Tx burst functions liujie5
2026-06-03  6:29                             ` [PATCH v8 11/20] drivers: add support for VF representors liujie5
2026-06-03 18:22                               ` Stephen Hemminger
2026-06-03  6:29                             ` [PATCH v8 12/20] net/sxe2: add support for custom UDP tunnel ports liujie5
2026-06-03  6:29                             ` [PATCH v8 13/20] net/sxe2: support firmware version reading liujie5
2026-06-03  6:29                             ` [PATCH v8 14/20] net/sxe2: implement get monitor address liujie5
2026-06-03  6:29                             ` [PATCH v8 15/20] common/sxe2: add shared SFP module definitions liujie5
2026-06-03  6:29                             ` [PATCH v8 16/20] net/sxe2: support SFP module info and EEPROM access liujie5
2026-06-03  6:29                             ` [PATCH v8 17/20] net/sxe2: implement private dump info liujie5
2026-06-03  6:29                             ` [PATCH v8 18/20] net/sxe2: add mbuf validation in Tx debug mode liujie5
2026-06-03  6:29                             ` [PATCH v8 19/20] drivers: add testpmd commands for private features liujie5
2026-06-03 18:23                               ` Stephen Hemminger
2026-06-03  6:29                             ` [PATCH v8 20/20] net/sxe2: update sxe2 feature matrix docs liujie5
2026-06-03 18:19                               ` Stephen Hemminger
2026-06-04  1:53                               ` [PATCH v9 00/20] net/sxe2: added Linkdata sxe2 ethernet driver liujie5
2026-06-04  1:53                                 ` [PATCH v9 01/20] net/sxe2: support AVX512 vectorized path for Rx and Tx liujie5
2026-06-04  1:53                                 ` [PATCH v9 02/20] net/sxe2: add AVX2 vector data " liujie5
2026-06-04  1:53                                 ` [PATCH v9 03/20] drivers: add supported packet types get callback liujie5
2026-06-04  1:53                                 ` [PATCH v9 04/20] net/sxe2: support L2 filtering and MAC config liujie5
2026-06-04  1:53                                 ` [PATCH v9 05/20] drivers: support RSS feature liujie5
2026-06-04  1:53                                 ` [PATCH v9 06/20] net/sxe2: support TM hierarchy and shaping liujie5
2026-06-04  1:53                                 ` [PATCH v9 07/20] net/sxe2: support IPsec inline protocol offload liujie5
2026-06-04  1:53                                 ` [PATCH v9 08/20] net/sxe2: support statistics and multi-process liujie5
2026-06-04  1:53                                 ` [PATCH v9 09/20] drivers: interrupt handling liujie5
2026-06-04  1:53                                 ` [PATCH v9 10/20] net/sxe2: add NEON vec Rx/Tx burst functions liujie5
2026-06-04  1:53                                 ` [PATCH v9 11/20] drivers: add support for VF representors liujie5
2026-06-04  1:53                                 ` [PATCH v9 12/20] net/sxe2: add support for custom UDP tunnel ports liujie5
2026-06-04  1:53                                 ` [PATCH v9 13/20] net/sxe2: support firmware version reading liujie5
2026-06-04  1:53                                 ` [PATCH v9 14/20] net/sxe2: implement get monitor address liujie5
2026-06-04  1:53                                 ` [PATCH v9 15/20] common/sxe2: add shared SFP module definitions liujie5
2026-06-04  1:54                                 ` [PATCH v9 16/20] net/sxe2: support SFP module info and EEPROM access liujie5
2026-06-04  1:54                                 ` [PATCH v9 17/20] net/sxe2: implement private dump info liujie5
2026-06-04  1:54                                 ` [PATCH v9 18/20] net/sxe2: add mbuf validation in Tx debug mode liujie5
2026-06-04  1:54                                 ` [PATCH v9 19/20] drivers: add testpmd commands for private features liujie5
2026-06-04  1:54                                 ` [PATCH v9 20/20] net/sxe2: update sxe2 feature matrix docs liujie5
2026-06-01 15:40         ` [PATCH v3 00/20]net/sxe2: added Linkdata sxe2 ethernet driver Stephen Hemminger
2026-05-31 22:33 ` [PATCH v1 00/20] net/sxe2: added Linkdata sxe " Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260602155240.1002602-2-liujie5@linkdatatechnology.com \
    --to=liujie5@linkdatatechnology.com \
    --cc=dev@dpdk.org \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox