DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Jay Wang <jay.wang2@arm.com>
To: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Cc: dev@dpdk.org, nd@arm.com, Jay Wang <jay.wang2@arm.com>
Subject: [PATCH v1 1/3] net/iavf: add Rx scattered function for 32B desc
Date: Fri, 17 Apr 2026 13:08:29 +0000	[thread overview]
Message-ID: <20260417130833.2503592-2-jay.wang2@arm.com> (raw)
In-Reply-To: <20260417130833.2503592-1-jay.wang2@arm.com>

Added the scattered burst function on AArch64 so that we can leverage
the NEON-optimised Rx raw burst function to handle scattered packets for
the legacy 32B descriptor.

Signed-off-by: Jay Wang <jay.wang2@arm.com>
---
 drivers/net/intel/iavf/iavf.h               |   1 +
 drivers/net/intel/iavf/iavf_rxtx.c          |  16 ++-
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c | 110 +++++++++++++++++++-
 drivers/net/intel/iavf/meson.build          |   2 +-
 4 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 403c61e2e8..e4936f3566 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -334,6 +334,7 @@ enum iavf_rx_func_type {
 	IAVF_RX_BULK_ALLOC,
 	IAVF_RX_BULK_ALLOC_FLEX_RXD,
 	IAVF_RX_NEON,
+	IAVF_RX_NEON_SCATTERED,
 	IAVF_RX_AVX2,
 	IAVF_RX_AVX2_SCATTERED,
 	IAVF_RX_AVX2_OFFLOAD,
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 4ff6c18dc4..15566a0e18 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -3551,16 +3551,26 @@ static const struct ci_rx_path_info iavf_rx_path_infos[] = {
 		}
 	},
 #endif
-#elif defined RTE_ARCH_ARM
+#elif defined(RTE_ARCH_ARM64)
 	[IAVF_RX_NEON] = {
 		.pkt_burst = iavf_recv_pkts_vec,
 		.info = "Vector Neon",
 		.features = {
-			.rx_offloads = IAVF_RX_SCALAR_OFFLOADS,
+			.rx_offloads = IAVF_RX_VECTOR_OFFLOADS,
 			.simd_width = RTE_VECT_SIMD_128,
 			.bulk_alloc = true
 		}
 	},
+	[IAVF_RX_NEON_SCATTERED] = {
+		.pkt_burst = iavf_recv_scattered_pkts_vec,
+		.info = "Vector Scattered Neon",
+		.features = {
+			.rx_offloads = IAVF_RX_VECTOR_OFFLOADS | RTE_ETH_RX_OFFLOAD_SCATTER,
+			.simd_width = RTE_VECT_SIMD_128,
+			.scattered = true,
+			.bulk_alloc = true
+		}
+	},
 #endif
 };
 
@@ -3839,7 +3849,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	if (adapter->rx_bulk_alloc_allowed) {
 		req_features.bulk_alloc = true;
 		default_path = IAVF_RX_BULK_ALLOC;
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
 		if (iavf_rx_vec_dev_check(dev) != -1)
 			req_features.simd_width = iavf_get_max_simd_bitwidth();
 #endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index 28c90b2a72..45e377d728 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2022 Intel Corporation
- * Copyright(c) 2022 Arm Limited
+ * Copyright(c) 2022-2026 Arm Limited
  */
 
 #include <stdint.h>
@@ -145,8 +145,6 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
-	RTE_SET_USED(split_packet);
-
 	volatile union ci_rx_desc *rxdp;
 	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
@@ -164,6 +162,13 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		4, 5, 6, 7    /* octet 4~7, 32bits rss */
 		};
 
+	uint8x16_t eop_check = {
+		0x02, 0x00, 0x02, 0x00,
+		0x02, 0x00, 0x02, 0x00,
+		0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00
+	};
+
 	uint16x8_t crc_adjust = {
 		0, 0,         /* ignore pkt_type field */
 		rxq->crc_len, /* sub crc on pkt_len */
@@ -238,6 +243,13 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
 		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
 
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
 		/* pkts shift the pktlen field to be 16-bit aligned*/
 		uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]),
 					    len_shl);
@@ -306,6 +318,32 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		staterr = vzipq_u16(sterr_tmp1.val[1],
 				    sterr_tmp2.val[1]).val[0];
 
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			uint8x16_t eop_shuf_mask = {
+				0x00, 0x02, 0x04, 0x06,
+				0xFF, 0xFF, 0xFF, 0xFF,
+				0xFF, 0xFF, 0xFF, 0xFF,
+				0xFF, 0xFF, 0xFF, 0xFF
+			};
+			uint8x16_t eop_bits;
+
+			/* and with mask to extract bits, flipping 1-0 */
+			eop_bits = vmvnq_u8(vreinterpretq_u8_u16(staterr));
+			eop_bits = vandq_u8(eop_bits, eop_check);
+			/* the staterr values are not in order, as the count
+			 * of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask);
+
+			/* store the resulting 32-bit value */
+			vst1q_lane_u32((uint32_t *)split_packet,
+				vreinterpretq_u32_u8(eop_bits), 0);
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP;
+		}
+
 		staterr = vshlq_n_u16(staterr, IAVF_UINT16_BIT - 1);
 		staterr = vreinterpretq_u16_s16(
 				vshrq_n_s16(vreinterpretq_s16_u16(staterr),
@@ -341,6 +379,72 @@ iavf_recv_pkts_vec(void *__rte_restrict rx_queue,
 	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/*
+ * vPMD receive routine that reassembles single burst of 32 scattered
+ * packets.
+ *
+ * Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ */
+static __rte_always_inline uint16_t
+iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	struct ci_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+						split_flags);
+
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be assembled */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+	if (!rxq->pkt_first_seg &&
+			split_fl64[0] == 0 && split_fl64[1] == 0 &&
+			split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	unsigned int i = 0;
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble then */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + ci_rx_reassemble_packets(&rx_pkts[i], nb_bufs - i,
+			&split_flags[i], &rxq->pkt_first_seg, &rxq->pkt_last_seg,
+			rxq->crc_len);
+}
+
+/*
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_BURST) {
+		uint16_t burst;
+		burst = iavf_recv_scattered_burst_vec(rx_queue,
+				rx_pkts + retval, IAVF_VPMD_RX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_BURST)
+			return retval;
+	}
+	/* The last one burst or nb_pkts <= IAVF_VPMD_RX_BURST */
+	return retval + iavf_recv_scattered_burst_vec(rx_queue,
+			rx_pkts + retval, nb_pkts);
+}
+
 void __rte_cold
 iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq)
 {
diff --git a/drivers/net/intel/iavf/meson.build b/drivers/net/intel/iavf/meson.build
index f9576586f6..50630a88c8 100644
--- a/drivers/net/intel/iavf/meson.build
+++ b/drivers/net/intel/iavf/meson.build
@@ -29,7 +29,7 @@ sources = files(
 if arch_subdir == 'x86'
     sources_avx2 += files('iavf_rxtx_vec_avx2.c')
     sources_avx512 += files('iavf_rxtx_vec_avx512.c')
-elif arch_subdir == 'arm'
+elif arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
     sources += files('iavf_rxtx_vec_neon.c')
 endif
 
-- 
2.43.0


  reply	other threads:[~2026-04-17 13:08 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-17 13:08 [PATCH v1 0/3] net/iavf: add NEON support for Rx/Tx paths Jay Wang
2026-04-17 13:08 ` Jay Wang [this message]
2026-04-17 13:08 ` [PATCH v1 2/3] net/iavf: add NEON-optimised Tx burst function Jay Wang
2026-04-17 13:08 ` [PATCH v1 3/3] net/iavf: add NEON support for Rx flex desc Jay Wang
2026-04-20 10:30 ` [PATCH v2 0/3] net/iavf: add NEON support for Rx/Tx paths Jay Wang
2026-04-20 10:30   ` [PATCH v2 1/3] net/iavf: add Rx scattered function for 32B desc Jay Wang
2026-04-20 10:30   ` [PATCH v2 2/3] net/iavf: add NEON-optimised Tx burst function Jay Wang
2026-04-20 10:30   ` [PATCH v2 3/3] net/iavf: add NEON support for Rx flex desc Jay Wang
2026-05-05 11:07   ` [PATCH v3 0/3] net/iavf: add NEON support for Rx/Tx paths Jay Wang
2026-05-05 11:07     ` [PATCH v3 1/3] net/iavf: add Rx scattered function for 32B desc Jay Wang
2026-05-05 11:07     ` [PATCH v3 2/3] net/iavf: add NEON-optimised Tx burst function Jay Wang
2026-05-05 11:07     ` [PATCH v3 3/3] net/iavf: add NEON support for Rx flex desc Jay Wang
2026-05-05 14:06     ` [PATCH v3 0/3] net/iavf: add NEON support for Rx/Tx paths Bruce Richardson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260417130833.2503592-2-jay.wang2@arm.com \
    --to=jay.wang2@arm.com \
    --cc=dev@dpdk.org \
    --cc=nd@arm.com \
    --cc=vladimir.medvedkin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox