All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jay Wang <jay.wang2@arm.com>
To: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Cc: dev@dpdk.org, nd@arm.com, Jay Wang <jay.wang2@arm.com>
Subject: [PATCH v3 1/3] net/iavf: add Rx scattered function for 32B desc
Date: Tue,  5 May 2026 11:07:42 +0000	[thread overview]
Message-ID: <20260505110745.206312-2-jay.wang2@arm.com> (raw)
In-Reply-To: <20260505110745.206312-1-jay.wang2@arm.com>

Added the scattered burst function on AArch64 so that we can leverage
the NEON-optimised Rx raw burst function to handle scattered packets for
the legacy 32B descriptor.

Signed-off-by: Jay Wang <jay.wang2@arm.com>
---
 drivers/net/intel/iavf/iavf.h               |   1 +
 drivers/net/intel/iavf/iavf_rxtx.c          |  16 ++-
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c | 110 +++++++++++++++++++-
 drivers/net/intel/iavf/meson.build          |   2 +-
 4 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 403c61e2e8..e4936f3566 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -334,6 +334,7 @@ enum iavf_rx_func_type {
 	IAVF_RX_BULK_ALLOC,
 	IAVF_RX_BULK_ALLOC_FLEX_RXD,
 	IAVF_RX_NEON,
+	IAVF_RX_NEON_SCATTERED,
 	IAVF_RX_AVX2,
 	IAVF_RX_AVX2_SCATTERED,
 	IAVF_RX_AVX2_OFFLOAD,
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 4ff6c18dc4..15566a0e18 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -3551,16 +3551,26 @@ static const struct ci_rx_path_info iavf_rx_path_infos[] = {
 		}
 	},
 #endif
-#elif defined RTE_ARCH_ARM
+#elif defined(RTE_ARCH_ARM64)
 	[IAVF_RX_NEON] = {
 		.pkt_burst = iavf_recv_pkts_vec,
 		.info = "Vector Neon",
 		.features = {
-			.rx_offloads = IAVF_RX_SCALAR_OFFLOADS,
+			.rx_offloads = IAVF_RX_VECTOR_OFFLOADS,
 			.simd_width = RTE_VECT_SIMD_128,
 			.bulk_alloc = true
 		}
 	},
+	[IAVF_RX_NEON_SCATTERED] = {
+		.pkt_burst = iavf_recv_scattered_pkts_vec,
+		.info = "Vector Scattered Neon",
+		.features = {
+			.rx_offloads = IAVF_RX_VECTOR_OFFLOADS | RTE_ETH_RX_OFFLOAD_SCATTER,
+			.simd_width = RTE_VECT_SIMD_128,
+			.scattered = true,
+			.bulk_alloc = true
+		}
+	},
 #endif
 };
 
@@ -3839,7 +3849,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	if (adapter->rx_bulk_alloc_allowed) {
 		req_features.bulk_alloc = true;
 		default_path = IAVF_RX_BULK_ALLOC;
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
 		if (iavf_rx_vec_dev_check(dev) != -1)
 			req_features.simd_width = iavf_get_max_simd_bitwidth();
 #endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index 28c90b2a72..45e377d728 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2022 Intel Corporation
- * Copyright(c) 2022 Arm Limited
+ * Copyright(c) 2022-2026 Arm Limited
  */
 
 #include <stdint.h>
@@ -145,8 +145,6 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
-	RTE_SET_USED(split_packet);
-
 	volatile union ci_rx_desc *rxdp;
 	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
@@ -164,6 +162,13 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		4, 5, 6, 7    /* octet 4~7, 32bits rss */
 		};
 
+	uint8x16_t eop_check = {
+		0x02, 0x00, 0x02, 0x00,
+		0x02, 0x00, 0x02, 0x00,
+		0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00
+	};
+
 	uint16x8_t crc_adjust = {
 		0, 0,         /* ignore pkt_type field */
 		rxq->crc_len, /* sub crc on pkt_len */
@@ -238,6 +243,13 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
 		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
 
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
 		/* pkts shift the pktlen field to be 16-bit aligned*/
 		uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]),
 					    len_shl);
@@ -306,6 +318,32 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		staterr = vzipq_u16(sterr_tmp1.val[1],
 				    sterr_tmp2.val[1]).val[0];
 
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			uint8x16_t eop_shuf_mask = {
+				0x00, 0x02, 0x04, 0x06,
+				0xFF, 0xFF, 0xFF, 0xFF,
+				0xFF, 0xFF, 0xFF, 0xFF,
+				0xFF, 0xFF, 0xFF, 0xFF
+			};
+			uint8x16_t eop_bits;
+
+			/* and with mask to extract bits, flipping 1-0 */
+			eop_bits = vmvnq_u8(vreinterpretq_u8_u16(staterr));
+			eop_bits = vandq_u8(eop_bits, eop_check);
+			/* the staterr values are not in order, as the count
+			 * of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask);
+
+			/* store the resulting 32-bit value */
+			vst1q_lane_u32((uint32_t *)split_packet,
+				vreinterpretq_u32_u8(eop_bits), 0);
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP;
+		}
+
 		staterr = vshlq_n_u16(staterr, IAVF_UINT16_BIT - 1);
 		staterr = vreinterpretq_u16_s16(
 				vshrq_n_s16(vreinterpretq_s16_u16(staterr),
@@ -341,6 +379,72 @@ iavf_recv_pkts_vec(void *__rte_restrict rx_queue,
 	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/*
+ * vPMD receive routine that reassembles a single burst of 32 scattered
+ * packets.
+ *
+ * Notice:
+ * - if nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, no packets are returned
+ */
+static __rte_always_inline uint16_t
+iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	struct ci_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
+
+	/* get some new buffers, recording per-buffer split flags */
+	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+						split_flags);
+
+	if (nb_bufs == 0)
+		return 0;
+
+	/* fast path: no packet in progress and no split flag set */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+	if (!rxq->pkt_first_seg &&
+			split_fl64[0] == 0 && split_fl64[1] == 0 &&
+			split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	unsigned int i = 0;
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + ci_rx_reassemble_packets(&rx_pkts[i], nb_bufs - i,
+			&split_flags[i], &rxq->pkt_first_seg, &rxq->pkt_last_seg,
+			rxq->crc_len);
+}
+
+/*
+ * vPMD scattered Rx, handled in bursts of at most IAVF_VPMD_RX_BURST.
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_BURST) {
+		uint16_t burst;
+		burst = iavf_recv_scattered_burst_vec(rx_queue,
+				rx_pkts + retval, IAVF_VPMD_RX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_BURST)
+			return retval;
+	}
+	/* final burst: nb_pkts <= IAVF_VPMD_RX_BURST at this point */
+	return retval + iavf_recv_scattered_burst_vec(rx_queue,
+			rx_pkts + retval, nb_pkts);
+}
+
 void __rte_cold
 iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq)
 {
diff --git a/drivers/net/intel/iavf/meson.build b/drivers/net/intel/iavf/meson.build
index f9576586f6..50630a88c8 100644
--- a/drivers/net/intel/iavf/meson.build
+++ b/drivers/net/intel/iavf/meson.build
@@ -29,7 +29,7 @@ sources = files(
 if arch_subdir == 'x86'
     sources_avx2 += files('iavf_rxtx_vec_avx2.c')
     sources_avx512 += files('iavf_rxtx_vec_avx512.c')
-elif arch_subdir == 'arm'
+elif arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
     sources += files('iavf_rxtx_vec_neon.c')
 endif
 
-- 
2.43.0


  reply	other threads:[~2026-05-05 11:08 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-17 13:08 [PATCH v1 0/3] net/iavf: add NEON support for Rx/Tx paths Jay Wang
2026-04-17 13:08 ` [PATCH v1 1/3] net/iavf: add Rx scattered function for 32B desc Jay Wang
2026-04-17 13:08 ` [PATCH v1 2/3] net/iavf: add NEON-optimised Tx burst function Jay Wang
2026-04-17 13:08 ` [PATCH v1 3/3] net/iavf: add NEON support for Rx flex desc Jay Wang
2026-04-20 10:30 ` [PATCH v2 0/3] net/iavf: add NEON support for Rx/Tx paths Jay Wang
2026-04-20 10:30   ` [PATCH v2 1/3] net/iavf: add Rx scattered function for 32B desc Jay Wang
2026-04-20 10:30   ` [PATCH v2 2/3] net/iavf: add NEON-optimised Tx burst function Jay Wang
2026-04-20 10:30   ` [PATCH v2 3/3] net/iavf: add NEON support for Rx flex desc Jay Wang
2026-05-05 11:07   ` [PATCH v3 0/3] net/iavf: add NEON support for Rx/Tx paths Jay Wang
2026-05-05 11:07     ` Jay Wang [this message]
2026-05-05 11:07     ` [PATCH v3 2/3] net/iavf: add NEON-optimised Tx burst function Jay Wang
2026-05-05 11:07     ` [PATCH v3 3/3] net/iavf: add NEON support for Rx flex desc Jay Wang
2026-05-05 14:06     ` [PATCH v3 0/3] net/iavf: add NEON support for Rx/Tx paths Bruce Richardson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260505110745.206312-2-jay.wang2@arm.com \
    --to=jay.wang2@arm.com \
    --cc=dev@dpdk.org \
    --cc=nd@arm.com \
    --cc=vladimir.medvedkin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.