From: Jay Wang <jay.wang2@arm.com>
To: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Cc: dev@dpdk.org, nd@arm.com, Jay Wang <jay.wang2@arm.com>
Subject: [PATCH v1 1/3] net/iavf: add Rx scattered function for 32B desc
Date: Fri, 17 Apr 2026 13:08:29 +0000 [thread overview]
Message-ID: <20260417130833.2503592-2-jay.wang2@arm.com> (raw)
In-Reply-To: <20260417130833.2503592-1-jay.wang2@arm.com>
Added the scattered burst function on AArch64 so that we can leverage
the NEON-optimised Rx raw burst function to handle scattered packets for
the legacy 32B descriptor.
Signed-off-by: Jay Wang <jay.wang2@arm.com>
---
drivers/net/intel/iavf/iavf.h | 1 +
drivers/net/intel/iavf/iavf_rxtx.c | 16 ++-
drivers/net/intel/iavf/iavf_rxtx_vec_neon.c | 110 +++++++++++++++++++-
drivers/net/intel/iavf/meson.build | 2 +-
4 files changed, 122 insertions(+), 7 deletions(-)
diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 403c61e2e8..e4936f3566 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -334,6 +334,7 @@ enum iavf_rx_func_type {
IAVF_RX_BULK_ALLOC,
IAVF_RX_BULK_ALLOC_FLEX_RXD,
IAVF_RX_NEON,
+ IAVF_RX_NEON_SCATTERED,
IAVF_RX_AVX2,
IAVF_RX_AVX2_SCATTERED,
IAVF_RX_AVX2_OFFLOAD,
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 4ff6c18dc4..15566a0e18 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -3551,16 +3551,26 @@ static const struct ci_rx_path_info iavf_rx_path_infos[] = {
}
},
#endif
-#elif defined RTE_ARCH_ARM
+#elif defined(RTE_ARCH_ARM64)
[IAVF_RX_NEON] = {
.pkt_burst = iavf_recv_pkts_vec,
.info = "Vector Neon",
.features = {
- .rx_offloads = IAVF_RX_SCALAR_OFFLOADS,
+ .rx_offloads = IAVF_RX_VECTOR_OFFLOADS,
.simd_width = RTE_VECT_SIMD_128,
.bulk_alloc = true
}
},
+ [IAVF_RX_NEON_SCATTERED] = {
+ .pkt_burst = iavf_recv_scattered_pkts_vec,
+ .info = "Vector Scattered Neon",
+ .features = {
+ .rx_offloads = IAVF_RX_VECTOR_OFFLOADS | RTE_ETH_RX_OFFLOAD_SCATTER,
+ .simd_width = RTE_VECT_SIMD_128,
+ .scattered = true,
+ .bulk_alloc = true
+ }
+ },
#endif
};
@@ -3839,7 +3849,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
if (adapter->rx_bulk_alloc_allowed) {
req_features.bulk_alloc = true;
default_path = IAVF_RX_BULK_ALLOC;
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
if (iavf_rx_vec_dev_check(dev) != -1)
req_features.simd_width = iavf_get_max_simd_bitwidth();
#endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index 28c90b2a72..45e377d728 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2022 Intel Corporation
- * Copyright(c) 2022 Arm Limited
+ * Copyright(c) 2022-2026 Arm Limited
*/
#include <stdint.h>
@@ -145,8 +145,6 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
struct rte_mbuf **__rte_restrict rx_pkts,
uint16_t nb_pkts, uint8_t *split_packet)
{
- RTE_SET_USED(split_packet);
-
volatile union ci_rx_desc *rxdp;
struct ci_rx_entry *sw_ring;
uint16_t nb_pkts_recd;
@@ -164,6 +162,13 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
4, 5, 6, 7 /* octet 4~7, 32bits rss */
};
+ uint8x16_t eop_check = {
+ 0x02, 0x00, 0x02, 0x00,
+ 0x02, 0x00, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+ };
+
uint16x8_t crc_adjust = {
0, 0, /* ignore pkt_type field */
rxq->crc_len, /* sub crc on pkt_len */
@@ -238,6 +243,13 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
+ if (split_packet) {
+ rte_mbuf_prefetch_part2(rx_pkts[pos]);
+ rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+ rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+ rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+ }
+
/* pkts shift the pktlen field to be 16-bit aligned*/
uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]),
len_shl);
@@ -306,6 +318,32 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
staterr = vzipq_u16(sterr_tmp1.val[1],
sterr_tmp2.val[1]).val[0];
+ /* C* extract and record EOP bit */
+ if (split_packet) {
+ uint8x16_t eop_shuf_mask = {
+ 0x00, 0x02, 0x04, 0x06,
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF
+ };
+ uint8x16_t eop_bits;
+
+ /* and with mask to extract bits, flipping 1-0 */
+ eop_bits = vmvnq_u8(vreinterpretq_u8_u16(staterr));
+ eop_bits = vandq_u8(eop_bits, eop_check);
+ /* the staterr values are not in order, as the count
+ * of dd bits doesn't care. However, for end of
+ * packet tracking, we do care, so shuffle. This also
+ * compresses the 32-bit values to 8-bit
+ */
+ eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask);
+
+ /* store the resulting 32-bit value */
+ vst1q_lane_u32((uint32_t *)split_packet,
+ vreinterpretq_u32_u8(eop_bits), 0);
+ split_packet += IAVF_VPMD_DESCS_PER_LOOP;
+ }
+
staterr = vshlq_n_u16(staterr, IAVF_UINT16_BIT - 1);
staterr = vreinterpretq_u16_s16(
vshrq_n_s16(vreinterpretq_s16_u16(staterr),
@@ -341,6 +379,72 @@ iavf_recv_pkts_vec(void *__rte_restrict rx_queue,
return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}
+/*
+ * vPMD receive routine that reassembles single burst of 32 scattered
+ * packets.
+ *
+ * Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ */
+static __rte_always_inline uint16_t
+iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct ci_rx_queue *rxq = rx_queue;
+ uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
+
+ /* get some new buffers */
+ uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+ split_flags);
+
+ if (nb_bufs == 0)
+ return 0;
+
+ /* happy day case, full burst + no packets to be assembled */
+ const uint64_t *split_fl64 = (uint64_t *)split_flags;
+ if (!rxq->pkt_first_seg &&
+ split_fl64[0] == 0 && split_fl64[1] == 0 &&
+ split_fl64[2] == 0 && split_fl64[3] == 0)
+ return nb_bufs;
+
+ /* reassemble any packets that need reassembly */
+ unsigned int i = 0;
+ if (!rxq->pkt_first_seg) {
+ /* find the first split flag, and only reassemble from there */
+ while (i < nb_bufs && !split_flags[i])
+ i++;
+ if (i == nb_bufs)
+ return nb_bufs;
+ rxq->pkt_first_seg = rx_pkts[i];
+ }
+ return i + ci_rx_reassemble_packets(&rx_pkts[i], nb_bufs - i,
+ &split_flags[i], &rxq->pkt_first_seg, &rxq->pkt_last_seg,
+ rxq->crc_len);
+}
+
+/*
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ uint16_t retval = 0;
+
+ while (nb_pkts > IAVF_VPMD_RX_BURST) {
+ uint16_t burst;
+ burst = iavf_recv_scattered_burst_vec(rx_queue,
+ rx_pkts + retval, IAVF_VPMD_RX_BURST);
+ retval += burst;
+ nb_pkts -= burst;
+ if (burst < IAVF_VPMD_RX_BURST)
+ return retval;
+ }
+ /* The last one burst or nb_pkts <= IAVF_VPMD_RX_BURST */
+ return retval + iavf_recv_scattered_burst_vec(rx_queue,
+ rx_pkts + retval, nb_pkts);
+}
+
void __rte_cold
iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq)
{
diff --git a/drivers/net/intel/iavf/meson.build b/drivers/net/intel/iavf/meson.build
index f9576586f6..50630a88c8 100644
--- a/drivers/net/intel/iavf/meson.build
+++ b/drivers/net/intel/iavf/meson.build
@@ -29,7 +29,7 @@ sources = files(
if arch_subdir == 'x86'
sources_avx2 += files('iavf_rxtx_vec_avx2.c')
sources_avx512 += files('iavf_rxtx_vec_avx512.c')
-elif arch_subdir == 'arm'
+elif arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
sources += files('iavf_rxtx_vec_neon.c')
endif
--
2.43.0
next prev parent reply other threads:[~2026-04-17 13:08 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-17 13:08 [PATCH v1 0/3] net/iavf: add NEON support for Rx/Tx paths Jay Wang
2026-04-17 13:08 ` Jay Wang [this message]
2026-04-17 13:08 ` [PATCH v1 2/3] net/iavf: add NEON-optimised Tx burst function Jay Wang
2026-04-17 13:08 ` [PATCH v1 3/3] net/iavf: add NEON support for Rx flex desc Jay Wang
2026-04-20 10:30 ` [PATCH v2 0/3] net/iavf: add NEON support for Rx/Tx paths Jay Wang
2026-04-20 10:30 ` [PATCH v2 1/3] net/iavf: add Rx scattered function for 32B desc Jay Wang
2026-04-20 10:30 ` [PATCH v2 2/3] net/iavf: add NEON-optimised Tx burst function Jay Wang
2026-04-20 10:30 ` [PATCH v2 3/3] net/iavf: add NEON support for Rx flex desc Jay Wang
2026-05-05 11:07 ` [PATCH v3 0/3] net/iavf: add NEON support for Rx/Tx paths Jay Wang
2026-05-05 11:07 ` [PATCH v3 1/3] net/iavf: add Rx scattered function for 32B desc Jay Wang
2026-05-05 11:07 ` [PATCH v3 2/3] net/iavf: add NEON-optimised Tx burst function Jay Wang
2026-05-05 11:07 ` [PATCH v3 3/3] net/iavf: add NEON support for Rx flex desc Jay Wang
2026-05-05 14:06 ` [PATCH v3 0/3] net/iavf: add NEON support for Rx/Tx paths Bruce Richardson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260417130833.2503592-2-jay.wang2@arm.com \
--to=jay.wang2@arm.com \
--cc=dev@dpdk.org \
--cc=nd@arm.com \
--cc=vladimir.medvedkin@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox