DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] node: lookup with RISC-V vector extension
@ 2026-02-06  8:16 Sun Yuechi
  2026-02-06  8:18 ` sunyuechi
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: Sun Yuechi @ 2026-02-06  8:16 UTC (permalink / raw)
  To: dev
  Cc: Sun Yuechi, Zijian, Stanisław Kardach, Nithin Dabilpuram,
	Pavan Nikhilesh

Implement ip4_lookup_node_process_vec function for RISC-V architecture
using RISC-V Vector Extension instruction set

Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
Signed-off-by: Zijian <zijian.oerv@isrc.iscas.ac.cn>
---
 doc/guides/rel_notes/release_26_03.rst |   4 +
 lib/eal/riscv/include/rte_vect.h       |   2 +-
 lib/node/ip4_lookup.c                  |   5 +-
 lib/node/ip4_lookup_rvv.h              | 167 +++++++++++++++++++++++++
 4 files changed, 176 insertions(+), 2 deletions(-)
 create mode 100644 lib/node/ip4_lookup_rvv.h

diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst
index 15dabee7a1..769c4dcebb 100644
--- a/doc/guides/rel_notes/release_26_03.rst
+++ b/doc/guides/rel_notes/release_26_03.rst
@@ -55,6 +55,10 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Added RISC-V vector extension support for node library.**
+
+  * Added vectorized IPv4 LPM lookup implementation for the node library
+    using RISC-V Vector Extension instruction set.
 
 Removed Items
 -------------
diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
index a4357e266a..4d16082449 100644
--- a/lib/eal/riscv/include/rte_vect.h
+++ b/lib/eal/riscv/include/rte_vect.h
@@ -19,7 +19,7 @@
 extern "C" {
 #endif
 
-#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
+#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_128
 
 typedef int32_t		xmm_t __attribute__((vector_size(16)));
 
diff --git a/lib/node/ip4_lookup.c b/lib/node/ip4_lookup.c
index f6db3219f0..42db993142 100644
--- a/lib/node/ip4_lookup.c
+++ b/lib/node/ip4_lookup.c
@@ -44,6 +44,8 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
 #include "ip4_lookup_neon.h"
 #elif defined(RTE_ARCH_X86)
 #include "ip4_lookup_sse.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+#include "ip4_lookup_rvv.h"
 #endif
 
 static uint16_t
@@ -208,7 +210,8 @@ ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
 	IP4_LOOKUP_NODE_LPM(node->ctx) = ip4_lookup_nm.lpm_tbl[graph->socket];
 	IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx) = dyn;
 
-#if defined(__ARM_NEON) || defined(RTE_ARCH_X86)
+#if defined(__ARM_NEON) || defined(RTE_ARCH_X86) || \
+	(defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V))
 	if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
 		node->process = ip4_lookup_node_process_vec;
 #endif
diff --git a/lib/node/ip4_lookup_rvv.h b/lib/node/ip4_lookup_rvv.h
new file mode 100644
index 0000000000..74b4aac24c
--- /dev/null
+++ b/lib/node/ip4_lookup_rvv.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
+ */
+
+#ifndef __INCLUDE_IP4_LOOKUP_RVV_H__
+#define __INCLUDE_IP4_LOOKUP_RVV_H__
+
+#define RTE_LPM_LOOKUP_SUCCESS 0x01000000
+#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
+
+static __rte_always_inline vuint32m8_t
+bswap32_vec(vuint32m8_t v, size_t vl)
+{
+	vuint32m8_t low16 = __riscv_vor_vv_u32m8(
+		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF, vl), 24, vl),
+		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF00, vl), 8, vl),
+		vl);
+
+	vuint32m8_t high16 = __riscv_vor_vv_u32m8(
+		__riscv_vsrl_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF0000, vl), 8, vl),
+		__riscv_vsrl_vx_u32m8(v, 24, vl),
+		vl);
+
+	return __riscv_vor_vv_u32m8(low16, high16, vl);
+}
+
+static __rte_always_inline void
+rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
+			uint32_t *hop, size_t vl, uint32_t defv)
+{
+	/* Load IP addresses (network byte order) */
+	vuint32m8_t v_ip = bswap32_vec(__riscv_vle32_v_u32m8(ips, vl), vl);
+
+	vuint32m8_t v_tbl24_byte_offset = __riscv_vsll_vx_u32m8(
+			__riscv_vsrl_vx_u32m8(v_ip, 8, vl), 2, vl);
+
+	vuint32m8_t vtbl_entry = __riscv_vluxei32_v_u32m8(
+		(const uint32_t *)lpm->tbl24, v_tbl24_byte_offset, vl);
+
+	vbool4_t mask = __riscv_vmseq_vx_u32m8_b4(
+		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl),
+		RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
+
+	vuint32m8_t vtbl8_index = __riscv_vsll_vx_u32m8(
+		__riscv_vadd_vv_u32m8(
+			__riscv_vsll_vx_u32m8(
+				__riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl), 8, vl),
+			__riscv_vand_vx_u32m8(v_ip, 0x000000FF, vl), vl),
+		2, vl);
+
+	vtbl_entry = __riscv_vluxei32_v_u32m8_mu(
+		mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);
+
+	vuint32m8_t vnext_hop = __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl);
+	mask = __riscv_vmseq_vx_u32m8_b4(
+		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl);
+
+	vnext_hop = __riscv_vmerge_vxm_u32m8(vnext_hop, defv, mask, vl);
+
+	__riscv_vse32_v_u32m8(hop, vnext_hop, vl);
+}
+
+/* Can be increased further for VLEN > 256 */
+#define RVV_MAX_BURST 64U
+
+static uint16_t
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+	struct rte_mbuf **pkts;
+	struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
+	const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
+	rte_edge_t next_index;
+	void **to_next, **from;
+	uint16_t last_spec = 0;
+	uint16_t n_left_from;
+	uint16_t held = 0;
+	uint32_t drop_nh;
+
+	/* Temporary arrays for batch processing */
+	uint32_t ips[RVV_MAX_BURST];
+	uint32_t res[RVV_MAX_BURST];
+	rte_edge_t next_hops[RVV_MAX_BURST];
+
+	/* Speculative next */
+	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
+	/* Drop node */
+	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
+
+	pkts = (struct rte_mbuf **)objs;
+	from = objs;
+	n_left_from = nb_objs;
+
+	/* Get stream for the speculated next node */
+	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+
+	while (n_left_from > 0) {
+		rte_edge_t fix_spec = 0;
+
+		size_t vl = __riscv_vsetvl_e32m8(RTE_MIN(n_left_from, RVV_MAX_BURST));
+
+		/* Extract IP addresses and metadata from current batch */
+		for (size_t i = 0; i < vl; i++) {
+			struct rte_ipv4_hdr *ipv4_hdr =
+				rte_pktmbuf_mtod_offset(pkts[i], struct rte_ipv4_hdr *,
+						sizeof(struct rte_ether_hdr));
+			ips[i] = ipv4_hdr->dst_addr;
+			node_mbuf_priv1(pkts[i], dyn)->cksum = ipv4_hdr->hdr_checksum;
+			node_mbuf_priv1(pkts[i], dyn)->ttl = ipv4_hdr->time_to_live;
+		}
+
+		/* Perform LPM lookup */
+		rte_lpm_lookup_vec(lpm, ips, res, vl, drop_nh);
+
+		for (size_t i = 0; i < vl; i++) {
+			/* Update statistics */
+			if ((res[i] >> 16) == (drop_nh >> 16))
+				NODE_INCREMENT_XSTAT_ID(node, 0, 1, 1);
+
+			/* Extract next hop and next node */
+			node_mbuf_priv1(pkts[i], dyn)->nh = res[i] & 0xFFFF;
+			next_hops[i] = res[i] >> 16;
+
+			/* Check speculation */
+			fix_spec |= (next_index ^ next_hops[i]);
+		}
+
+		if (unlikely(fix_spec)) {
+			/* Copy successfully speculated packets before this batch */
+			memcpy(to_next, from, last_spec * sizeof(from[0]));
+			from += last_spec;
+			to_next += last_spec;
+			held += last_spec;
+			last_spec = 0;
+
+			/* Process each packet in current batch individually */
+			for (size_t i = 0; i < vl; i++) {
+				if (next_index == next_hops[i]) {
+					*to_next++ = from[i];
+					held++;
+				} else {
+					rte_node_enqueue_x1(graph, node, next_hops[i], from[i]);
+				}
+			}
+
+			from += vl;
+		} else {
+			last_spec += vl;
+		}
+
+		pkts += vl;
+		n_left_from -= vl;
+	}
+
+	/* Handle successfully speculated packets */
+	if (likely(last_spec == nb_objs)) {
+		rte_node_next_stream_move(graph, node, next_index);
+		return nb_objs;
+	}
+
+	held += last_spec;
+	memcpy(to_next, from, last_spec * sizeof(from[0]));
+	rte_node_next_stream_put(graph, node, next_index, held);
+
+	return nb_objs;
+}
+#endif
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] node: lookup with RISC-V vector extension
  2026-02-06  8:16 [PATCH v3] node: lookup with RISC-V vector extension Sun Yuechi
@ 2026-02-06  8:18 ` sunyuechi
  2026-03-28 13:53 ` sunyuechi
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 17+ messages in thread
From: sunyuechi @ 2026-02-06  8:18 UTC (permalink / raw)
  To: dev; +Cc: Zijian, Stanisław Kardach, Nithin Dabilpuram,
	Pavan Nikhilesh

v3: rte_memcpy -> memcpy


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] node: lookup with RISC-V vector extension
  2026-02-06  8:16 [PATCH v3] node: lookup with RISC-V vector extension Sun Yuechi
  2026-02-06  8:18 ` sunyuechi
@ 2026-03-28 13:53 ` sunyuechi
  2026-05-04 16:21   ` Stephen Hemminger
  2026-03-30 20:54 ` Stephen Hemminger
  2026-03-31  3:10 ` [PATCH v4 0/2] RISC-V vector extension support Sun Yuechi
  3 siblings, 1 reply; 17+ messages in thread
From: sunyuechi @ 2026-03-28 13:53 UTC (permalink / raw)
  To: dev
  Cc: Zijian, Stanisław Kardach, Nithin Dabilpuram,
	Pavan Nikhilesh, Thomas Monjalon

On 2/6/26 4:16 PM, Sun Yuechi wrote:

> Implement ip4_lookup_node_process_vec function for RISC-V architecture
> using RISC-V Vector Extension instruction set
>
> Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
> Signed-off-by: Zijian <zijian.oerv@isrc.iscas.ac.cn>
> ---
>   doc/guides/rel_notes/release_26_03.rst |   4 +
>   lib/eal/riscv/include/rte_vect.h       |   2 +-
>   lib/node/ip4_lookup.c                  |   5 +-
>   lib/node/ip4_lookup_rvv.h              | 167 +++++++++++++++++++++++++
>   4 files changed, 176 insertions(+), 2 deletions(-)
>   create mode 100644 lib/node/ip4_lookup_rvv.h

ping


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] node: lookup with RISC-V vector extension
  2026-02-06  8:16 [PATCH v3] node: lookup with RISC-V vector extension Sun Yuechi
  2026-02-06  8:18 ` sunyuechi
  2026-03-28 13:53 ` sunyuechi
@ 2026-03-30 20:54 ` Stephen Hemminger
  2026-03-31  3:06   ` Sun Yuechi
  2026-03-31  3:10 ` [PATCH v4 0/2] RISC-V vector extension support Sun Yuechi
  3 siblings, 1 reply; 17+ messages in thread
From: Stephen Hemminger @ 2026-03-30 20:54 UTC (permalink / raw)
  To: Sun Yuechi
  Cc: dev, Zijian, Stanisław Kardach, Nithin Dabilpuram,
	Pavan Nikhilesh

On Fri,  6 Feb 2026 16:16:35 +0800
Sun Yuechi <sunyuechi@iscas.ac.cn> wrote:

> Implement ip4_lookup_node_process_vec function for RISC-V architecture
> using RISC-V Vector Extension instruction set
> 
> Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
> Signed-off-by: Zijian <zijian.oerv@isrc.iscas.ac.cn>
> ---

Since RISC-V changes do not seem to get looked at, I ran an AI review,
and it found several things that need addressing.

Review: [PATCH v3] node: lookup with RISC-V vector extension

Errors
------

1. Macro redefinition of RTE_LPM_LOOKUP_SUCCESS and
   RTE_LPM_VALID_EXT_ENTRY_BITMASK (ip4_lookup_rvv.h lines 8-9).

   ip4_lookup.c already includes <rte_lpm.h> (line 14) before
   including ip4_lookup_rvv.h. Both rte_lpm.h and this header
   #define RTE_LPM_LOOKUP_SUCCESS and RTE_LPM_VALID_EXT_ENTRY_BITMASK,
   which will produce compiler warnings for macro redefinition.

   Remove both #defines from ip4_lookup_rvv.h — the values are
   already available from rte_lpm.h.

   (The upstream rte_lpm_rvv.h has the same issue, but that file is
   included from rte_lpm.h itself, so the include ordering is
   different. In the node header, the only way to avoid the double
   definition is to remove the two #defines.)

2. RTE_VECT_DEFAULT_SIMD_BITWIDTH change is too broad
   (rte_vect.h: SIMD_DISABLED -> SIMD_128).

   This change affects every DPDK subsystem that calls
   rte_vect_get_max_simd_bitwidth() on RISC-V, not just the node
   library. It globally enables SIMD code paths across all libraries
   and drivers that gate on this value. This should be a separate
   patch with its own justification and testing, not bundled with a
   node-library feature patch. If a RISC-V platform cannot actually
   execute 128-bit vector operations at runtime, this default would
   cause failures.

Warnings
--------

3. Duplicated LPM lookup logic instead of using rte_lpm_lookupx4().

   The NEON and SSE implementations call rte_lpm_lookupx4() which is
   already vectorized for RISC-V via rte_lpm_rvv.h upstream. The
   new rte_lpm_lookup_vec() in ip4_lookup_rvv.h reimplements the
   same tbl24/tbl8 lookup logic. While the wider LMUL (m8 vs m1)
   enables larger batch sizes, duplicating LPM internals means any
   future LPM bug fix or optimization must be applied in two places.

   Consider either:
   (a) Using rte_lpm_lookupx4() in a loop (as NEON/SSE do) with a
       scalar tail, or
   (b) Adding a variable-length bulk lookup to the LPM library
       itself (e.g., extending rte_lpm_lookup_bulk to use RVV
       internally) so the node code can call it without duplicating
       table access logic.

4. No prefetching of packet data.

   The NEON and SSE implementations prefetch both mbuf object lines
   and packet data (Ethernet + IP headers) for upcoming batches.
   This implementation has no prefetch calls at all. For large
   bursts the L1 miss penalty on the rte_pktmbuf_mtod_offset access
   in the IP extraction loop could be significant. Consider adding
   rte_prefetch0 for the next batch's packet headers.

5. Stack arrays sized for VLEN > 256 may be excessive.

   RVV_MAX_BURST is 64, giving 3 * 64 * 4 = 768 bytes of stack
   arrays (ips, res, next_hops). The comment says "can be increased
   further for VLEN > 256" but the current value already exceeds
   what any in-tree RISC-V platform uses today. 32 would be more
   conservative and would still work at VLEN=256, because each batch
   is capped by RVV_MAX_BURST, although it would not fill the register
   group (m8 gives 64 elements at e32 with VLEN=256, so 64 matches the
   full group there). This is minor but worth noting for
   stack-constrained lcore contexts.

Info
----

6. The patch is a nice addition bringing RISC-V vector support to
   the node library. The use of vsetvl for natural tail handling
   (no scalar remainder loop needed) is a good RVV idiom.

7. The fix_spec logic uses bitwise OR accumulation across the batch
   rather than the all-equal AND chain used by NEON. Both are
   correct — the OR detects any mismatch. The NEON approach detects
   exact same next-hop for all four, while the RVV approach detects
   any difference from next_index. The RVV approach is actually
   slightly more precise since it checks against the speculated
   index rather than checking all-same.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] node: lookup with RISC-V vector extension
  2026-03-30 20:54 ` Stephen Hemminger
@ 2026-03-31  3:06   ` Sun Yuechi
  0 siblings, 0 replies; 17+ messages in thread
From: Sun Yuechi @ 2026-03-31  3:06 UTC (permalink / raw)
  To: stephen; +Cc: dev

Hi,

I've addressed the first two points in v4:
- Removed the duplicate macro definitions from ip4_lookup_rvv.h
- Split the SIMD bitwidth change into a separate patch

For the remaining suggestions (3-5), I don't think changes are
necessary at this time. If any human reviewer feels otherwise,
I'm happy to discuss.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v4 0/2] RISC-V vector extension support
  2026-02-06  8:16 [PATCH v3] node: lookup with RISC-V vector extension Sun Yuechi
                   ` (2 preceding siblings ...)
  2026-03-30 20:54 ` Stephen Hemminger
@ 2026-03-31  3:10 ` Sun Yuechi
  2026-03-31  3:10   ` [PATCH v4 1/2] eal/riscv: set default SIMD bitwidth to 128 Sun Yuechi
                     ` (5 more replies)
  3 siblings, 6 replies; 17+ messages in thread
From: Sun Yuechi @ 2026-03-31  3:10 UTC (permalink / raw)
  To: dev; +Cc: Sun Yuechi

v4:
- Removed duplicate macro definitions of RTE_LPM_LOOKUP_SUCCESS and
  RTE_LPM_VALID_EXT_ENTRY_BITMASK
- Split SIMD bitwidth change into separate patch

Sun Yuechi (2):
  eal/riscv: set default SIMD bitwidth to 128
  node: lookup with RISC-V vector extension

 doc/guides/rel_notes/release_26_03.rst |   4 +
 lib/eal/riscv/include/rte_vect.h       |   2 +-
 lib/node/ip4_lookup.c                  |   5 +-
 lib/node/ip4_lookup_rvv.h              | 164 +++++++++++++++++++++++++
 4 files changed, 173 insertions(+), 2 deletions(-)
 create mode 100644 lib/node/ip4_lookup_rvv.h

-- 
2.53.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v4 1/2] eal/riscv: set default SIMD bitwidth to 128
  2026-03-31  3:10 ` [PATCH v4 0/2] RISC-V vector extension support Sun Yuechi
@ 2026-03-31  3:10   ` Sun Yuechi
  2026-03-31  3:10   ` [PATCH v4 2/2] node: lookup with RISC-V vector extension Sun Yuechi
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Sun Yuechi @ 2026-03-31  3:10 UTC (permalink / raw)
  To: dev; +Cc: Sun Yuechi, Stanisław Kardach

Enable vector code paths on RISC-V platforms with V extension.

Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
---
 lib/eal/riscv/include/rte_vect.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
index a4357e266a..4d16082449 100644
--- a/lib/eal/riscv/include/rte_vect.h
+++ b/lib/eal/riscv/include/rte_vect.h
@@ -19,7 +19,7 @@
 extern "C" {
 #endif
 
-#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
+#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_128
 
 typedef int32_t		xmm_t __attribute__((vector_size(16)));
 
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v4 2/2] node: lookup with RISC-V vector extension
  2026-03-31  3:10 ` [PATCH v4 0/2] RISC-V vector extension support Sun Yuechi
  2026-03-31  3:10   ` [PATCH v4 1/2] eal/riscv: set default SIMD bitwidth to 128 Sun Yuechi
@ 2026-03-31  3:10   ` Sun Yuechi
  2026-05-04  7:05   ` [PATCH v4 0/2] RISC-V vector extension support sunyuechi
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Sun Yuechi @ 2026-03-31  3:10 UTC (permalink / raw)
  To: dev
  Cc: Sun Yuechi, Zijian, Nithin Dabilpuram, Pavan Nikhilesh,
	Stanisław Kardach

Implement ip4_lookup_node_process_vec function for RISC-V architecture
using RISC-V Vector Extension instruction set.

Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
Signed-off-by: Zijian <zijian.oerv@isrc.iscas.ac.cn>
---
 doc/guides/rel_notes/release_26_03.rst |   4 +
 lib/node/ip4_lookup.c                  |   5 +-
 lib/node/ip4_lookup_rvv.h              | 164 +++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 lib/node/ip4_lookup_rvv.h

diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst
index 6bb18433f5..ea03c95477 100644
--- a/doc/guides/rel_notes/release_26_03.rst
+++ b/doc/guides/rel_notes/release_26_03.rst
@@ -153,6 +153,10 @@ New Features
   Added handling of the key combination Control+L
   to clear the screen before redisplaying the prompt.
 
+* **Added RISC-V vector extension support for node library.**
+
+  * Added vectorized IPv4 LPM lookup implementation for the node library
+    using RISC-V Vector Extension instruction set.
 
 Removed Items
 -------------
diff --git a/lib/node/ip4_lookup.c b/lib/node/ip4_lookup.c
index f6db3219f0..42db993142 100644
--- a/lib/node/ip4_lookup.c
+++ b/lib/node/ip4_lookup.c
@@ -44,6 +44,8 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
 #include "ip4_lookup_neon.h"
 #elif defined(RTE_ARCH_X86)
 #include "ip4_lookup_sse.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+#include "ip4_lookup_rvv.h"
 #endif
 
 static uint16_t
@@ -208,7 +210,8 @@ ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
 	IP4_LOOKUP_NODE_LPM(node->ctx) = ip4_lookup_nm.lpm_tbl[graph->socket];
 	IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx) = dyn;
 
-#if defined(__ARM_NEON) || defined(RTE_ARCH_X86)
+#if defined(__ARM_NEON) || defined(RTE_ARCH_X86) || \
+	(defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V))
 	if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
 		node->process = ip4_lookup_node_process_vec;
 #endif
diff --git a/lib/node/ip4_lookup_rvv.h b/lib/node/ip4_lookup_rvv.h
new file mode 100644
index 0000000000..0d9db2e19b
--- /dev/null
+++ b/lib/node/ip4_lookup_rvv.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
+ */
+
+#ifndef __INCLUDE_IP4_LOOKUP_RVV_H__
+#define __INCLUDE_IP4_LOOKUP_RVV_H__
+
+static __rte_always_inline vuint32m8_t
+bswap32_vec(vuint32m8_t v, size_t vl)
+{
+	vuint32m8_t low16 = __riscv_vor_vv_u32m8(
+		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF, vl), 24, vl),
+		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF00, vl), 8, vl),
+		vl);
+
+	vuint32m8_t high16 = __riscv_vor_vv_u32m8(
+		__riscv_vsrl_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF0000, vl), 8, vl),
+		__riscv_vsrl_vx_u32m8(v, 24, vl),
+		vl);
+
+	return __riscv_vor_vv_u32m8(low16, high16, vl);
+}
+
+static __rte_always_inline void
+rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
+			uint32_t *hop, size_t vl, uint32_t defv)
+{
+	/* Load IP addresses (network byte order) */
+	vuint32m8_t v_ip = bswap32_vec(__riscv_vle32_v_u32m8(ips, vl), vl);
+
+	vuint32m8_t v_tbl24_byte_offset = __riscv_vsll_vx_u32m8(
+			__riscv_vsrl_vx_u32m8(v_ip, 8, vl), 2, vl);
+
+	vuint32m8_t vtbl_entry = __riscv_vluxei32_v_u32m8(
+		(const uint32_t *)lpm->tbl24, v_tbl24_byte_offset, vl);
+
+	vbool4_t mask = __riscv_vmseq_vx_u32m8_b4(
+		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl),
+		RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
+
+	vuint32m8_t vtbl8_index = __riscv_vsll_vx_u32m8(
+		__riscv_vadd_vv_u32m8(
+			__riscv_vsll_vx_u32m8(
+				__riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl), 8, vl),
+			__riscv_vand_vx_u32m8(v_ip, 0x000000FF, vl), vl),
+		2, vl);
+
+	vtbl_entry = __riscv_vluxei32_v_u32m8_mu(
+		mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);
+
+	vuint32m8_t vnext_hop = __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl);
+	mask = __riscv_vmseq_vx_u32m8_b4(
+		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl);
+
+	vnext_hop = __riscv_vmerge_vxm_u32m8(vnext_hop, defv, mask, vl);
+
+	__riscv_vse32_v_u32m8(hop, vnext_hop, vl);
+}
+
+/* Can be increased further for VLEN > 256 */
+#define RVV_MAX_BURST 64U
+
+static uint16_t
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+	struct rte_mbuf **pkts;
+	struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
+	const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
+	rte_edge_t next_index;
+	void **to_next, **from;
+	uint16_t last_spec = 0;
+	uint16_t n_left_from;
+	uint16_t held = 0;
+	uint32_t drop_nh;
+
+	/* Temporary arrays for batch processing */
+	uint32_t ips[RVV_MAX_BURST];
+	uint32_t res[RVV_MAX_BURST];
+	rte_edge_t next_hops[RVV_MAX_BURST];
+
+	/* Speculative next */
+	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
+	/* Drop node */
+	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
+
+	pkts = (struct rte_mbuf **)objs;
+	from = objs;
+	n_left_from = nb_objs;
+
+	/* Get stream for the speculated next node */
+	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+
+	while (n_left_from > 0) {
+		rte_edge_t fix_spec = 0;
+
+		size_t vl = __riscv_vsetvl_e32m8(RTE_MIN(n_left_from, RVV_MAX_BURST));
+
+		/* Extract IP addresses and metadata from current batch */
+		for (size_t i = 0; i < vl; i++) {
+			struct rte_ipv4_hdr *ipv4_hdr =
+				rte_pktmbuf_mtod_offset(pkts[i], struct rte_ipv4_hdr *,
+						sizeof(struct rte_ether_hdr));
+			ips[i] = ipv4_hdr->dst_addr;
+			node_mbuf_priv1(pkts[i], dyn)->cksum = ipv4_hdr->hdr_checksum;
+			node_mbuf_priv1(pkts[i], dyn)->ttl = ipv4_hdr->time_to_live;
+		}
+
+		/* Perform LPM lookup */
+		rte_lpm_lookup_vec(lpm, ips, res, vl, drop_nh);
+
+		for (size_t i = 0; i < vl; i++) {
+			/* Update statistics */
+			if ((res[i] >> 16) == (drop_nh >> 16))
+				NODE_INCREMENT_XSTAT_ID(node, 0, 1, 1);
+
+			/* Extract next hop and next node */
+			node_mbuf_priv1(pkts[i], dyn)->nh = res[i] & 0xFFFF;
+			next_hops[i] = res[i] >> 16;
+
+			/* Check speculation */
+			fix_spec |= (next_index ^ next_hops[i]);
+		}
+
+		if (unlikely(fix_spec)) {
+			/* Copy successfully speculated packets before this batch */
+			memcpy(to_next, from, last_spec * sizeof(from[0]));
+			from += last_spec;
+			to_next += last_spec;
+			held += last_spec;
+			last_spec = 0;
+
+			/* Process each packet in current batch individually */
+			for (size_t i = 0; i < vl; i++) {
+				if (next_index == next_hops[i]) {
+					*to_next++ = from[i];
+					held++;
+				} else {
+					rte_node_enqueue_x1(graph, node, next_hops[i], from[i]);
+				}
+			}
+
+			from += vl;
+		} else {
+			last_spec += vl;
+		}
+
+		pkts += vl;
+		n_left_from -= vl;
+	}
+
+	/* Handle successfully speculated packets */
+	if (likely(last_spec == nb_objs)) {
+		rte_node_next_stream_move(graph, node, next_index);
+		return nb_objs;
+	}
+
+	held += last_spec;
+	memcpy(to_next, from, last_spec * sizeof(from[0]));
+	rte_node_next_stream_put(graph, node, next_index, held);
+
+	return nb_objs;
+}
+#endif
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v4 0/2] RISC-V vector extension support
  2026-03-31  3:10 ` [PATCH v4 0/2] RISC-V vector extension support Sun Yuechi
  2026-03-31  3:10   ` [PATCH v4 1/2] eal/riscv: set default SIMD bitwidth to 128 Sun Yuechi
  2026-03-31  3:10   ` [PATCH v4 2/2] node: lookup with RISC-V vector extension Sun Yuechi
@ 2026-05-04  7:05   ` sunyuechi
  2026-05-05  6:21   ` [PATCH v5 " Sun Yuechi
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: sunyuechi @ 2026-05-04  7:05 UTC (permalink / raw)
  To: dev

On 3/31/26 11:10 AM, Sun Yuechi wrote:

> v4:
> - Removed duplicate macro definitions of RTE_LPM_LOOKUP_SUCCESS and
>    RTE_LPM_VALID_EXT_ENTRY_BITMASK
> - Split SIMD bitwidth change into separate patch
>
> Sun Yuechi (2):
>    eal/riscv: set default SIMD bitwidth to 128
>    node: lookup with RISC-V vector extension
>
>   doc/guides/rel_notes/release_26_03.rst |   4 +
>   lib/eal/riscv/include/rte_vect.h       |   2 +-
>   lib/node/ip4_lookup.c                  |   5 +-
>   lib/node/ip4_lookup_rvv.h              | 164 +++++++++++++++++++++++++
>   4 files changed, 173 insertions(+), 2 deletions(-)
>   create mode 100644 lib/node/ip4_lookup_rvv.h
ping


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] node: lookup with RISC-V vector extension
  2026-03-28 13:53 ` sunyuechi
@ 2026-05-04 16:21   ` Stephen Hemminger
  2026-05-05  6:20     ` Sun Yuechi
  0 siblings, 1 reply; 17+ messages in thread
From: Stephen Hemminger @ 2026-05-04 16:21 UTC (permalink / raw)
  To: sunyuechi
  Cc: dev, Zijian, Stanisław Kardach, Nithin Dabilpuram,
	Pavan Nikhilesh, Thomas Monjalon

On Sat, 28 Mar 2026 21:53:27 +0800
sunyuechi <sunyuechi@iscas.ac.cn> wrote:

> On 2/6/26 4:16 PM, Sun Yuechi wrote:
> 
> > Implement ip4_lookup_node_process_vec function for RISC-V architecture
> > using RISC-V Vector Extension instruction set
> >
> > Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
> > Signed-off-by: Zijian <zijian.oerv@isrc.iscas.ac.cn>
> > ---
> >   doc/guides/rel_notes/release_26_03.rst |   4 +
> >   lib/eal/riscv/include/rte_vect.h       |   2 +-
> >   lib/node/ip4_lookup.c                  |   5 +-
> >   lib/node/ip4_lookup_rvv.h              | 167 +++++++++++++++++++++++++
> >   4 files changed, 176 insertions(+), 2 deletions(-)
> >   create mode 100644 lib/node/ip4_lookup_rvv.h  
> 
> ping
> 

There was no ack yet.
I ran it through AI for review, and it had lots of feedback.
The only item worth noting is the naming of rte_lpm_lookup_vec,
which should match the other architectures.
---


This series adds RISC-V Vector Extension (RVV) support to the IPv4 LPM
lookup node.  Patch 1/2 is a clean one-liner enabling the default SIMD
bitwidth on RISC-V; cross-checked against the arm/ppc/x86 conventions
in lib/eal/*/include/rte_vect.h, the change is correct and consistent
with how those architectures handle the same define.  No findings on
patch 1/2.

Findings on patch 2/2 below.


[PATCH v4 2/2] node: lookup with RISC-V vector extension
========================================================

Warnings
--------

* lib/node/ip4_lookup_rvv.h:14: the static inline helper is named
  rte_lpm_lookup_vec().  The rte_lpm_* prefix is reserved for the LPM
  library's API namespace (see lib/lpm/rte_lpm*.h).  Defining a static
  inline with that prefix in a node-library private header is
  misleading -- it implies a public LPM API where there is none.

  For comparison, the SVE bulk lookup at lib/lpm/rte_lpm_sve.h:16 uses
  __rte_lpm_lookup_vec (double underscore, internal) and lives in the
  LPM library proper, exposed through rte_lpm.h's #undef/#define
  rte_lpm_lookup_bulk override.  The NEON and SSE node paths
  (lib/node/ip4_lookup_neon.h:114, lib/node/ip4_lookup_sse.h:116) do
  not define their own helpers at all -- they call the public
  rte_lpm_lookupx4() from the LPM library.

  Other static helpers in lib/node/ use the node_* prefix
  (e.g. node_mbuf_priv1, node_mbuf_priv2 in lib/node/node_private.h).

  Two suggested options, in order of preference:

  1. Move the bulk lookup into lib/lpm/rte_lpm_rvv.h as
     __rte_lpm_lookup_vec() with the same signature pattern as the SVE
     version, and have lib/lpm/rte_lpm.h conditionally override
     rte_lpm_lookup_bulk for the RVV case.  The node path then becomes
     a plain rte_lpm_lookup_bulk() call and the implementation is
     reusable by other consumers (FIB, l3fwd, etc.).

  2. Keep the helper local to the node header but rename it -- e.g.
     ip4_lookup_rvv_lpm_lookup() or just lpm_lookup_vec() -- so it
     does not occupy the rte_lpm_* namespace.

Info
----

* lib/node/ip4_lookup_rvv.h: unlike ip4_lookup_neon.h, the RVV path
  does no prefetching of upcoming mbufs or packet headers.  NEON
  prefetches both the next-line of objs[] and the next four packets'
  L3 headers.  On RISC-V cores with hardware prefetchers this may be
  a wash, but on cores without one the per-iteration vl-wide gather
  over pkts[i] and the IPv4 header reads may stall.  Worth measuring.

* lib/node/ip4_lookup_rvv.h: the per-mbuf metadata is written in two
  passes -- cksum/ttl in the first loop, nh in the second.  The NEON
  path packs all three into a uint64_t and writes once via
  node_mbuf_priv1(mbuf, dyn)->u = ...; (the overload struct is laid
  out as { uint16_t nh; uint16_t ttl; uint32_t cksum; } in
  rte_node_mbuf_dynfield.h:48).  A single 64-bit store per mbuf would
  halve the store traffic to the dynfield region.

* The release-notes entry is correctly placed under "New Features".
  Consider mentioning the dependency on RTE_RISCV_FEATURE_V (i.e.
  that this only activates when toolchain/-march reports the V
  extension), so users on non-V RISC-V builds know why they don't
  see a perf change.


Notes from cross-checking (no action needed)
--------------------------------------------

- The bswap32_vec() open-coded byte reversal is correct for the
  little-endian RISC-V configuration DPDK targets (rte_byteorder.h
  defines RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN unconditionally for
  riscv).

- The byte-offset arithmetic for vluxei32 into tbl24 and tbl8 matches
  the scalar lookup in lib/lpm/rte_lpm.h:295-320 (entry index *
  sizeof(uint32_t) via <<2; tbl8 group_idx * 256 + ip_low).  The
  static_assert at rte_lpm.h:121 guarantees
  sizeof(rte_lpm_tbl_entry) == 4.

- The mu (mask-undisturbed) policy on the second vluxei32 correctly
  mirrors the scalar's "only follow tbl8 when VALID_EXT bit is set",
  and per the V spec masked-off elements raise no exceptions, so the
  unconditional pre-computation of vtbl8_index for masked-off lanes
  is safe even when those lanes contain garbage offsets.

- vbool4_t is the correct mask type for SEW=32, LMUL=8 (ratio 4).

- RVV_MAX_BURST=64 with the outer `while (n_left_from > 0)` loop
  correctly chunks the full nb_objs (up to RTE_GRAPH_BURST_SIZE=256)
  through repeated vsetvl calls.

- The miss-counting heuristic `(res[i] >> 16) == (drop_nh >> 16)`
  matches what NEON does at lib/node/ip4_lookup_neon.h:117-120; it
  diverges from the scalar's "rc != 0" only when a user's LPM table
  legitimately resolves to the drop next-node, which is the same
  behavior already present in the existing vector paths.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] node: lookup with RISC-V vector extension
  2026-05-04 16:21   ` Stephen Hemminger
@ 2026-05-05  6:20     ` Sun Yuechi
  0 siblings, 0 replies; 17+ messages in thread
From: Sun Yuechi @ 2026-05-05  6:20 UTC (permalink / raw)
  To: stephen; +Cc: dev

Thanks. I have renamed rte_lpm_lookup_vec to ip4_lookup_rvv_lpm_lookup in v5.

Thanks,
Yuechi


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v5 0/2] RISC-V vector extension support
  2026-03-31  3:10 ` [PATCH v4 0/2] RISC-V vector extension support Sun Yuechi
                     ` (2 preceding siblings ...)
  2026-05-04  7:05   ` [PATCH v4 0/2] RISC-V vector extension support sunyuechi
@ 2026-05-05  6:21   ` Sun Yuechi
  2026-06-02 16:19     ` Thomas Monjalon
  2026-05-05  6:21   ` [PATCH v5 1/2] eal/riscv: set default SIMD bitwidth to 128 Sun Yuechi
  2026-05-05  6:21   ` [PATCH v5 2/2] node: lookup with RISC-V vector extension Sun Yuechi
  5 siblings, 1 reply; 17+ messages in thread
From: Sun Yuechi @ 2026-05-05  6:21 UTC (permalink / raw)
  To: dev; +Cc: Sun Yuechi

v5:
  Rename rte_lpm_lookup_vec to ip4_lookup_rvv_lpm_lookup to avoid the
  rte_lpm_ namespace reserved for the LPM library API.

v4:
  Removed duplicate macro definitions of RTE_LPM_LOOKUP_SUCCESS and
  RTE_LPM_VALID_EXT_ENTRY_BITMASK.
  Split SIMD bitwidth change into separate patch.

Sun Yuechi (2):
  eal/riscv: set default SIMD bitwidth to 128
  node: lookup with RISC-V vector extension

 doc/guides/rel_notes/release_26_03.rst |   4 +
 lib/eal/riscv/include/rte_vect.h       |   2 +-
 lib/node/ip4_lookup.c                  |   5 +-
 lib/node/ip4_lookup_rvv.h              | 164 +++++++++++++++++++++++++
 4 files changed, 173 insertions(+), 2 deletions(-)
 create mode 100644 lib/node/ip4_lookup_rvv.h

-- 
2.54.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v5 1/2] eal/riscv: set default SIMD bitwidth to 128
  2026-03-31  3:10 ` [PATCH v4 0/2] RISC-V vector extension support Sun Yuechi
                     ` (3 preceding siblings ...)
  2026-05-05  6:21   ` [PATCH v5 " Sun Yuechi
@ 2026-05-05  6:21   ` Sun Yuechi
  2026-05-05  6:21   ` [PATCH v5 2/2] node: lookup with RISC-V vector extension Sun Yuechi
  5 siblings, 0 replies; 17+ messages in thread
From: Sun Yuechi @ 2026-05-05  6:21 UTC (permalink / raw)
  To: dev; +Cc: Sun Yuechi, Stanisław Kardach

Enable vector code paths on RISC-V platforms with V extension.

Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
---
 lib/eal/riscv/include/rte_vect.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
index a4357e266a..4d16082449 100644
--- a/lib/eal/riscv/include/rte_vect.h
+++ b/lib/eal/riscv/include/rte_vect.h
@@ -19,7 +19,7 @@
 extern "C" {
 #endif
 
-#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
+#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_128
 
 typedef int32_t		xmm_t __attribute__((vector_size(16)));
 
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v5 2/2] node: lookup with RISC-V vector extension
  2026-03-31  3:10 ` [PATCH v4 0/2] RISC-V vector extension support Sun Yuechi
                     ` (4 preceding siblings ...)
  2026-05-05  6:21   ` [PATCH v5 1/2] eal/riscv: set default SIMD bitwidth to 128 Sun Yuechi
@ 2026-05-05  6:21   ` Sun Yuechi
  2026-06-01 21:01     ` Thomas Monjalon
  5 siblings, 1 reply; 17+ messages in thread
From: Sun Yuechi @ 2026-05-05  6:21 UTC (permalink / raw)
  To: dev
  Cc: Sun Yuechi, Zijian, Nithin Dabilpuram, Pavan Nikhilesh,
	Stanisław Kardach

Implement ip4_lookup_node_process_vec function for RISC-V architecture
using RISC-V Vector Extension instruction set.

Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
Signed-off-by: Zijian <zijian.oerv@isrc.iscas.ac.cn>
---
 doc/guides/rel_notes/release_26_03.rst |   4 +
 lib/node/ip4_lookup.c                  |   5 +-
 lib/node/ip4_lookup_rvv.h              | 164 +++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 lib/node/ip4_lookup_rvv.h

diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst
index 6bb18433f5..ea03c95477 100644
--- a/doc/guides/rel_notes/release_26_03.rst
+++ b/doc/guides/rel_notes/release_26_03.rst
@@ -153,6 +153,10 @@ New Features
   Added handling of the key combination Control+L
   to clear the screen before redisplaying the prompt.
 
+* **Added RISC-V vector extension support for node library.**
+
+  * Added vectorized IPv4 LPM lookup implementation for the node library
+    using RISC-V Vector Extension instruction set.
 
 Removed Items
 -------------
diff --git a/lib/node/ip4_lookup.c b/lib/node/ip4_lookup.c
index f6db3219f0..42db993142 100644
--- a/lib/node/ip4_lookup.c
+++ b/lib/node/ip4_lookup.c
@@ -44,6 +44,8 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
 #include "ip4_lookup_neon.h"
 #elif defined(RTE_ARCH_X86)
 #include "ip4_lookup_sse.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+#include "ip4_lookup_rvv.h"
 #endif
 
 static uint16_t
@@ -208,7 +210,8 @@ ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
 	IP4_LOOKUP_NODE_LPM(node->ctx) = ip4_lookup_nm.lpm_tbl[graph->socket];
 	IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx) = dyn;
 
-#if defined(__ARM_NEON) || defined(RTE_ARCH_X86)
+#if defined(__ARM_NEON) || defined(RTE_ARCH_X86) || \
+	(defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V))
 	if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
 		node->process = ip4_lookup_node_process_vec;
 #endif
diff --git a/lib/node/ip4_lookup_rvv.h b/lib/node/ip4_lookup_rvv.h
new file mode 100644
index 0000000000..d03b78e111
--- /dev/null
+++ b/lib/node/ip4_lookup_rvv.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
+ */
+
+#ifndef __INCLUDE_IP4_LOOKUP_RVV_H__
+#define __INCLUDE_IP4_LOOKUP_RVV_H__
+
+static __rte_always_inline vuint32m8_t
+bswap32_vec(vuint32m8_t v, size_t vl)
+{	/* Reverse the byte order of each 32-bit element of v; vl is the element count. */
+	vuint32m8_t low16 = __riscv_vor_vv_u32m8(
+		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF, vl), 24, vl),
+		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF00, vl), 8, vl),
+		vl);	/* source bytes 0 and 1, moved up to bytes 3 and 2 */
+
+	vuint32m8_t high16 = __riscv_vor_vv_u32m8(
+		__riscv_vsrl_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF0000, vl), 8, vl),
+		__riscv_vsrl_vx_u32m8(v, 24, vl),
+		vl);	/* source bytes 2 and 3, moved down to bytes 1 and 0 */
+	/* low16/high16 are named after the source half; OR them for the swapped word */
+	return __riscv_vor_vv_u32m8(low16, high16, vl);
+}
+
+static __rte_always_inline void
+ip4_lookup_rvv_lpm_lookup(const struct rte_lpm *lpm, const uint32_t *ips,
+			uint32_t *hop, size_t vl, uint32_t defv)
+{	/* Look up vl addresses from ips[] in lpm; store results in hop[], defv on a miss. */
+	/* Load IP addresses (network byte order) and byte-swap each one */
+	vuint32m8_t v_ip = bswap32_vec(__riscv_vle32_v_u32m8(ips, vl), vl);
+	/* tbl24 byte offset per lane: upper 24 address bits (>> 8), scaled by 4 (<< 2) */
+	vuint32m8_t v_tbl24_byte_offset = __riscv_vsll_vx_u32m8(
+			__riscv_vsrl_vx_u32m8(v_ip, 8, vl), 2, vl);
+	/* Indexed load of one 32-bit tbl24 entry per lane */
+	vuint32m8_t vtbl_entry = __riscv_vluxei32_v_u32m8(
+		(const uint32_t *)lpm->tbl24, v_tbl24_byte_offset, vl);
+	/* mask: lanes whose entry has every RTE_LPM_VALID_EXT_ENTRY_BITMASK bit set */
+	vbool4_t mask = __riscv_vmseq_vx_u32m8_b4(
+		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl),
+		RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
+	/* Byte offset despite the name: (((entry & 0xFFFFFF) << 8) + (ip & 0xFF)) << 2 */
+	vuint32m8_t vtbl8_index = __riscv_vsll_vx_u32m8(
+		__riscv_vadd_vv_u32m8(
+			__riscv_vsll_vx_u32m8(
+				__riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl), 8, vl),
+			__riscv_vand_vx_u32m8(v_ip, 0x000000FF, vl), vl),
+		2, vl);
+	/* Masked indexed load from tbl8; vtbl_entry is also the value for masked-off lanes */
+	vtbl_entry = __riscv_vluxei32_v_u32m8_mu(
+		mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);
+	/* Result is the low 24 bits; mask is reused for lanes lacking RTE_LPM_LOOKUP_SUCCESS */
+	vuint32m8_t vnext_hop = __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl);
+	mask = __riscv_vmseq_vx_u32m8_b4(
+		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl);
+	/* Failed lanes receive defv instead of the table value */
+	vnext_hop = __riscv_vmerge_vxm_u32m8(vnext_hop, defv, mask, vl);
+
+	__riscv_vse32_v_u32m8(hop, vnext_hop, vl);
+}
+
+/* Max packets per vector pass (sizes the scratch arrays); can be raised for VLEN > 256 */
+#define RVV_MAX_BURST 64U
+
+static uint16_t
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{	/* Route the nb_objs mbufs in objs by IPv4 LPM lookup; always returns nb_objs. */
+	struct rte_mbuf **pkts;
+	struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
+	const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
+	rte_edge_t next_index;
+	void **to_next, **from;
+	uint16_t last_spec = 0;	/* matched packets starting at from, not yet copied */
+	uint16_t n_left_from;
+	uint16_t held = 0;	/* packets already written through to_next */
+	uint32_t drop_nh;
+
+	/* Per-batch scratch: destination addresses, lookup results, next edges */
+	uint32_t ips[RVV_MAX_BURST];
+	uint32_t res[RVV_MAX_BURST];
+	rte_edge_t next_hops[RVV_MAX_BURST];
+
+	/* Speculate that every packet goes to the rewrite node */
+	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
+	/* Lookup default for misses: drop edge in bits 16 and up, low 16 bits zero */
+	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
+
+	pkts = (struct rte_mbuf **)objs;
+	from = objs;
+	n_left_from = nb_objs;
+
+	/* Get stream for the speculated next node */
+	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+
+	while (n_left_from > 0) {
+		rte_edge_t fix_spec = 0;	/* non-zero once any packet misses the speculation */
+		/* Request at most RVV_MAX_BURST elements so vl fits the scratch arrays */
+		size_t vl = __riscv_vsetvl_e32m8(RTE_MIN(n_left_from, RVV_MAX_BURST));
+
+		/* Collect destination addresses; save checksum and TTL in each mbuf's priv1 */
+		for (size_t i = 0; i < vl; i++) {
+			struct rte_ipv4_hdr *ipv4_hdr =
+				rte_pktmbuf_mtod_offset(pkts[i], struct rte_ipv4_hdr *,
+						sizeof(struct rte_ether_hdr));
+			ips[i] = ipv4_hdr->dst_addr;
+			node_mbuf_priv1(pkts[i], dyn)->cksum = ipv4_hdr->hdr_checksum;
+			node_mbuf_priv1(pkts[i], dyn)->ttl = ipv4_hdr->time_to_live;
+		}
+
+		/* Vector LPM lookup; lanes that miss come back as drop_nh */
+		ip4_lookup_rvv_lpm_lookup(lpm, ips, res, vl, drop_nh);
+
+		for (size_t i = 0; i < vl; i++) {
+			/* Count results whose edge field equals the drop edge (xstat id 0) */
+			if ((res[i] >> 16) == (drop_nh >> 16))
+				NODE_INCREMENT_XSTAT_ID(node, 0, 1, 1);
+
+			/* Low 16 bits: next hop id; remaining upper bits: next node edge */
+			node_mbuf_priv1(pkts[i], dyn)->nh = res[i] & 0xFFFF;
+			next_hops[i] = res[i] >> 16;
+
+			/* Accumulate any difference from the speculated edge */
+			fix_spec |= (next_index ^ next_hops[i]);
+		}
+
+		if (unlikely(fix_spec)) {
+			/* Flush the pending run of matched packets that precedes this batch */
+			memcpy(to_next, from, last_spec * sizeof(from[0]));
+			from += last_spec;
+			to_next += last_spec;
+			held += last_spec;
+			last_spec = 0;
+
+			/* Place this batch packet by packet: speculated stream or its own edge */
+			for (size_t i = 0; i < vl; i++) {
+				if (next_index == next_hops[i]) {
+					*to_next++ = from[i];
+					held++;
+				} else {
+					rte_node_enqueue_x1(graph, node, next_hops[i], from[i]);
+				}
+			}
+
+			from += vl;
+		} else {
+			last_spec += vl;	/* whole batch matched; defer the copy */
+		}
+
+		pkts += vl;
+		n_left_from -= vl;
+	}
+
+	/* Every packet matched the speculation: move the stream, no copy is done here */
+	if (likely(last_spec == nb_objs)) {
+		rte_node_next_stream_move(graph, node, next_index);
+		return nb_objs;
+	}
+	/* Otherwise copy the trailing matched run and commit all held packets */
+	held += last_spec;
+	memcpy(to_next, from, last_spec * sizeof(from[0]));
+	rte_node_next_stream_put(graph, node, next_index, held);
+
+	return nb_objs;
+}
+#endif
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v5 2/2] node: lookup with RISC-V vector extension
  2026-05-05  6:21   ` [PATCH v5 2/2] node: lookup with RISC-V vector extension Sun Yuechi
@ 2026-06-01 21:01     ` Thomas Monjalon
  2026-06-02  5:21       ` sunyuechi
  0 siblings, 1 reply; 17+ messages in thread
From: Thomas Monjalon @ 2026-06-01 21:01 UTC (permalink / raw)
  To: Zijian
  Cc: dev, Sun Yuechi, Nithin Dabilpuram, Pavan Nikhilesh,
	Stanisław Kardach

Hello,

05/05/2026 08:21, Sun Yuechi:
> Implement ip4_lookup_node_process_vec function for RISC-V architecture
> using RISC-V Vector Extension instruction set.
> 
> Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
> Signed-off-by: Zijian <zijian.oerv@isrc.iscas.ac.cn>

Signed-off confirms we can apply the DCO rule:
https://en.wikipedia.org/wiki/Developer_Certificate_of_Origin

For this, we require the (quite) full name of the contributor.
Here I feel Zijian is not the complete name.
Please could you help by telling how we should identify you
with english alphabet?
Thank you



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v5 2/2] node: lookup with RISC-V vector extension
  2026-06-01 21:01     ` Thomas Monjalon
@ 2026-06-02  5:21       ` sunyuechi
  0 siblings, 0 replies; 17+ messages in thread
From: sunyuechi @ 2026-06-02  5:21 UTC (permalink / raw)
  To: Thomas Monjalon, Zijian
  Cc: dev, Nithin Dabilpuram, Pavan Nikhilesh, Stanisław Kardach

 > On 6/2/26 5:01 AM, Thomas Monjalon wrote:

Hi, his full name is Li Zijian

Signed-off-by: Li Zijian <zijian.oerv@isrc.iscas.ac.cn>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v5 0/2] RISC-V vector extension support
  2026-05-05  6:21   ` [PATCH v5 " Sun Yuechi
@ 2026-06-02 16:19     ` Thomas Monjalon
  0 siblings, 0 replies; 17+ messages in thread
From: Thomas Monjalon @ 2026-06-02 16:19 UTC (permalink / raw)
  To: Sun Yuechi; +Cc: dev

> Sun Yuechi (2):
>   eal/riscv: set default SIMD bitwidth to 128
>   node: lookup with RISC-V vector extension

Applied, thanks.




^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2026-06-02 16:19 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-06  8:16 [PATCH v3] node: lookup with RISC-V vector extension Sun Yuechi
2026-02-06  8:18 ` sunyuechi
2026-03-28 13:53 ` sunyuechi
2026-05-04 16:21   ` Stephen Hemminger
2026-05-05  6:20     ` Sun Yuechi
2026-03-30 20:54 ` Stephen Hemminger
2026-03-31  3:06   ` Sun Yuechi
2026-03-31  3:10 ` [PATCH v4 0/2] RISC-V vector extension support Sun Yuechi
2026-03-31  3:10   ` [PATCH v4 1/2] eal/riscv: set default SIMD bitwidth to 128 Sun Yuechi
2026-03-31  3:10   ` [PATCH v4 2/2] node: lookup with RISC-V vector extension Sun Yuechi
2026-05-04  7:05   ` [PATCH v4 0/2] RISC-V vector extension support sunyuechi
2026-05-05  6:21   ` [PATCH v5 " Sun Yuechi
2026-06-02 16:19     ` Thomas Monjalon
2026-05-05  6:21   ` [PATCH v5 1/2] eal/riscv: set default SIMD bitwidth to 128 Sun Yuechi
2026-05-05  6:21   ` [PATCH v5 2/2] node: lookup with RISC-V vector extension Sun Yuechi
2026-06-01 21:01     ` Thomas Monjalon
2026-06-02  5:21       ` sunyuechi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox