DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 05/24] bpf/validate: introduce debugging interface
From: Marat Khalili @ 2026-06-23 14:31 UTC (permalink / raw)
  To: Konstantin Ananyev; +Cc: dev
In-Reply-To: <20260623143215.95318-1-marat.khalili@huawei.com>

Introduce debugging interface for BPF validator. New API lets one
observe evaluation of the validated BPF program, including step
evaluation, setting break- and catchpoints, inspecting possible jumps
and memory accesses in current state, as well as formatting current
state elements for the user. It can be used to build both automated
tests and interactive validation debuggers without tight coupling to a
specific validator implementation.

Signed-off-by: Marat Khalili <marat.khalili@huawei.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>
---
 doc/guides/prog_guide/bpf_lib.rst      |  31 ++
 doc/guides/rel_notes/release_26_07.rst |  10 +-
 lib/bpf/bpf_validate.c                 | 448 ++++++++++++++++-
 lib/bpf/bpf_validate.h                 |  60 +++
 lib/bpf/bpf_validate_debug.c           | 659 +++++++++++++++++++++++++
 lib/bpf/bpf_validate_debug.h           |  86 ++++
 lib/bpf/bpf_value_set.c                | 403 +++++++++++++++
 lib/bpf/bpf_value_set.h                | 126 +++++
 lib/bpf/meson.build                    |   9 +-
 lib/bpf/rte_bpf.h                      |   4 +
 lib/bpf/rte_bpf_validate_debug.h       | 375 ++++++++++++++
 11 files changed, 2205 insertions(+), 6 deletions(-)
 create mode 100644 lib/bpf/bpf_validate.h
 create mode 100644 lib/bpf/bpf_validate_debug.c
 create mode 100644 lib/bpf/bpf_validate_debug.h
 create mode 100644 lib/bpf/bpf_value_set.c
 create mode 100644 lib/bpf/bpf_value_set.h
 create mode 100644 lib/bpf/rte_bpf_validate_debug.h

diff --git a/doc/guides/prog_guide/bpf_lib.rst b/doc/guides/prog_guide/bpf_lib.rst
index ed07a9f9a2c0..b5e52c097cb1 100644
--- a/doc/guides/prog_guide/bpf_lib.rst
+++ b/doc/guides/prog_guide/bpf_lib.rst
@@ -116,6 +116,37 @@ For example, ``(BPF_IND | BPF_W | BPF_LD)`` means:
 and ``R1-R5`` were scratched.
 
 
+Validation Debugging
+--------------------
+
+The DPDK BPF library includes a validation debugging API designed primarily for
+writing comprehensive unit tests for the eBPF verifier. It allows developers to
+introspect the abstract interpretation process step-by-step to guarantee that
+the verifier correctly models the semantics of eBPF instructions.
+
+The validation debugging API operates using a gdb-like approach:
+
+1.  **Initialization:** Create a debug session using
+    ``rte_bpf_validate_debug_create()`` and pass it to the loader via the
+    ``debug`` field in ``struct rte_bpf_prm_ex``.
+2.  **Breakpoints and Catchpoints:** Before loading, use
+    ``rte_bpf_validate_debug_break()`` or ``rte_bpf_validate_debug_catch()``
+    to register callback functions that trigger at specific instruction indices
+    (program counters) or upon specific validation events.
+3.  **State Introspection:** Within the callbacks, the API provides functions
+    like ``rte_bpf_validate_debug_can_access()``,
+    ``rte_bpf_validate_debug_may_jump()``, and various formatting functions
+    to safely inspect the verifier's internal belief about register bounds
+    and memory states at that specific execution point.
+
+When adding a test for a new eBPF instruction or fixing a validator bug,
+developers should utilize the harness provided in
+``app/test/test_bpf_validate.c``. This harness encapsulates the debugging API,
+allowing you to define the expected abstract domains (signed and unsigned
+intervals) for registers before and after a tested instruction, generating
+the necessary eBPF bytecode and breakpoints automatically.
+
+
 Not currently supported eBPF features
 -------------------------------------
 
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 0b1cac3e0d2f..8471966a4992 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -164,12 +164,20 @@ New Features
     for installing already loaded BPF programs as port callbacks
     (as opposed to loading them directly from ELF files).
 
+* **Added BPF validation debugging API.**
+
+  * Introduced a new set of APIs (prefixed with ``rte_bpf_validate_debug_``) to
+    introspect the BPF validator. This provides a mechanism to set breakpoints
+    or catchpoints during validation and inspect the verifier's internal state
+    (such as tracked register bounds). This API is crucial primarily for writing
+    comprehensive tests for the validator, but also serves as a foundation for a
+    future interactive eBPF validation debugger.
+
 * **Added AI review helpers.**
 
   Added AGENTS.md file for AI review
   and supporting scripts to review patches and documentation.
 
-
 Removed Items
 -------------
 
diff --git a/lib/bpf/bpf_validate.c b/lib/bpf/bpf_validate.c
index 362d00c77095..f3f462920a3d 100644
--- a/lib/bpf/bpf_validate.c
+++ b/lib/bpf/bpf_validate.c
@@ -9,9 +9,13 @@
 #include <stdint.h>
 #include <inttypes.h>
 
+#include <rte_bpf_validate_debug.h>
 #include <rte_common.h>
 
 #include "bpf_impl.h"
+#include "bpf_validate.h"
+#include "bpf_validate_debug.h"
+#include "bpf_value_set.h"
 
 #define BPF_ARG_PTR_STACK RTE_BPF_ARG_RESERVED
 
@@ -92,6 +96,7 @@ struct bpf_verifier {
 	struct inst_node *evin;
 	struct evst_pool evst_sr_pool; /* for evst save/restore */
 	struct evst_pool evst_tp_pool; /* for evst track/prune */
+	struct rte_bpf_validate_debug *debug;
 };
 
 struct bpf_ins_check {
@@ -118,6 +123,409 @@ struct bpf_ins_check {
 /* For LD_IND R6 is an implicit CTX register. */
 #define	IND_SRC_REGS	(WRT_REGS ^ 1 << EBPF_REG_6)
 
+/*
+ * Debugging internal interface and helpers.
+ */
+
+/*
+ * Check that the signed and unsigned ranges tracked for a value agree.
+ * Values of undefined type are always accepted.
+ */
+static bool
+reg_val_range_is_valid(const struct bpf_reg_val *rv)
+{
+	bool signed_keeps_sign, unsigned_keeps_sign;
+
+	if (rv->v.type == RTE_BPF_ARG_UNDEF)
+		return true;
+
+	/* Neither interval may be empty. */
+	if (rv->s.min > rv->s.max || rv->u.min > rv->u.max)
+		return false;
+
+	signed_keeps_sign = rv->s.min >= 0 || rv->s.max < 0;
+	unsigned_keeps_sign = rv->u.min > INT64_MAX || rv->u.max <= INT64_MAX;
+
+	/* Both intervals straddle the sign boundary: nothing more to compare. */
+	if (!signed_keeps_sign && !unsigned_keeps_sign)
+		return true;
+
+	/* Otherwise both views must describe the very same interval. */
+	return rv->u.min == (uint64_t)rv->s.min &&
+		rv->u.max == (uint64_t)rv->s.max;
+}
+
+/*
+ * Self-check of the current evaluation state: every tracked register and
+ * every tracked stack slot must pass reg_val_range_is_valid.
+ * Return 1 when all of them do, 0 otherwise.
+ */
+int
+__rte_bpf_validate_state_is_valid(const struct bpf_verifier *verifier)
+{
+	const struct bpf_eval_state *const state = verifier->evst;
+	unsigned int i;
+
+	for (i = 0; i < RTE_DIM(state->rv); i++) {
+		if (!reg_val_range_is_valid(&state->rv[i]))
+			return 0;
+	}
+
+	for (i = 0; i < RTE_DIM(state->sv); i++) {
+		if (!reg_val_range_is_valid(&state->sv[i]))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Check the memory access described by instruction `access` against the
+ * memory area addressed by its base register in the current state.
+ * The tracked offsets of the base register are translated by `off64` and
+ * extended by the operand size before being compared with the area size.
+ *
+ * Return 1 if the specified access instruction is valid, 0 if it is invalid,
+ * -ENOTSUP for instruction classes other than LDX/ST/STX and for loads via
+ * a stack pointer, or -EINVAL if the instruction names a register that is
+ * not part of the tracked state.
+ */
+int
+__rte_bpf_validate_can_access(const struct bpf_verifier *verifier,
+	const struct ebpf_insn *access, uint64_t off64)
+{
+	const struct bpf_eval_state *const st = verifier->evst;
+	const struct bpf_reg_val *rv;
+	/* Set of accessed byte offsets relative to memory area base. */
+	struct value_set access_set;
+	uint32_t opsz;
+
+	switch (BPF_CLASS(access->code)) {
+	case BPF_LDX:
+		/* Register fields are caller-supplied: never index past rv[]. */
+		if (access->src_reg >= RTE_DIM(st->rv))
+			return -EINVAL;
+		rv = &st->rv[access->src_reg];
+		if (rv->v.type == BPF_ARG_PTR_STACK)
+			/* Not supporting stack access queries yet. */
+			return -ENOTSUP;
+		break;
+	case BPF_ST:
+		if (access->dst_reg >= RTE_DIM(st->rv))
+			return -EINVAL;
+		rv = &st->rv[access->dst_reg];
+		break;
+	case BPF_STX:
+		if (access->dst_reg >= RTE_DIM(st->rv) ||
+				access->src_reg >= RTE_DIM(st->rv))
+			return -EINVAL;
+		rv = &st->rv[access->dst_reg];
+		/* A store whose source register is undefined is reported invalid. */
+		if (st->rv[access->src_reg].v.type == RTE_BPF_ARG_UNDEF)
+			return false;
+		break;
+	default:
+		return -ENOTSUP;
+	}
+
+	/* Only pointers to a non-empty memory area can be accessed. */
+	if (!RTE_BPF_ARG_PTR_TYPE(rv->v.type) || rv->v.size == 0)
+		return false;
+
+	access_set = value_set_from_pair(rv->s.min, rv->s.max, rv->u.min, rv->u.max);
+	value_set_translate(&access_set, off64);
+	opsz = bpf_size(BPF_SIZE(access->code));
+	value_set_add_contiguous(&access_set, 0, opsz - 1);
+
+	return value_set_is_covered_by_contiguous(&access_set, 0, rv->v.size - 1);
+}
+
+/* Tell whether `may_jump` knows how to evaluate instruction `code`. */
+static bool
+may_jump_code_is_supported(uint8_t code)
+{
+	bool supported = false;
+
+	if (BPF_CLASS(code) == BPF_JMP) {
+		switch (BPF_OP(code)) {
+		case BPF_JEQ:
+		case EBPF_JNE:
+		case BPF_JGT:
+		case BPF_JGE:
+		case EBPF_JLT:
+		case EBPF_JLE:
+		case EBPF_JSGT:
+		case EBPF_JSGE:
+		case EBPF_JSLT:
+		case EBPF_JSLE:
+			supported = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return supported;
+}
+
+/* Tell whether the operation encoded in `code` is a signed comparison. */
+static bool
+may_jump_code_is_signed(uint8_t code)
+{
+	const uint8_t op = BPF_OP(code);
+
+	return op == EBPF_JSGT || op == EBPF_JSGE ||
+		op == EBPF_JSLT || op == EBPF_JSLE;
+}
+
+/*
+ * Return true if the specified jump condition _may_ be true.
+ *
+ * Only the operation part of `code` is examined. Equality tests work on
+ * `dst_set` and `src_set` directly; ordering tests are delegated to the
+ * value_sets_based_* helpers, which additionally receive `origin`.
+ * Signed and unsigned variants of an ordering test share the same branch
+ * and differ only in the `origin` supplied by the caller.
+ * Callers must filter codes with may_jump_code_is_supported first.
+ */
+static bool
+may_jump(uint8_t code, const struct value_set *origin,
+	const struct value_set *dst_set, const struct value_set *src_set)
+{
+	switch (BPF_OP(code)) {
+	case BPF_JEQ:
+		return value_sets_intersect(dst_set, src_set);
+	case EBPF_JNE:
+		return !(value_set_is_singleton(dst_set) &&
+			value_sets_equal(dst_set, src_set));
+	case BPF_JGT:
+	case EBPF_JSGT:
+		return !value_sets_based_less_or_equal(origin, dst_set, src_set);
+	case BPF_JGE:
+	case EBPF_JSGE:
+		return !value_sets_based_less(origin, dst_set, src_set);
+	case EBPF_JLT:
+	case EBPF_JSLT:
+		return !value_sets_based_less_or_equal(origin, src_set, dst_set);
+	case EBPF_JSLE:
+	case EBPF_JLE:
+		return !value_sets_based_less(origin, src_set, dst_set);
+	}
+	/* may_jump_code_is_supported should have caught this */
+	RTE_ASSERT(false);
+	return false;
+}
+
+/*
+ * Return instruction code for jump condition complement (negated result).
+ *
+ * Opcodes are handled in complementary pairs. XOR-ing `code` with both
+ * members of its pair swaps the operation for its partner and leaves all
+ * other bits of `code` untouched.
+ * Returns 0 for operations that are not part of any listed pair.
+ */
+static uint8_t
+may_jump_code_complement(uint8_t code)
+{
+	switch (BPF_OP(code)) {
+	case BPF_JEQ:
+	case EBPF_JNE:
+		return code ^ BPF_JEQ ^ EBPF_JNE;
+	case BPF_JGT:
+	case EBPF_JLE:
+		return code ^ BPF_JGT ^ EBPF_JLE;
+	case BPF_JGE:
+	case EBPF_JLT:
+		return code ^ BPF_JGE ^ EBPF_JLT;
+	case EBPF_JSGT:
+	case EBPF_JSLE:
+		return code ^ EBPF_JSGT ^ EBPF_JSLE;
+	case EBPF_JSGE:
+	case EBPF_JSLT:
+		return code ^ EBPF_JSGE ^ EBPF_JSLT;
+	}
+	/* may_jump_code_is_supported should have caught this */
+	RTE_ASSERT(false);
+	return 0;
+}
+
+/*
+ * Compute which outcomes of the conditional jump `jump` are possible in the
+ * current evaluation state.
+ *
+ * The destination operand is the tracked state of `jump->dst_reg`.
+ * The source operand is the tracked state of `jump->src_reg` for BPF_X
+ * instructions and the immediate `jump->imm` otherwise; in both cases it is
+ * translated by `imm64`. Undefined registers are treated as the full set.
+ *
+ * Return a combination of RTE_BPF_VALIDATE_DEBUG_MAY_BE_TRUE and
+ * RTE_BPF_VALIDATE_DEBUG_MAY_BE_FALSE, -ENOTSUP if the instruction is not
+ * a supported conditional jump, or -EINVAL if it names a register that is
+ * not part of the tracked state.
+ */
+int
+__rte_bpf_validate_may_jump(const struct bpf_verifier *verifier,
+	const struct ebpf_insn *jump, uint64_t imm64)
+{
+	const struct bpf_eval_state *const st = verifier->evst;
+	const struct bpf_reg_val *rd, *rs;
+	struct value_set dst_set, src_set, origin;
+	int result;
+
+	if (!may_jump_code_is_supported(jump->code))
+		return -ENOTSUP;
+
+	/* Register fields are caller-supplied: never index past rv[]. */
+	if (jump->dst_reg >= RTE_DIM(st->rv))
+		return -EINVAL;
+	if (BPF_SRC(jump->code) == BPF_X && jump->src_reg >= RTE_DIM(st->rv))
+		return -EINVAL;
+
+	rd = &st->rv[jump->dst_reg];
+	dst_set = (rd->v.type == RTE_BPF_ARG_UNDEF) ? value_set_full :
+		value_set_from_pair(rd->s.min, rd->s.max, rd->u.min, rd->u.max);
+
+	rs = BPF_SRC(jump->code) == BPF_X ? &st->rv[jump->src_reg] : NULL;
+	src_set = rs == NULL ? value_set_singleton((int64_t)jump->imm) :
+		rs->v.type == RTE_BPF_ARG_UNDEF ? value_set_full :
+		value_set_from_pair(rs->s.min, rs->s.max, rs->u.min, rs->u.max);
+
+	value_set_translate(&src_set, imm64);
+
+	if (RTE_BPF_ARG_PTR_TYPE(rd->v.type) &&
+			(rs != NULL && RTE_BPF_ARG_PTR_TYPE(rs->v.type)) &&
+			rd->v.size == rs->v.size) {
+		/*
+		 * Both sides are pointers with the same memory area size.
+		 * Until tracking of memory areas is implemented we will consider them
+		 * pointing to the same memory area just because of this.
+		 * In this case our value sets represent offsets from the memory area base,
+		 * which is some unknown distance from the scalar zero (NULL).
+		 * We know however that the memory area cannot cross zero address.
+		 * Thus range of origin relative to memory base starts with 1 byte gap
+		 * after the memory area and ends just before it.
+		 */
+		origin = value_set_contiguous(rd->v.size + 1, -1);
+	} else {
+		/*
+		 * Scalar value of a pointer depends on the memory area base address.
+		 * Each pointer operand is widened by the range of possible bases of
+		 * its own memory area: destination into dst_set, source into src_set.
+		 */
+		if (RTE_BPF_ARG_PTR_TYPE(rd->v.type))
+			value_set_add_contiguous(&dst_set, 1, UINT64_MAX - rd->v.size);
+		if (rs != NULL && RTE_BPF_ARG_PTR_TYPE(rs->v.type))
+			value_set_add_contiguous(&src_set, 1, UINT64_MAX - rs->v.size);
+		origin = value_set_singleton(0);
+	}
+
+	if (may_jump_code_is_signed(jump->code))
+		/* Shift origin to the minimal value for signed comparisons. */
+		value_set_translate(&origin, INT64_MIN);
+
+	result = 0;
+
+	if (may_jump(jump->code, &origin, &dst_set, &src_set))
+		result |= RTE_BPF_VALIDATE_DEBUG_MAY_BE_TRUE;
+
+	/* The condition may be false exactly when its complement may be true. */
+	if (may_jump(may_jump_code_complement(jump->code), &origin, &dst_set, &src_set))
+		result |= RTE_BPF_VALIDATE_DEBUG_MAY_BE_FALSE;
+
+	return result;
+}
+
+/*
+ * snprintf-like helper working on a moving cursor.
+ * On positive result the remaining size is reduced by the formatted length;
+ * the cursor is advanced only while some space is still left.
+ */
+__rte_format_printf(3, 4)
+static int
+buf_printf(char **ptr, ssize_t *szleft, const char *format, ...)
+{
+	va_list ap;
+	int written;
+
+	va_start(ap, format);
+	written = vsnprintf(*ptr, RTE_MAX(0, *szleft), format, ap);
+	va_end(ap);
+
+	if (written <= 0)
+		return written;
+
+	*szleft -= written;
+	if (*szleft > 0)
+		*ptr += written;
+
+	return written;
+}
+
+/*
+ * Print the memory area prefix of a value, if it has one.
+ * Raw values print nothing; unknown types yield -ENOTSUP.
+ */
+static int
+format_memory_area(char **ptr, ssize_t *szleft, const struct bpf_reg_val *rv)
+{
+	int rc;
+
+	switch (rv->v.type) {
+	case RTE_BPF_ARG_RAW:
+		rc = 0;
+		break;
+	case RTE_BPF_ARG_PTR:
+		rc = buf_printf(ptr, szleft, "%%buffer<%zu> + ",
+			(size_t)rv->v.size);
+		break;
+	case RTE_BPF_ARG_PTR_MBUF:
+		rc = buf_printf(ptr, szleft, "%%mbuf<%zu, %zu> + ",
+			(size_t)rv->v.size, (size_t)rv->v.buf_size);
+		break;
+	case BPF_ARG_PTR_STACK:
+		rc = buf_printf(ptr, szleft, "%%stack + ");
+		break;
+	default:
+		rc = -ENOTSUP;
+		break;
+	}
+
+	return rc;
+}
+
+/*
+ * Format min..max interval through the validate-debug API at the cursor,
+ * then update the cursor and the remaining size the same way buf_printf does.
+ */
+static int
+buf_print_interval(char **ptr, ssize_t *szleft, char format, uint64_t min, uint64_t max)
+{
+	const int written = rte_bpf_validate_debug_format_interval(*ptr,
+		RTE_MAX(0, *szleft), format, min, max);
+
+	if (written <= 0)
+		return written;
+
+	*szleft -= written;
+	if (*szleft > 0)
+		*ptr += written;
+
+	return written;
+}
+
+/*
+ * Format rv roughly as "<signed-range> INTERSECT <unsigned-hex-range>"
+ *
+ * Either part is omitted when it carries no extra information; ranges that
+ * fail reg_val_range_is_valid are printed in full and followed by " (!)".
+ * Return 0 on success or the negative result of a failed print.
+ */
+static int
+format_register_range(char **ptr, ssize_t *szleft, const struct bpf_reg_val *rv)
+{
+	int rc;
+	uint64_t expected_unsigned_min, expected_unsigned_max;
+	const bool valid = reg_val_range_is_valid(rv);
+
+	/* Print signed unless trivial (an invalid range is always printed). */
+	if (!valid || rv->s.min != INT64_MIN || rv->s.max != INT64_MAX) {
+		rc = buf_print_interval(ptr, szleft, 'd', rv->s.min, rv->s.max);
+		if (rc < 0)
+			return rc;
+
+		if (valid) {
+			/*
+			 * Skip printing unsigned if it has expected values:
+			 * the same bounds when the signed range keeps its sign,
+			 * the full unsigned range otherwise.
+			 */
+			if (rv->s.min >= 0 || rv->s.max < 0) {
+				expected_unsigned_min = (uint64_t)rv->s.min;
+				expected_unsigned_max = (uint64_t)rv->s.max;
+			} else {
+				expected_unsigned_min = 0;
+				expected_unsigned_max = UINT64_MAX;
+			}
+
+			if (rv->u.min == expected_unsigned_min &&
+					rv->u.max == expected_unsigned_max)
+				return 0;
+		}
+
+		rc = buf_printf(ptr, szleft, " INTERSECT ");
+		if (rc < 0)
+			return rc;
+	}
+
+	rc = buf_print_interval(ptr, szleft, 'x', rv->u.min, rv->u.max);
+	if (rc < 0)
+		return rc;
+
+	if (!valid) {
+		rc = buf_printf(ptr, szleft, " (!)");
+		if (rc < 0)
+			return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Format rv roughly as "<memory-object> + <offsets-range>"
+ *
+ * Undefined values are printed as "%undefined".
+ * Return a negative error code if any of the helpers fails.
+ * NOTE(review): the two success paths report different quantities: the
+ * undefined path returns the snprintf result (length without terminating
+ * zero), while the normal path returns the length plus one. Confirm which
+ * one the public contract expects.
+ */
+static int
+format_reg_val(char *buffer, size_t bufsz, const struct bpf_reg_val *rv)
+{
+	char *ptr = buffer;
+	ssize_t szleft = bufsz;
+	int rc;
+
+	if (rv->v.type == RTE_BPF_ARG_UNDEF)
+		return snprintf(buffer, bufsz, "%%undefined");
+
+	/* Print data area info, if any (raw values print nothing here). */
+	rc = format_memory_area(&ptr, &szleft, rv);
+	if (rc < 0)
+		return rc;
+
+	rc = format_register_range(&ptr, &szleft, rv);
+	if (rc < 0)
+		return rc;
+
+	/*
+	 * At least one snprintf was called and added terminating zero.
+	 * szleft may be negative at this point if the output was truncated.
+	 */
+	RTE_ASSERT(szleft < (ssize_t)bufsz);
+	--szleft;
+
+	return bufsz - szleft;
+}
+
+/*
+ * Format the tracked state of register `reg` into `buffer`.
+ * Return -EINVAL for register numbers not below EBPF_REG_NUM.
+ */
+int
+__rte_bpf_validate_format_register_info(const struct bpf_verifier *verifier,
+	char *buffer, size_t bufsz, uint8_t reg)
+{
+	const struct bpf_reg_val *rv;
+
+	if (reg >= EBPF_REG_NUM)
+		return -EINVAL;
+
+	rv = &verifier->evst->rv[reg];
+
+	return format_reg_val(buffer, bufsz, rv);
+}
+
+/*
+ * Format the tracked state of the stack slot at frame `offset` into `buffer`.
+ * `offset` must be a multiple of 8 (-EINVAL otherwise) within
+ * [-MAX_BPF_STACK_SIZE, 0) (-ERANGE otherwise).
+ */
+int
+__rte_bpf_validate_format_frame_info(const struct bpf_verifier *verifier,
+	char *buffer, size_t bufsz, int32_t offset)
+{
+	int32_t slot;
+
+	if (offset % sizeof(uint64_t) != 0)
+		return -EINVAL;
+
+	if (offset >= 0 || offset < -MAX_BPF_STACK_SIZE)
+		return -ERANGE;
+
+	/* Convert the frame offset into an index into the slot array. */
+	slot = (MAX_BPF_STACK_SIZE + offset) / sizeof(uint64_t);
+
+	return format_reg_val(buffer, bufsz, &verifier->evst->sv[slot]);
+}
+
+/*
+ * Return the verifier's stack size,
+ * or -ERANGE if it does not fit into int32_t.
+ */
+int32_t
+__rte_bpf_validate_get_frame_size(const struct bpf_verifier *verifier)
+{
+	return verifier->stack_sz > INT32_MAX ? -ERANGE : verifier->stack_sz;
+}
+
+
 /*
  * check and evaluate functions for particular instruction types.
  */
@@ -2405,7 +2813,9 @@ evaluate(struct bpf_verifier *bvf)
 	const char *err;
 	const struct ebpf_insn *ins;
 	struct inst_node *next, *node;
-	int rc = 0;
+	int prev_nb_edge;  /* branching number of the previous instruction */
+	int rc, debug_rc;
+	struct rte_bpf_validate_debug *const debug = bvf->prm->debug;
 
 	struct {
 		uint32_t nb_eval;
@@ -2439,11 +2849,15 @@ evaluate(struct bpf_verifier *bvf)
 	ins = bvf->prm->raw.ins;
 	node = bvf->in;
 	next = node;
+	prev_nb_edge = 1;
 
 	memset(&stats, 0, sizeof(stats));
 
-	while (node != NULL) {
+	rc = __rte_bpf_validate_debug_evaluate_start(debug, bvf, bvf->prm);
+	if (rc < 0)
+		return rc;
 
+	while (node != NULL) {
 		/*
 		 * current node evaluation, make sure we evaluate
 		 * each node only once.
@@ -2464,6 +2878,13 @@ evaluate(struct bpf_verifier *bvf)
 			}
 
 			if (ins_chk[op].eval != NULL) {
+				rc = __rte_bpf_validate_debug_evaluate_step(
+					debug, idx, prev_nb_edge > 1 ?
+						RTE_BPF_VALIDATE_DEBUG_EVENT_BRANCH_ENTER :
+						RTE_BPF_VALIDATE_DEBUG_EVENT_STEP);
+				if (rc < 0)
+					break;
+
 				err = ins_chk[op].eval(bvf, ins + idx);
 				stats.nb_eval++;
 				if (err != NULL) {
@@ -2499,10 +2920,17 @@ evaluate(struct bpf_verifier *bvf)
 			 */
 			if (node->nb_edge > 1 && prune_eval_state(bvf, node,
 					next) == 0) {
+				rc = __rte_bpf_validate_debug_evaluate_step(
+					debug, get_node_idx(bvf, next),
+					RTE_BPF_VALIDATE_DEBUG_EVENT_BRANCH_PRUNE);
+				if (rc < 0)
+					break;
+
 				next = NULL;
 				stats.nb_prune++;
 			} else {
 				next->prev_node = node;
+				prev_nb_edge = node->nb_edge;
 				node = next;
 			}
 		} else {
@@ -2511,8 +2939,18 @@ evaluate(struct bpf_verifier *bvf)
 			 * mark it's @start state as safe for future references,
 			 * and proceed with parent.
 			 */
+
+			if (prev_nb_edge != 0) {
+				rc = __rte_bpf_validate_debug_evaluate_step(
+					debug, get_node_idx(bvf, node) + 1,
+					RTE_BPF_VALIDATE_DEBUG_EVENT_BRANCH_RETURN);
+				if (rc < 0)
+					break;
+			}
+
 			node->cur_edge = 0;
 			save_safe_eval_state(bvf, node);
+			prev_nb_edge = 0;
 			node = node->prev_node;
 
 			/* first node will not have prev, signalling finish */
@@ -2532,7 +2970,11 @@ evaluate(struct bpf_verifier *bvf)
 		__func__, bvf, rc,
 		stats.nb_eval, stats.nb_prune, stats.nb_save, stats.nb_restore);
 
-	return rc;
+	debug_rc = __rte_bpf_validate_debug_evaluate_finish(debug, rc);
+	rc = debug_rc < 0 ? debug_rc : rc;
+
+	/* Caller does not expect positive values. */
+	return RTE_MIN(0, rc);
 }
 
 static bool
diff --git a/lib/bpf/bpf_validate.h b/lib/bpf/bpf_validate.h
new file mode 100644
index 000000000000..9912f4fd5c4f
--- /dev/null
+++ b/lib/bpf/bpf_validate.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Huawei Technologies Co., Ltd
+ */
+
+#ifndef _BPF_VALIDATE_H_
+#define _BPF_VALIDATE_H_
+
+/**
+ * @file bpf_validate.h
+ *
+ * Internal-use headers for eBPF validation observability.
+ */
+
+#include <bpf_def.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct bpf_verifier;
+
+/*
+ * Return 1 if the verifier passes internal self-check,
+ * 0 if it fails, or a negative error code.
+ */
+int
+__rte_bpf_validate_state_is_valid(const struct bpf_verifier *verifier);
+
+/*
+ * Return 1 if the specified access instruction is valid,
+ * 0 if it is invalid, or a negative error code.
+ */
+int
+__rte_bpf_validate_can_access(const struct bpf_verifier *verifier,
+	const struct ebpf_insn *access, uint64_t off64);
+
+/* Get possible truth values of the specified jump condition. */
+int
+__rte_bpf_validate_may_jump(const struct bpf_verifier *verifier,
+	const struct ebpf_insn *jump, uint64_t imm64);
+
+/* Format known information about the register for the user. */
+int
+__rte_bpf_validate_format_register_info(const struct bpf_verifier *verifier,
+	char *buffer, size_t bufsz, uint8_t reg);
+
+/* Format known information about the frame location for the user. */
+int
+__rte_bpf_validate_format_frame_info(const struct bpf_verifier *verifier,
+	char *buffer, size_t bufsz, int32_t offset);
+
+/* Return frame size. */
+int32_t
+__rte_bpf_validate_get_frame_size(const struct bpf_verifier *verifier);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BPF_VALIDATE_H_ */
diff --git a/lib/bpf/bpf_validate_debug.c b/lib/bpf/bpf_validate_debug.c
new file mode 100644
index 000000000000..5d18804a74bc
--- /dev/null
+++ b/lib/bpf/bpf_validate_debug.c
@@ -0,0 +1,659 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Huawei Technologies Co., Ltd
+ */
+
+#include "bpf_impl.h"
+#include "bpf_validate.h"
+#include "bpf_validate_debug.h"
+
+#include <eal_export.h>
+#include <rte_bpf_validate_debug.h>
+#include <rte_errno.h>
+#include <rte_per_lcore.h>
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#ifndef LIST_FOREACH_SAFE
+/* We need this macro which neither Linux nor EAL for Linux include yet. */
+#define	LIST_FOREACH_SAFE(var, head, field, tvar)			\
+	for ((var) = LIST_FIRST((head));				\
+	    (var) && ((tvar) = LIST_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+#endif
+
+#define EVENT_ARRAY_LENGTH RTE_BPF_VALIDATE_DEBUG_EVENT_END
+
+struct rte_bpf_validate_debug_point {
+	LIST_ENTRY(rte_bpf_validate_debug_point) list;  /* links into the owning point list */
+	struct rte_bpf_validate_debug_callback callback;  /* user function and its context */
+	uint32_t pc;  /* breakpoint instruction index; catchpoints are created with 0 */
+};
+
+LIST_HEAD(point_list, rte_bpf_validate_debug_point);
+
+struct rte_bpf_validate_debug {
+	/* Accessible immediately after object creation. */
+	struct point_list pending_breakpoints;  /* breakpoints set before evaluate start */
+	struct point_list *catchpoint_lists;  /* EVENT_ARRAY_LENGTH lists indexed by event */
+	struct rte_bpf_validate_debug_callback step_callback;
+
+	/* Accessible only after evaluate start. */
+	const struct bpf_verifier *verifier;
+	const struct rte_bpf_prm_ex *bpf_prm;
+	struct point_list *breakpoint_lists;  /* lists indexed by instruction index */
+	struct rte_bpf_validate_debug_point *last_point;  /* point whose callback was invoked last */
+	uint32_t pc;
+	/* Evaluate stage (only tracking `evaluate` part at the moment). */
+	bool evaluate_started;
+	bool evaluate_finished;
+	int evaluate_result;  /* Only valid if `evaluate_finished` is true. */
+};
+
+/* Point lists functions. */
+
+/* Destroy every point of the list, leaving the list empty. */
+static void
+point_list_destroy(struct point_list *point_list)
+{
+	/* Destroying a point also unlinks it, so keep taking the head. */
+	while (!LIST_EMPTY(point_list))
+		rte_bpf_validate_debug_point_destroy(LIST_FIRST(point_list));
+}
+
+/*
+ * Destroy the points of all `length` lists of the array, then free the array.
+ * A NULL array is accepted and ignored.
+ */
+static void
+point_lists_destroy(struct point_list *point_lists, uint32_t length)
+{
+	uint32_t idx;
+
+	if (point_lists != NULL) {
+		for (idx = 0; idx < length; idx++)
+			point_list_destroy(point_lists + idx);
+
+		free(point_lists);
+	}
+}
+
+/*
+ * Allocate an array of `length` empty point lists.
+ * Return NULL if the allocation fails.
+ */
+static struct point_list *
+point_lists_create(uint32_t length)
+{
+	struct point_list *lists;
+	uint32_t idx;
+
+	/* Never ask calloc for zero elements: its result would be ambiguous. */
+	lists = calloc(RTE_MAX(1u, length), sizeof(*lists));
+	if (lists != NULL) {
+		for (idx = 0; idx < length; idx++)
+			LIST_INIT(lists + idx);
+	}
+
+	return lists;
+}
+
+/*
+ * Move point to a different list.
+ * The point is unlinked from whichever list currently holds it and pushed
+ * at the head of `destination`.
+ */
+static inline void
+point_move(struct rte_bpf_validate_debug_point *point,
+	struct point_list *destination)
+{
+	LIST_REMOVE(point, list);
+	LIST_INSERT_HEAD(destination, point, list);
+}
+
+/*
+ * Move all points between lists (the order is inverted).
+ * Points are taken front to back from `source` and each one is pushed at
+ * the head of `destination`; `source` is empty afterwards.
+ */
+static void
+points_move(struct point_list *source, struct point_list *destination)
+{
+	struct rte_bpf_validate_debug_point *point, *next;
+
+	LIST_FOREACH_SAFE(point, source, list, next)
+		point_move(point, destination);
+	RTE_ASSERT(LIST_EMPTY(source));
+}
+
+/* Pending breakpoints. */
+
+/*
+ * Check pending breakpoints against a program of `nb_ins` instructions.
+ * Return false as soon as one of them has pc outside of the program.
+ */
+static bool
+debug_pending_breakpoints_are_valid(const struct rte_bpf_validate_debug *debug,
+	uint32_t nb_ins)
+{
+	const struct rte_bpf_validate_debug_point *bp;
+	bool all_valid = true;
+
+	LIST_FOREACH(bp, &debug->pending_breakpoints, list) {
+		if (bp->pc >= nb_ins) {
+			all_valid = false;
+			break;
+		}
+	}
+
+	return all_valid;
+}
+
+/*
+ * Move all pending breakpoints to correct per-pc lists.
+ * Each point is filed under breakpoint_lists[point->pc] without a range
+ * check here. NOTE(review): presumably the caller validates pcs with
+ * debug_pending_breakpoints_are_valid first; the caller is not visible here.
+ */
+static void
+debug_pending_breakpoints_restore(struct rte_bpf_validate_debug *debug)
+{
+	struct rte_bpf_validate_debug_point *breakpoint, *next;
+	struct point_list breakpoints;
+
+	/* Invert the list first to preserve point order when we move them. */
+	LIST_INIT(&breakpoints);
+	points_move(&debug->pending_breakpoints, &breakpoints);
+
+	LIST_FOREACH_SAFE(breakpoint, &breakpoints, list, next)
+		point_move(breakpoint, &debug->breakpoint_lists[breakpoint->pc]);
+	RTE_ASSERT(LIST_EMPTY(&breakpoints));
+}
+
+/*
+ * Move all breakpoints from per-pc lists to the pending one.
+ * Walks breakpoint_lists for every pc below bpf_prm->raw.nb_ins, so both
+ * fields must still be set when this is called.
+ */
+static void
+debug_pending_breakpoints_save(struct rte_bpf_validate_debug *debug)
+{
+	struct point_list breakpoints;
+
+	LIST_INIT(&breakpoints);
+	for (uint32_t pc = 0; pc != debug->bpf_prm->raw.nb_ins; ++pc)
+		points_move(&debug->breakpoint_lists[pc], &breakpoints);
+
+	/* Invert the list to restore point order after we moved them. */
+	RTE_ASSERT(LIST_EMPTY(&debug->pending_breakpoints));
+	points_move(&breakpoints, &debug->pending_breakpoints);
+}
+
+/* Debug instance creation and destruction. */
+
+/*
+ * Release a debug instance with its pending breakpoints and catchpoints.
+ * NULL is accepted and ignored.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_destroy, 26.07)
+void
+rte_bpf_validate_debug_destroy(struct rte_bpf_validate_debug *debug)
+{
+	if (debug != NULL) {
+		/* Cannot destroy the instance during validation. */
+		RTE_ASSERT(!debug->evaluate_started);
+
+		point_list_destroy(&debug->pending_breakpoints);
+		point_lists_destroy(debug->catchpoint_lists, EVENT_ARRAY_LENGTH);
+		free(debug);
+	}
+}
+
+/*
+ * Allocate a debug instance with no breakpoints or catchpoints.
+ * Return NULL and set rte_errno to ENOMEM if memory cannot be allocated.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_create, 26.07)
+struct rte_bpf_validate_debug *
+rte_bpf_validate_debug_create(void)
+{
+	struct rte_bpf_validate_debug *debug;
+
+	debug = calloc(1, sizeof(*debug));
+	if (debug != NULL) {
+		LIST_INIT(&debug->pending_breakpoints);
+
+		debug->catchpoint_lists = point_lists_create(EVENT_ARRAY_LENGTH);
+		if (debug->catchpoint_lists != NULL)
+			return debug;
+
+		/* Second allocation failed: undo the first one. */
+		free(debug);
+	}
+
+	rte_errno = ENOMEM;
+	return NULL;
+}
+
+/* Managing callbacks. */
+
+/*
+ * Call back the user function with correct arguments for a point.
+ * The point is recorded as last_point before the call, so the callback can
+ * retrieve it with rte_bpf_validate_debug_get_last_point.
+ * Return whatever the user function returns.
+ */
+static inline int
+debug_point_call_back(struct rte_bpf_validate_debug *debug,
+	struct rte_bpf_validate_debug_point *point)
+{
+	debug->last_point = point;
+	return point->callback.fn(debug, point->callback.ctx);
+}
+
+/*
+ * Call back points of point_list in list order.
+ * Stop at the first callback returning a negative value and return it;
+ * otherwise return the result of the last callback, or 0 for an empty list.
+ */
+static int
+debug_points_call_back(struct rte_bpf_validate_debug *debug,
+	const struct point_list *point_list)
+{
+	struct rte_bpf_validate_debug_point *cur, *following;
+	int rc = 0;
+
+	/* Safe iteration: the next pointer is fetched before the callback runs. */
+	LIST_FOREACH_SAFE(cur, point_list, list, following) {
+		rc = debug_point_call_back(debug, cur);
+		if (rc < 0)
+			break;
+	}
+
+	return rc;
+}
+
+/*
+ * Call back all catchpoints for the specified event.
+ * `event` indexes catchpoint_lists directly and is not range-checked here.
+ */
+static int
+debug_send_event(struct rte_bpf_validate_debug *debug, debug_event_t event)
+{
+	return debug_points_call_back(debug, &debug->catchpoint_lists[event]);
+}
+
+/*
+ * Allocate a point holding a copy of `callback` and `pc`, and push it at
+ * the head of point_list.
+ * Return NULL and set rte_errno to ENOMEM if the allocation fails.
+ */
+static struct rte_bpf_validate_debug_point *
+point_list_insert(struct point_list *point_list,
+	const struct rte_bpf_validate_debug_callback *callback, uint32_t pc)
+{
+	struct rte_bpf_validate_debug_point *new_point;
+
+	new_point = malloc(sizeof(*new_point));
+	if (new_point == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	new_point->pc = pc;
+	new_point->callback = *callback;
+	LIST_INSERT_HEAD(point_list, new_point, list);
+
+	return new_point;
+}
+
+/*
+ * Register `callback` as a breakpoint at instruction `pc`.
+ * Before evaluation starts the breakpoint is queued as pending without
+ * checking `pc`; afterwards `pc` must be below the program length.
+ * Return the new point, or NULL with rte_errno set to EINVAL (bad argument),
+ * ENOENT (pc outside of the program) or ENOMEM.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_break, 26.07)
+struct rte_bpf_validate_debug_point *
+rte_bpf_validate_debug_break(struct rte_bpf_validate_debug *debug, uint32_t pc,
+	const struct rte_bpf_validate_debug_callback *callback)
+{
+	struct point_list *target;
+
+	if (debug == NULL || callback == NULL || callback->fn == NULL) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	if (debug->evaluate_started) {
+		if (pc >= debug->bpf_prm->raw.nb_ins) {
+			rte_errno = ENOENT;
+			return NULL;
+		}
+		target = &debug->breakpoint_lists[pc];
+	} else {
+		target = &debug->pending_breakpoints;
+	}
+
+	return point_list_insert(target, callback, pc);
+}
+
+/*
+ * Register `callback` as a catchpoint for `event`.
+ * Return the new point, or NULL with rte_errno set to EINVAL (bad argument
+ * or event outside of [0, RTE_BPF_VALIDATE_DEBUG_EVENT_END)) or ENOMEM.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_catch, 26.07)
+struct rte_bpf_validate_debug_point *
+rte_bpf_validate_debug_catch(struct rte_bpf_validate_debug *debug,
+	debug_event_t event, const struct rte_bpf_validate_debug_callback *callback)
+{
+	const bool args_ok = debug != NULL && callback != NULL &&
+		callback->fn != NULL &&
+		!(event < 0 || event >= RTE_BPF_VALIDATE_DEBUG_EVENT_END);
+
+	if (!args_ok) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	/* Catchpoints are not bound to an instruction, so pc is stored as 0. */
+	return point_list_insert(&debug->catchpoint_lists[event], callback, 0);
+}
+
+/* Unlink a point from its list and free it. NULL is accepted and ignored. */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_point_destroy, 26.07)
+void
+rte_bpf_validate_debug_point_destroy(struct rte_bpf_validate_debug_point *point)
+{
+	if (point != NULL) {
+		LIST_REMOVE(point, list);
+		free(point);
+	}
+}
+
+/* Querying execution state. */
+
+/*
+ * Return load parameters of the program being validated.
+ * Return NULL with rte_errno set to EINVAL for a NULL instance,
+ * or to ECHILD if evaluation has not started.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_get_bpf_param, 26.07)
+const struct rte_bpf_prm_ex *
+rte_bpf_validate_debug_get_bpf_param(const struct rte_bpf_validate_debug *debug)
+{
+	const struct rte_bpf_prm_ex *prm = NULL;
+
+	if (debug == NULL)
+		rte_errno = EINVAL;
+	else if (!debug->evaluate_started)
+		rte_errno = ECHILD;
+	else
+		prm = debug->bpf_prm;
+
+	return prm;
+}
+
+/*
+ * Retrieve the instruction array of the program being validated.
+ * On success store the array in `*ins`, its length in `*nb_ins`, and return 0.
+ * Return -EINVAL if any argument is NULL, -ECHILD if evaluation has not
+ * started, or -ENOTSUP if the program origin is not RTE_BPF_ORIGIN_RAW.
+ * Output arguments are left untouched on failure.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_get_ins, 26.07)
+int
+rte_bpf_validate_debug_get_ins(const struct rte_bpf_validate_debug *debug,
+	const struct ebpf_insn **ins, uint32_t *nb_ins)
+{
+	/* Output pointers are written unconditionally below: reject NULL. */
+	if (debug == NULL || ins == NULL || nb_ins == NULL)
+		return -EINVAL;
+
+	if (!debug->evaluate_started)
+		return -ECHILD;
+
+	if (debug->bpf_prm->origin != RTE_BPF_ORIGIN_RAW)
+		return -ENOTSUP;
+
+	*ins = debug->bpf_prm->raw.ins;
+	*nb_ins = debug->bpf_prm->raw.nb_ins;
+	return 0;
+}
+
+/*
+ * Return the point whose callback was invoked most recently.
+ * Return NULL with rte_errno set to EINVAL for a NULL instance.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_get_last_point, 26.07)
+struct rte_bpf_validate_debug_point *
+rte_bpf_validate_debug_get_last_point(const struct rte_bpf_validate_debug *debug)
+{
+	if (debug != NULL)
+		return debug->last_point;
+
+	rte_errno = EINVAL;
+	return NULL;
+}
+
+/*
+ * Return the current program counter, or UINT32_MAX if the instance is NULL
+ * or evaluation has not started.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_get_pc, 26.07)
+uint32_t
+rte_bpf_validate_debug_get_pc(const struct rte_bpf_validate_debug *debug)
+{
+	const bool running = debug != NULL && debug->evaluate_started;
+
+	return running ? debug->pc : UINT32_MAX;
+}
+
+/*
+ * Retrieve the result of a finished evaluation.
+ * On success store the result in `*result` and return 0.
+ * Return -EINVAL if any argument is NULL, or -EAGAIN if evaluation has not
+ * finished yet. `*result` is left untouched on failure.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_get_validation_result, 26.07)
+int
+rte_bpf_validate_debug_get_validation_result(const struct rte_bpf_validate_debug *debug,
+	int *result)
+{
+	/* The output pointer is written unconditionally below: reject NULL. */
+	if (debug == NULL || result == NULL)
+		return -EINVAL;
+
+	if (!debug->evaluate_finished)
+		return -EAGAIN;
+
+	*result = debug->evaluate_result;
+
+	return 0;
+}
+
+/* Querying VM state. */
+
+/*
+ * Public wrapper of __rte_bpf_validate_can_access.
+ * Return -EINVAL for NULL arguments, -ECHILD if evaluation has not started,
+ * otherwise the result of the internal query.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_can_access, 26.07)
+int
+rte_bpf_validate_debug_can_access(const struct rte_bpf_validate_debug *debug,
+	const struct ebpf_insn *access, uint64_t off64)
+{
+	int rc;
+
+	if (debug == NULL || access == NULL)
+		rc = -EINVAL;
+	else if (!debug->evaluate_started)
+		rc = -ECHILD;
+	else
+		rc = __rte_bpf_validate_can_access(debug->verifier, access, off64);
+
+	return rc;
+}
+
+/*
+ * Public wrapper of __rte_bpf_validate_may_jump.
+ * Return -EINVAL for NULL arguments, -ECHILD if evaluation has not started,
+ * otherwise the result of the internal query.
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_may_jump, 26.07)
+int
+rte_bpf_validate_debug_may_jump(const struct rte_bpf_validate_debug *debug,
+	const struct ebpf_insn *jump, uint64_t imm64)
+{
+	int rc;
+
+	if (debug == NULL || jump == NULL)
+		rc = -EINVAL;
+	else if (!debug->evaluate_started)
+		rc = -ECHILD;
+	else
+		rc = __rte_bpf_validate_may_jump(debug->verifier, jump, imm64);
+
+	return rc;
+}
+
+/* Formatting VM state for user. */
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_format_register_info, 26.07)
+int
+rte_bpf_validate_debug_format_register_info(const struct rte_bpf_validate_debug *debug,
+	char *buffer, size_t bufsz, uint8_t reg)
+{
+	if (debug == NULL)
+		return -EINVAL;
+
+	/* Register state is formatted by the validator, and only during evaluation. */
+	if (debug->evaluate_started)
+		return __rte_bpf_validate_format_register_info(debug->verifier,
+			buffer, bufsz, reg);
+
+	return -ECHILD;
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_format_frame_info, 26.07)
+int
+rte_bpf_validate_debug_format_frame_info(const struct rte_bpf_validate_debug *debug,
+	char *buffer, size_t bufsz, int32_t offset)
+{
+	if (debug == NULL)
+		return -EINVAL;
+
+	/* Frame state is formatted by the validator, and only during evaluation. */
+	if (debug->evaluate_started)
+		return __rte_bpf_validate_format_frame_info(debug->verifier,
+			buffer, bufsz, offset);
+
+	return -ECHILD;
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_get_frame_size, 26.07)
+int32_t
+rte_bpf_validate_debug_get_frame_size(const struct rte_bpf_validate_debug *debug)
+{
+	if (debug == NULL)
+		return -EINVAL;
+
+	/* The frame size is taken from the validator, and only during evaluation. */
+	if (debug->evaluate_started)
+		return __rte_bpf_validate_get_frame_size(debug->verifier);
+
+	return -ECHILD;
+}
+
+/* Courtesy formatting functions for user-supplied values. */
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_format_value, 26.07)
+/*
+ * Format `value` into `buffer` (at most `bufsz` bytes, snprintf semantics).
+ *
+ * `format` selects the representation:
+ *   'd' - signed decimal, with well-known limits printed symbolically and
+ *         values close to the int32_t/int64_t range ends printed as offsets;
+ *   'x' - hexadecimal, with UINT64_MAX printed symbolically.
+ *
+ * Returns the snprintf result, or -EINVAL for an unknown `format`.
+ */
+int
+rte_bpf_validate_debug_format_value(char *buffer, size_t bufsz, char format,
+	uint64_t value)
+{
+	/* Negative limits are stored sign-extended to 64 bits. */
+	static const struct {
+		uint64_t value;
+		const char *name;
+	} constants[] = {
+		{ .value = INT64_MIN, .name = "INT64_MIN" },
+		{ .value = INT32_MIN, .name = "INT32_MIN" },
+		{ .value = INT16_MIN, .name = "INT16_MIN" },
+		{ .value = INT8_MIN, .name = "INT8_MIN" },
+		{ .value = INT8_MAX, .name = "INT8_MAX" },
+		{ .value = UINT8_MAX, .name = "UINT8_MAX" },
+		{ .value = INT16_MAX, .name = "INT16_MAX" },
+		{ .value = UINT16_MAX, .name = "UINT16_MAX" },
+		{ .value = INT32_MAX, .name = "INT32_MAX" },
+		{ .value = UINT32_MAX, .name = "UINT32_MAX" },
+		{ .value = INT64_MAX, .name = "INT64_MAX" },
+		/* UINT64_MAX omitted on purpose, it looks better as -1 */
+	};
+
+	switch (format) {
+	case 'd':
+		/* size_t index: RTE_DIM() yields an unsigned count. */
+		for (size_t ci = 0; ci != RTE_DIM(constants); ++ci)
+			if (constants[ci].value == value)
+				return snprintf(buffer, bufsz, "%s", constants[ci].name);
+		/*
+		 * Special case numbers close to int32_t or int64_t range ends,
+		 * since they are hard to recognize in decimal otherwise.
+		 * The differences below are small unsigned values, so they are
+		 * printed with the matching unsigned conversion.
+		 */
+		if (value - INT64_MIN < 1000000)
+			return snprintf(buffer, bufsz, "INT64_MIN+%" PRIu64,
+				value - INT64_MIN);
+		if (INT64_MAX - value < 1000000)
+			return snprintf(buffer, bufsz, "INT64_MAX-%" PRIu64,
+				INT64_MAX - value);
+		if (value - INT32_MIN < 1000)
+			return snprintf(buffer, bufsz, "INT32_MIN+%" PRIu64,
+				value - INT32_MIN);
+		if (INT32_MAX - value < 1000)
+			return snprintf(buffer, bufsz, "INT32_MAX-%" PRIu64,
+				INT32_MAX - value);
+		/* Reinterpret as signed so the argument type matches PRId64. */
+		return snprintf(buffer, bufsz, "%" PRId64, (int64_t)value);
+	case 'x':
+		/* Special case only the common case of UINT64_MAX. */
+		if (value == UINT64_MAX)
+			return snprintf(buffer, bufsz, "%s", "UINT64_MAX");
+		return snprintf(buffer, bufsz, "%#" PRIx64, value);
+	default:
+		return -EINVAL;
+	}
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_validate_debug_format_interval, 26.07)
+int
+rte_bpf_validate_debug_format_interval(char *buffer, size_t bufsz, char format,
+	uint64_t min, uint64_t max)
+{
+	char lo_text[32], hi_text[32];
+	int rc;
+
+	/* A degenerate interval is printed as a single value. */
+	if (min == max)
+		return rte_bpf_validate_debug_format_value(buffer, bufsz, format, min);
+
+	/* Format both ends separately; the upper end is skipped if the lower fails. */
+	rc = rte_bpf_validate_debug_format_value(lo_text, sizeof(lo_text), format, min);
+	if (rc >= 0)
+		rc = rte_bpf_validate_debug_format_value(hi_text, sizeof(hi_text), format, max);
+	if (rc < 0)
+		return rc;
+
+	return snprintf(buffer, bufsz, "%s..%s", lo_text, hi_text);
+}
+
+/* Evaluation start and finish. */
+
+/*
+ * Free all resources associated with current evaluation.
+ *
+ * Must only be called while an evaluation is marked as started (asserted).
+ * Afterwards the per-instruction breakpoint lists are released and the
+ * instance is marked as not started.
+ */
+static void
+debug_evaluate_close(struct rte_bpf_validate_debug *debug)
+{
+	RTE_ASSERT(debug->evaluate_started);
+	/* Runs before the lists are freed - presumably it still reads them; confirm. */
+	debug_pending_breakpoints_save(debug);
+	free(debug->breakpoint_lists);
+	debug->breakpoint_lists = NULL;
+	debug->evaluate_started = false;
+}
+
+/*
+ * Begin a new evaluation on `debug`.
+ *
+ * Returns 0 immediately if `debug` is NULL. Otherwise returns -EINVAL for
+ * missing arguments or a non-raw program origin, -EEXIST if an evaluation is
+ * already in progress, -ENOENT if pending breakpoints do not fit the program,
+ * -ENOMEM if the breakpoint lists cannot be allocated, or the negative value
+ * returned while delivering the VALIDATION_START event.
+ */
+int
+__rte_bpf_validate_debug_evaluate_start(struct rte_bpf_validate_debug *debug,
+	const struct bpf_verifier *verifier, const struct rte_bpf_prm_ex *bpf_prm)
+{
+	if (debug == NULL)
+		return 0;
+
+	if (verifier == NULL || bpf_prm == NULL ||
+			bpf_prm->origin != RTE_BPF_ORIGIN_RAW)
+		return -EINVAL;
+
+	if (debug->evaluate_started) {
+		RTE_BPF_LOG_FUNC_LINE(ERR, "already started");
+		return -EEXIST;
+	}
+
+	/*
+	 * A new evaluation attempt begins here: forget the outcome of any
+	 * previous run so that the result query does not report a stale value
+	 * when the same debug instance is reused.
+	 */
+	debug->evaluate_finished = false;
+
+	if (!debug_pending_breakpoints_are_valid(debug, bpf_prm->raw.nb_ins))
+		return -ENOENT;
+
+	debug->verifier = verifier;
+	debug->bpf_prm = bpf_prm;
+	debug->breakpoint_lists = point_lists_create(bpf_prm->raw.nb_ins);
+	if (debug->breakpoint_lists == NULL)
+		return -ENOMEM;
+	debug_pending_breakpoints_restore(debug);
+	debug->last_point = NULL;
+	debug->pc = 0;
+	debug->evaluate_started = true;
+
+	const int rc = debug_send_event(debug,
+		RTE_BPF_VALIDATE_DEBUG_EVENT_VALIDATION_START);
+	if (rc < 0) {
+		/* The start event was rejected: undo the setup above. */
+		debug_evaluate_close(debug);
+		return rc;
+	}
+
+	RTE_BPF_LOG_FUNC_LINE(DEBUG, "evaluate started");
+	return 0;
+}
+
+/*
+ * Report one evaluation step at `pc` to the debug instance.
+ *
+ * Returns 0 immediately if `debug` is NULL, -ECHILD if no evaluation has been
+ * started, and -EINVAL if `pc` is greater than the instruction count or
+ * `event` is outside the valid event range. Otherwise notifications are
+ * delivered in order (invalid-state event, the specific `event`, breakpoints
+ * at `pc`, the generic STEP event); the first negative value stops the chain
+ * and is returned.
+ */
+int
+__rte_bpf_validate_debug_evaluate_step(struct rte_bpf_validate_debug *debug,
+	uint32_t pc, debug_event_t event)
+{
+	int rc;
+
+	if (debug == NULL)
+		return 0;
+
+	if (!debug->evaluate_started) {
+		RTE_BPF_LOG_FUNC_LINE(ERR, "not started");
+		return -ECHILD;
+	}
+
+	/* pc == nb_ins is accepted: it denotes the position after the last instruction. */
+	if (pc > debug->bpf_prm->raw.nb_ins || event < 0 ||
+			event >= RTE_BPF_VALIDATE_DEBUG_EVENT_END)
+		return -EINVAL;
+
+	debug->pc = pc;
+
+	/* A zero return from the self-check is reported as an INVALID_STATE event. */
+	rc = __rte_bpf_validate_state_is_valid(debug->verifier);
+	if (rc == 0)
+		rc = debug_send_event(debug,
+			RTE_BPF_VALIDATE_DEBUG_EVENT_INVALID_STATE);
+
+	/* Deliver the specific event first; plain STEP is sent once at the end. */
+	if (event != RTE_BPF_VALIDATE_DEBUG_EVENT_STEP)
+		rc = rc < 0 ? rc : debug_send_event(debug, event);
+
+	/*
+	 * NOTE(review): breakpoint_lists[pc] is indexed here while the range
+	 * check above admits pc == nb_ins; confirm that point_lists_create()
+	 * allocates an entry for it or that callers never pass pc == nb_ins
+	 * with these two events.
+	 */
+	if (event == RTE_BPF_VALIDATE_DEBUG_EVENT_STEP ||
+			event == RTE_BPF_VALIDATE_DEBUG_EVENT_BRANCH_ENTER)
+		/* Stepping into a real instruction to execute. */
+		rc = rc < 0 ? rc : debug_points_call_back(debug,
+			&debug->breakpoint_lists[pc]);
+
+	rc = rc < 0 ? rc : debug_send_event(debug,
+		RTE_BPF_VALIDATE_DEBUG_EVENT_STEP);
+
+	return rc;
+}
+
+/*
+ * Record the evaluation result, deliver the final event and release the
+ * per-evaluation resources.
+ */
+int
+__rte_bpf_validate_debug_evaluate_finish(struct rte_bpf_validate_debug *debug,
+	int result)
+{
+	int rc = 0;
+
+	if (debug == NULL)
+		return 0;
+
+	if (!debug->evaluate_started) {
+		RTE_BPF_LOG_FUNC_LINE(ERR, "not started");
+		return -ECHILD;
+	}
+
+	debug->evaluate_finished = true;
+	debug->evaluate_result = result;
+
+	/* No final event is sent for a cancelled evaluation. */
+	if (result != -ECANCELED) {
+		if (result < 0)
+			/* Last known pc is the place we failed. */
+			rc = __rte_bpf_validate_debug_evaluate_step(debug, debug->pc,
+				RTE_BPF_VALIDATE_DEBUG_EVENT_VALIDATION_FAILURE);
+		else
+			/* Show program end, not particular instruction. */
+			rc = __rte_bpf_validate_debug_evaluate_step(debug,
+				debug->bpf_prm->raw.nb_ins,
+				RTE_BPF_VALIDATE_DEBUG_EVENT_VALIDATION_SUCCESS);
+	}
+
+	debug_evaluate_close(debug);
+
+	return rc;
+}
diff --git a/lib/bpf/bpf_validate_debug.h b/lib/bpf/bpf_validate_debug.h
new file mode 100644
index 000000000000..a91f3e9c48b2
--- /dev/null
+++ b/lib/bpf/bpf_validate_debug.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Huawei Technologies Co., Ltd
+ */
+
+#ifndef _BPF_VALIDATE_DEBUG_H_
+#define _BPF_VALIDATE_DEBUG_H_
+
+/**
+ * @file bpf_validate_debug.h
+ *
+ * Internal-use headers for eBPF validation debug notifications.
+ */
+
+#include "rte_bpf_validate_debug.h"
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rte_bpf_prm_ex;
+struct rte_bpf_validate_debug;
+struct bpf_verifier;
+
+/* Type alias for validation event enum. */
+typedef enum rte_bpf_validate_debug_event debug_event_t;
+
+/*
+ * Signal beginning of evaluation process.
+ *
+ * Immediately return 0 if debug is NULL.
+ *
+ * @param debug
+ *   Validate debug instance configured by user, can be NULL.
+ * @param verifier
+ *   Opaque pointer that can be used for calling bpf_validate.h API.
+ * @param bpf_prm
+ *   Parameters struct of the validated eBPF program, including code with all
+ *   patches and relocations applied.
+ * @return
+ *   Non-negative value on success, negative errno on failure.
+ */
+int
+__rte_bpf_validate_debug_evaluate_start(struct rte_bpf_validate_debug *debug,
+	const struct bpf_verifier *verifier, const struct rte_bpf_prm_ex *bpf_prm);
+
+/*
+ * Signal each instruction, branch end, or evaluation end.
+ *
+ * Immediately return 0 if debug is NULL.
+ *
+ * @param debug
+ *   Validate debug instance configured by user, can be NULL.
+ * @param pc
+ *   Current value of the program counter, or next after last instruction.
+ * @param event
+ *   Specific evaluation event if any, or RTE_BPF_VALIDATE_DEBUG_EVENT_STEP.
+ * @return
+ *   Non-negative value: evaluation should continue;
+ *   -ECANCELED: evaluation should fail without calling this API again;
+ *   Other negative value: evaluation should fail signalling failure;
+ */
+int
+__rte_bpf_validate_debug_evaluate_step(struct rte_bpf_validate_debug *debug,
+	uint32_t pc, debug_event_t event);
+
+/*
+ * Signal end of evaluation process.
+ *
+ * Immediately return 0 if debug is NULL.
+ *
+ * @param debug
+ *   Validate debug instance configured by user, can be NULL.
+ * @param result
+ *   Outcome of the evaluation: non-negative value on success, negative errno
+ *   on failure; -ECANCELED finishes the evaluation without a final event.
+ * @return
+ *   Non-negative value on success, negative errno on failure.
+ */
+int
+__rte_bpf_validate_debug_evaluate_finish(struct rte_bpf_validate_debug *debug,
+	int result);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BPF_VALIDATE_DEBUG_H_ */
diff --git a/lib/bpf/bpf_value_set.c b/lib/bpf/bpf_value_set.c
new file mode 100644
index 000000000000..86f46de66f2f
--- /dev/null
+++ b/lib/bpf/bpf_value_set.c
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2026 Huawei Technologies Co., Ltd
+ */
+
+#include "bpf_value_set.h"
+
+#include <rte_debug.h>
+
+/* Helper interval operations and checks.  */
+
+/*
+ * One of many possible full intervals.
+ * Any interval with `last + 1 == first` (modulo 2^64) is full as well;
+ * this one is used whenever a full interval has to be produced.
+ */
+static const struct value_set_interval canonical_full_interval = {
+	.first = 0,
+	.last = UINT64_MAX,
+};
+
+/* Shift both ends of `interval` by `offset`, wrapping modulo 2^64. */
+static void
+interval_translate(struct value_set_interval *interval, uint64_t offset)
+{
+	const uint64_t moved_first = interval->first + offset;
+	const uint64_t moved_last = interval->last + offset;
+
+	interval->first = moved_first;
+	interval->last = moved_last;
+}
+
+/* Return true if `first` immediately follows `last`, i.e. every value is included. */
+static bool
+interval_is_full(struct value_set_interval interval)
+{
+	const uint64_t after_last = interval.last + 1;
+
+	return after_last == interval.first;
+}
+
+/* Return true if `value` lies within the (possibly wrapping) interval. */
+static bool
+interval_contains(struct value_set_interval interval, uint64_t value)
+{
+	/* Compare distances from `first` so that wrapping intervals work too. */
+	const uint64_t span = interval.last - interval.first;
+	const uint64_t distance = value - interval.first;
+
+	return distance <= span;
+}
+
+/* Return true if every value of `rhs` also belongs to `lhs`. */
+static bool
+interval_covers(struct value_set_interval lhs, struct value_set_interval rhs)
+{
+	/* Measure everything relative to lhs.first, so that lhs becomes 0..lhs_span. */
+	const uint64_t lhs_span = lhs.last - lhs.first;
+	const uint64_t rhs_first = rhs.first - lhs.first;
+	const uint64_t rhs_last = rhs.last - lhs.first;
+
+	/* A full lhs covers anything. */
+	if (lhs_span == UINT64_MAX)
+		return true;
+
+	/* Otherwise rhs must not wrap in relative terms and must end within lhs. */
+	return rhs_first <= rhs_last && rhs_last <= lhs_span;
+}
+
+/* Return true if the interval wraps around, i.e. steps from UINT64_MAX to 0. */
+static bool
+interval_crosses_zero(struct value_set_interval interval)
+{
+	return interval.first > interval.last;
+}
+
+/* Return number of elements in a non-full interval, 0 for a full interval. */
+static uint64_t
+interval_size(struct value_set_interval interval)
+{
+	const uint64_t span = interval.last - interval.first;
+
+	/* For a full interval span is UINT64_MAX and the sum wraps to 0. */
+	return span + 1;
+}
+
+/* Return true if both intervals describe the same set of values. */
+static bool
+intervals_equal(struct value_set_interval lhs, struct value_set_interval rhs)
+{
+	/* Identical end points trivially describe the same set. */
+	if (lhs.first == rhs.first && lhs.last == rhs.last)
+		return true;
+
+	/* Full intervals are equal regardless of their representation. */
+	return interval_is_full(lhs) && interval_is_full(rhs);
+}
+
+/* Return true if the intervals share at least one value. */
+static bool
+intervals_intersect(struct value_set_interval lhs, struct value_set_interval rhs)
+{
+	/* Checks whether one interval starts inside the other. */
+	if (interval_contains(lhs, rhs.first))
+		return true;
+
+	return interval_contains(rhs, lhs.first);
+}
+
+/* Return true if `rhs.first` follows `lhs.last` with some gap. Does not check other ends! */
+static bool
+intervals_follow_with_gap(struct value_set_interval lhs, struct value_set_interval rhs)
+{
+	/* Nothing can follow the largest representable value. */
+	if (lhs.last == UINT64_MAX)
+		return false;
+
+	/* At least one value must separate lhs.last from rhs.first. */
+	return lhs.last + 1 < rhs.first;
+}
+
+/*
+ * Return true if `(l - o) < (r - o)` for all `(o in origin, l in lhs, r in rhs)`.
+ *
+ * The check is a sufficient condition evaluated in coordinates relative to
+ * `origin.first`.
+ */
+static bool
+intervals_based_less(struct value_set_interval origin, struct value_set_interval lhs,
+	struct value_set_interval rhs)
+{
+	/* Translate all intervals for the origin to start at 0. */
+	const uint64_t offset = -origin.first;
+	interval_translate(&origin, offset);
+	interval_translate(&lhs, offset);
+	interval_translate(&rhs, offset);
+	RTE_ASSERT(origin.first == 0);
+
+	/*
+	 * In translated coordinates require the chain
+	 * origin.last <= lhs.first <= lhs.last < rhs.first <= rhs.last,
+	 * i.e. neither lhs nor rhs wraps, lhs starts at or after the end of
+	 * origin, and lhs ends strictly before rhs starts.
+	 */
+	return origin.last <= lhs.first &&
+		lhs.first <= lhs.last &&
+		lhs.last < rhs.first &&
+		rhs.first <= rhs.last;
+}
+
+/*
+ * Return true if `(l - o) <= (r - o)` for all `(o in origin, l in lhs, r in rhs)`.
+ *
+ * The check is evaluated in coordinates relative to `origin.first`.
+ */
+static bool
+intervals_based_less_or_equal(struct value_set_interval origin, struct value_set_interval lhs,
+	struct value_set_interval rhs)
+{
+	/* Translate all intervals for the origin to start at 0. */
+	const uint64_t offset = -origin.first;
+	interval_translate(&origin, offset);
+	interval_translate(&lhs, offset);
+	interval_translate(&rhs, offset);
+	RTE_ASSERT(origin.first == 0);
+
+	/* Special cases. */
+	/* Single origin value and lhs equal to it: l - o is 0, the minimum. */
+	if (origin.last == 0 && lhs.first == 0 && lhs.last == 0)
+		return true;
+	/* Single origin value and r - o always UINT64_MAX, the maximum. */
+	if (origin.last == 0 && rhs.first == UINT64_MAX && rhs.last == UINT64_MAX)
+		return true;
+	/* lhs and rhs are the same single value. */
+	if (lhs.first == lhs.last && lhs.last == rhs.first && rhs.first == rhs.last)
+		return true;
+
+	/*
+	 * General case, in translated coordinates:
+	 * origin.last <= lhs.first <= lhs.last <= rhs.first <= rhs.last.
+	 */
+	return origin.last <= lhs.first &&
+		lhs.first <= lhs.last &&
+		lhs.last <= rhs.first &&
+		rhs.first <= rhs.last;
+}
+
+/*
+ * Append interval rhs to list of intervals in lhs.
+ *
+ * Verifies (RTE_VERIFY) that there is still room in the set and that rhs
+ * starts after the current last interval with a gap.
+ */
+static void
+value_set_append(struct value_set *lhs, struct value_set_interval rhs)
+{
+	RTE_VERIFY(lhs->nb_interval < VALUE_SET_NB_INTERVAL_MAX);
+	RTE_VERIFY(lhs->nb_interval == 0 ||
+		intervals_follow_with_gap(lhs->interval[lhs->nb_interval - 1], rhs));
+	lhs->interval[lhs->nb_interval++] = rhs;
+}
+
+/*
+ * Helper operations on noncyclic value set and intervals.
+ * Noncyclic means no interval crosses zero,
+ * but in return last value set interval may touch first.
+ */
+
+/*
+ * Return the union of noncyclic set `lhs` and non-wrapping interval `rhs`.
+ *
+ * The result is built in three passes over the ordered intervals of `lhs`:
+ * those ending before `rhs` with a gap, those merged with `rhs`, and the rest.
+ */
+static struct value_set
+noncyclic_value_set_union_interval(const struct value_set *lhs, const struct value_set_interval rhs)
+{
+	struct value_set result = {};
+	uint32_t index = 0;
+
+	RTE_ASSERT(lhs->nb_interval == 0 ||
+		!interval_crosses_zero(lhs->interval[lhs->nb_interval - 1]));
+	RTE_ASSERT(!interval_crosses_zero(rhs));
+
+	/* Append to result all lhs intervals preceding rhs. */
+	for (; index != lhs->nb_interval; ++index) {
+		const struct value_set_interval lhs_interval = lhs->interval[index];
+		if (!intervals_follow_with_gap(lhs_interval, rhs))
+			break;
+
+		value_set_append(&result, lhs_interval);
+	}
+
+	/* Append interval joined from rhs and all lhs intervals intersecting or touching it. */
+	struct value_set_interval joint_interval = rhs;
+	for (; index != lhs->nb_interval; ++index) {
+		const struct value_set_interval lhs_interval = lhs->interval[index];
+		if (intervals_follow_with_gap(rhs, lhs_interval))
+			break;
+
+		joint_interval.first = RTE_MIN(joint_interval.first, lhs_interval.first);
+		joint_interval.last = RTE_MAX(joint_interval.last, lhs_interval.last);
+	}
+	value_set_append(&result, joint_interval);
+
+	/* Append to result all lhs intervals following rhs. */
+	for (; index != lhs->nb_interval; ++index)
+		value_set_append(&result, lhs->interval[index]);
+
+	return result;
+}
+
+/*
+ * Make "normal" maximal disjoint interval value set out of noncyclic one.
+ *
+ * If the last interval ends at UINT64_MAX and the first one starts at 0 they
+ * touch across zero and are merged into a single wrapping last interval.
+ * Sets with at most one interval are returned unchanged.
+ */
+static struct value_set
+value_set_from_noncyclic(const struct value_set *set)
+{
+	struct value_set result = {};
+	uint32_t index = 0;
+
+	if (set->nb_interval <= 1)
+		return *set;
+
+	struct value_set_interval last_interval = set->interval[set->nb_interval - 1];
+	if (last_interval.last == UINT64_MAX && set->interval[0].first == 0) {
+		/* Join first interval with the last one instead of copying it. */
+		last_interval.last = set->interval[0].last;
+		++index;
+	}
+
+	/* Copy the remaining intervals except the last, which is appended below. */
+	for (; index != set->nb_interval - 1; ++index)
+		value_set_append(&result, set->interval[index]);
+
+	value_set_append(&result, last_interval);
+
+	return result;
+}
+
+/*
+ * Make lhs a union of lhs and rhs.
+ *
+ * Both the last interval of `lhs` and `rhs` may cross zero. The union is
+ * computed in noncyclic form: wrapping parts are split off at zero, added as
+ * separate non-wrapping intervals, and the result is converted back with
+ * value_set_from_noncyclic().
+ *
+ * The intermediate noncyclic set may need one interval more than the final
+ * set; value_set_append() verifies that capacity is not exceeded.
+ */
+static void
+value_set_union_interval(struct value_set *lhs, const struct value_set_interval rhs)
+{
+	struct value_set temp;
+
+	if (value_set_is_empty(lhs)) {
+		value_set_append(lhs, rhs);
+		return;
+	}
+
+	struct value_set_interval *const last_interval = &lhs->interval[lhs->nb_interval - 1];
+	const bool last_interval_crossed_zero = interval_crosses_zero(*last_interval);
+	const uint64_t wrapping_last = last_interval->last;
+
+	if (last_interval_crossed_zero)
+		/* Make value set noncyclic by removing crossing part of last interval. */
+		last_interval->last = UINT64_MAX;
+
+	if (interval_crosses_zero(rhs)) {
+		/* Add parts before and after zero separately. */
+		temp = noncyclic_value_set_union_interval(lhs,
+			(struct value_set_interval){
+				.first = rhs.first,
+				.last = UINT64_MAX,
+			});
+		/*
+		 * Accumulate into temp, not lhs: starting again from lhs would
+		 * discard the rhs.first..UINT64_MAX part added just above.
+		 */
+		temp = noncyclic_value_set_union_interval(&temp,
+			(struct value_set_interval){
+				.first = 0,
+				.last = rhs.last,
+			});
+	} else
+		temp = noncyclic_value_set_union_interval(lhs, rhs);
+
+	if (last_interval_crossed_zero)
+		/* Restore previously removed part. */
+		temp = noncyclic_value_set_union_interval(&temp,
+			(struct value_set_interval){
+				.first = 0,
+				.last = wrapping_last,
+			});
+
+	*lhs = value_set_from_noncyclic(&temp);
+}
+
+/*
+ * Set `lhs` to the set of possible sums between values from `lhs` and `rhs`.
+ *
+ * If `rhs` or any interval of `lhs` is full, or their combined sizes are too
+ * large for the size check below, the result is the canonical full interval.
+ * Otherwise each interval of `lhs` is widened by `rhs` and the widened
+ * intervals are united.
+ */
+static void
+value_set_add_interval(struct value_set *lhs, struct value_set_interval rhs)
+{
+	/* Work from a copy: lhs is rebuilt from scratch. */
+	const struct value_set temp = *lhs;
+	lhs->nb_interval = 0;
+
+	/* First pass: detect any sum that has to be treated as the full set. */
+	for (uint32_t index = 0; index != temp.nb_interval; ++index) {
+		const struct value_set_interval interval = temp.interval[index];
+		if (interval_is_full(rhs) || interval_is_full(interval) ||
+				interval_size(interval) > UINT64_MAX - interval_size(rhs)) {
+			value_set_append(lhs, canonical_full_interval);
+			return;
+		}
+	}
+
+	/* Second pass: unite the widened intervals. */
+	for (uint32_t index = 0; index != temp.nb_interval; ++index)
+		value_set_union_interval(lhs, (struct value_set_interval){
+			/* Checked sizes above, so these interval expansions won't overflow. */
+			.first = temp.interval[index].first + rhs.first,
+			.last = temp.interval[index].last + rhs.last,
+		});
+}
+
+/* Build a set holding exactly one value: the contiguous range value..value. */
+struct value_set
+value_set_singleton(uint64_t value)
+{
+	const uint64_t only = value;
+
+	return value_set_contiguous(only, only);
+}
+
+/* Build a set made of the single cyclic interval first..last. */
+struct value_set
+value_set_contiguous(uint64_t first, uint64_t last)
+{
+	/* Unused interval slots stay zero-initialized. */
+	struct value_set result = { .nb_interval = 1 };
+
+	result.interval[0].first = first;
+	result.interval[0].last = last;
+
+	return result;
+}
+
+/*
+ * Build the set from the parts where first1..last1 and first2..last2 overlap.
+ *
+ * Each cyclic interval contributes the piece that starts at its own `first`,
+ * provided that `first` lies within the other interval; the pieces are united.
+ * The result is empty if neither interval starts within the other.
+ */
+struct value_set
+value_set_from_pair(uint64_t first1, uint64_t last1, uint64_t first2, uint64_t last2)
+{
+	struct value_set result = {};
+
+	if (first1 - first2 <= last2 - first2)
+		/* Interval 1 starts within interval 2. */
+		value_set_union_interval(&result, (struct value_set_interval){
+				.first = first1,
+				/* Stop at whichever of last1 and last2 comes first after first1. */
+				.last = first1 + RTE_MIN(last1 - first1, last2 - first1),
+			});
+
+	if (first2 - first1 <= last1 - first1)
+		/* Interval 2 starts within interval 1. */
+		value_set_union_interval(&result, (struct value_set_interval){
+				.first = first2,
+				/* Stop at whichever of last2 and last1 comes first after first2. */
+				.last = first2 + RTE_MIN(last2 - first2, last1 - first2),
+			});
+
+	return result;
+}
+
+/* A set is empty when it holds no intervals at all. */
+bool
+value_set_is_empty(const struct value_set *set)
+{
+	const uint32_t count = set->nb_interval;
+
+	return count == 0;
+}
+
+/* A singleton is a set with one interval that spans exactly one value. */
+bool
+value_set_is_singleton(const struct value_set *set)
+{
+	if (set->nb_interval != 1)
+		return false;
+
+	return interval_size(set->interval[0]) == 1;
+}
+
+/* Compare two sets interval by interval. */
+bool
+value_sets_equal(const struct value_set *lhs, const struct value_set *rhs)
+{
+	uint32_t index = 0;
+
+	if (lhs->nb_interval != rhs->nb_interval)
+		return false;
+
+	/* Advance while corresponding intervals match. */
+	while (index != lhs->nb_interval &&
+			intervals_equal(lhs->interval[index], rhs->interval[index]))
+		++index;
+
+	/* Equal only if no mismatch stopped the scan early. */
+	return index == lhs->nb_interval;
+}
+
+/* Return true as soon as any interval of lhs intersects any interval of rhs. */
+bool
+value_sets_intersect(const struct value_set *lhs, const struct value_set *rhs)
+{
+	for (uint32_t li = 0; li != lhs->nb_interval; ++li) {
+		const struct value_set_interval lhs_interval = lhs->interval[li];
+
+		for (uint32_t ri = 0; ri != rhs->nb_interval; ++ri) {
+			if (intervals_intersect(lhs_interval, rhs->interval[ri]))
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/* Return true if every interval of lhs lies within first..last. */
+bool
+value_set_is_covered_by_contiguous(const struct value_set *lhs, uint64_t first, uint64_t last)
+{
+	const struct value_set_interval cover = { .first = first, .last = last };
+	uint32_t index = 0;
+
+	/* Advance while intervals are covered; an empty set is trivially covered. */
+	while (index != lhs->nb_interval && interval_covers(cover, lhs->interval[index]))
+		++index;
+
+	return index == lhs->nb_interval;
+}
+
+/* Require intervals_based_less() for every combination of intervals of the three sets. */
+bool
+value_sets_based_less(const struct value_set *origin, const struct value_set *lhs,
+	const struct value_set *rhs)
+{
+	for (uint32_t oi = 0; oi != origin->nb_interval; ++oi) {
+		const struct value_set_interval origin_interval = origin->interval[oi];
+
+		for (uint32_t li = 0; li != lhs->nb_interval; ++li) {
+			const struct value_set_interval lhs_interval = lhs->interval[li];
+
+			for (uint32_t ri = 0; ri != rhs->nb_interval; ++ri) {
+				if (!intervals_based_less(origin_interval, lhs_interval,
+						rhs->interval[ri]))
+					return false;
+			}
+		}
+	}
+
+	return true;
+}
+
+/* Require intervals_based_less_or_equal() for every combination of intervals of the three sets. */
+bool
+value_sets_based_less_or_equal(const struct value_set *origin, const struct value_set *lhs,
+	const struct value_set *rhs)
+{
+	for (uint32_t oi = 0; oi != origin->nb_interval; ++oi) {
+		const struct value_set_interval origin_interval = origin->interval[oi];
+
+		for (uint32_t li = 0; li != lhs->nb_interval; ++li) {
+			const struct value_set_interval lhs_interval = lhs->interval[li];
+
+			for (uint32_t ri = 0; ri != rhs->nb_interval; ++ri) {
+				if (!intervals_based_less_or_equal(origin_interval, lhs_interval,
+						rhs->interval[ri]))
+					return false;
+			}
+		}
+	}
+
+	return true;
+}
+
+/* Shift every interval of the set by `offset`. */
+void
+value_set_translate(struct value_set *set, uint64_t offset)
+{
+	struct value_set_interval *const end = set->interval + set->nb_interval;
+
+	for (struct value_set_interval *it = set->interval; it != end; ++it)
+		interval_translate(it, offset);
+}
+
+/* Add the contiguous range first..last to every value of lhs. */
+void
+value_set_add_contiguous(struct value_set *lhs, uint64_t first, uint64_t last)
+{
+	const struct value_set_interval rhs = { .first = first, .last = last };
+
+	value_set_add_interval(lhs, rhs);
+}
diff --git a/lib/bpf/bpf_value_set.h b/lib/bpf/bpf_value_set.h
new file mode 100644
index 000000000000..5e7f8e521f55
--- /dev/null
+++ b/lib/bpf/bpf_value_set.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2026 Huawei Technologies Co., Ltd
+ */
+
+#ifndef _BPF_VALUE_SET_H_
+#define _BPF_VALUE_SET_H_
+
+/**
+ * @file bpf_value_set.h
+ *
+ * Value set operations for BPF validate debug.
+ *
+ * This is not a general use library, only minimal set of operations is provided
+ * that are necessary for implementing validate debug interface.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VALUE_SET_NB_INTERVAL_MAX 3
+
+/*
+ * Cyclic interval on uint64_t.
+ *
+ * Cyclic means value of `last` might be numerically smaller than `first`,
+ * that is the interval may cross from UINT64_MAX to 0.
+ *
+ * Contains element `first` and all elements that can be obtained from it by
+ * adding 1 until the result reaches `last`, which is included.
+ * There are thus multiple representations of the full set and no
+ * representation of the empty set.
+ *
+ * When `first` and `last` are accepted separately as function arguments, the
+ * term _contiguous_ is being used. It means that values of `first` and `last`
+ * are used to create a contiguous set composed of a single cyclic interval
+ * defined by these points.
+ */
+struct value_set_interval {
+	uint64_t first;	/* First element of the interval, included. */
+	uint64_t last;	/* Last element of the interval, included. */
+};
+
+/*
+ * Set of values represented as an ordered sequence of maximal disjoint cyclic intervals.
+ *
+ * Condition `maximal disjoint` means intervals do not intersect or touch each other.
+ *
+ * The sequence is ordered by member `first`. Only last interval may thus cross zero.
+ */
+struct value_set {
+	/* Number of used entries in `interval`; 0 means the set is empty. */
+	uint32_t nb_interval;
+	/* Intervals of the set; only the first `nb_interval` entries are meaningful. */
+	struct value_set_interval interval[VALUE_SET_NB_INTERVAL_MAX];
+};
+
+/* Empty value set: no intervals. */
+static const struct value_set value_set_empty = {
+	.nb_interval = 0,
+};
+
+/*
+ * Full (including every possible value) value set.
+ * Uses the 0..UINT64_MAX representation of the full interval.
+ */
+static const struct value_set value_set_full = {
+	.nb_interval = 1,
+	.interval = {
+		{ .first = 0, .last = UINT64_MAX },
+	},
+};
+
+/* Return set containing only `value`. */
+struct value_set
+value_set_singleton(uint64_t value);
+
+/* Return set of all values between and including `first` and `last` (AKA first..last). */
+struct value_set
+value_set_contiguous(uint64_t first, uint64_t last);
+
+/* Return set of all values belonging to _both_ first1..last1 and first2..last2. */
+struct value_set
+value_set_from_pair(uint64_t first1, uint64_t last1, uint64_t first2, uint64_t last2);
+
+/* Return true if the set is empty. */
+bool
+value_set_is_empty(const struct value_set *set);
+
+/* Return true if the set only contains one element. */
+bool
+value_set_is_singleton(const struct value_set *set);
+
+/* Return true if lhs and rhs represent the same set. */
+bool
+value_sets_equal(const struct value_set *lhs, const struct value_set *rhs);
+
+/* Return true if sets intersect (contain common elements). */
+bool
+value_sets_intersect(const struct value_set *lhs, const struct value_set *rhs);
+
+/* Return true if all elements in lhs belong to interval first..last */
+bool
+value_set_is_covered_by_contiguous(const struct value_set *lhs, uint64_t first, uint64_t last);
+
+/* Return true if `(l - o) < (r - o)` for all `(o in origin, l in lhs, r in rhs)`. */
+bool
+value_sets_based_less(const struct value_set *origin, const struct value_set *lhs,
+	const struct value_set *rhs);
+
+/* Return true if `(l - o) <= (r - o)` for all `(o in origin, l in lhs, r in rhs)`. */
+bool
+value_sets_based_less_or_equal(const struct value_set *origin, const struct value_set *lhs,
+	const struct value_set *rhs);
+
+/* Translate ("shift") all elements of `set` by `offset`, wrapping modulo 2^64. */
+void
+value_set_translate(struct value_set *set, uint64_t offset);
+
+/* Set `lhs` to the set of possible sums between values from `lhs` and `rhs`. */
+void
+value_set_add_contiguous(struct value_set *lhs, uint64_t first, uint64_t last);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BPF_VALUE_SET_H_ */
diff --git a/lib/bpf/meson.build b/lib/bpf/meson.build
index 7e8a300e3f87..b74a5c232107 100644
--- a/lib/bpf/meson.build
+++ b/lib/bpf/meson.build
@@ -24,6 +24,8 @@ sources = files(
         'bpf_load_elf.c',
         'bpf_pkt.c',
         'bpf_validate.c',
+        'bpf_validate_debug.c',
+        'bpf_value_set.c',
 )
 
 if arch_subdir == 'x86' and dpdk_conf.get('RTE_ARCH_64')
@@ -32,9 +34,12 @@ elif dpdk_conf.has('RTE_ARCH_ARM64')
     sources += files('bpf_jit_arm64.c')
 endif
 
-headers = files('bpf_def.h',
+headers = files(
+        'bpf_def.h',
         'rte_bpf.h',
-        'rte_bpf_ethdev.h')
+        'rte_bpf_ethdev.h',
+        'rte_bpf_validate_debug.h',
+)
 
 deps += ['mbuf', 'net', 'ethdev']
 
diff --git a/lib/bpf/rte_bpf.h b/lib/bpf/rte_bpf.h
index b6c232704a56..052849945c45 100644
--- a/lib/bpf/rte_bpf.h
+++ b/lib/bpf/rte_bpf.h
@@ -118,6 +118,7 @@ enum rte_bpf_origin {
 };
 
 struct bpf_insn;
+struct rte_bpf_validate_debug;
 
 /**
  * Input parameters for loading eBPF code, extensible version.
@@ -158,6 +159,9 @@ struct rte_bpf_prm_ex {
 
 	struct rte_bpf_arg prog_arg[EBPF_FUNC_MAX_ARGS];  /**< program arguments */
 	uint32_t nb_prog_arg;  /**< program argument count */
+
+	/* Validate debug instance. */
+	struct rte_bpf_validate_debug *debug;
 };
 
 /**
diff --git a/lib/bpf/rte_bpf_validate_debug.h b/lib/bpf/rte_bpf_validate_debug.h
new file mode 100644
index 000000000000..89bf587f0211
--- /dev/null
+++ b/lib/bpf/rte_bpf_validate_debug.h
@@ -0,0 +1,375 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Huawei Technologies Co., Ltd
+ */
+
+#ifndef _RTE_BPF_VALIDATE_DEBUG_H_
+#define _RTE_BPF_VALIDATE_DEBUG_H_
+
+/**
+ * @file rte_bpf_validate_debug.h
+ *
+ * Debugging interface for BPF validation.
+ *
+ * Can be used for debugging BPF validation problems as well as in tests.
+ */
+
+#include <bpf_def.h>
+#include <rte_compat.h>
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_BPF_VALIDATE_DEBUG_MAY_BE_FALSE	RTE_BIT32(0)
+#define RTE_BPF_VALIDATE_DEBUG_MAY_BE_TRUE	RTE_BIT32(1)
+
+/**
+ * Supported validate events.
+ *
+ * Valid events begin from 0 and end before `RTE_BPF_VALIDATE_DEBUG_EVENT_END`.
+ */
+enum rte_bpf_validate_debug_event {
+	/* Just before every instruction, at branch or validation end. */
+	RTE_BPF_VALIDATE_DEBUG_EVENT_STEP,
+	/* Validator has failed its internal self-checks. */
+	RTE_BPF_VALIDATE_DEBUG_EVENT_INVALID_STATE,
+	/* Start of validation. */
+	RTE_BPF_VALIDATE_DEBUG_EVENT_VALIDATION_START,
+	/* Successful finish of validation. */
+	RTE_BPF_VALIDATE_DEBUG_EVENT_VALIDATION_SUCCESS,
+	/* Finish of validation with error. */
+	RTE_BPF_VALIDATE_DEBUG_EVENT_VALIDATION_FAILURE,
+	/* Beginning of a branch just after the jump. */
+	RTE_BPF_VALIDATE_DEBUG_EVENT_BRANCH_ENTER,
+	/* Pruning branch as verified earlier. */
+	RTE_BPF_VALIDATE_DEBUG_EVENT_BRANCH_PRUNE,
+	/* End of branch verification, after the last verified instruction. */
+	RTE_BPF_VALIDATE_DEBUG_EVENT_BRANCH_RETURN,
+	/* Number of valid event values. */
+	RTE_BPF_VALIDATE_DEBUG_EVENT_END,
+};
+
+struct rte_bpf_validate_debug;
+struct rte_bpf_validate_debug_point;
+
+/** User callback description. */
+struct rte_bpf_validate_debug_callback {
+	int (*fn)(struct rte_bpf_validate_debug *debug, void *ctx);
+	void *ctx;
+};
+
+/** Invoked by rte_bpf_validate_debug_for_each_point for each breakpoint and catchpoint. */
+typedef int (*rte_bpf_validate_debug_point_process_t)(struct rte_bpf_validate_debug_point *point,
+	void *ctx);
+
+/**
+ * Create new debug instance.
+ *
+ * @return
+ *   Debug instance in case of success.
+ *   NULL with rte_errno set in case of a failure.
+ */
+__rte_experimental
+struct rte_bpf_validate_debug *
+rte_bpf_validate_debug_create(void);
+
+/**
+ * Destroy debug instance.
+ *
+ * Behavior is undefined if validation with this debug instance is ongoing.
+ *
+ * @param debug
+ *   Debug instance, or NULL.
+ */
+__rte_experimental
+void
+rte_bpf_validate_debug_destroy(struct rte_bpf_validate_debug *debug);
+
+/**
+ * Create new breakpoint at specified location.
+ *
+ * Can be called before the validation has started. If at validation start later
+ * the program will not have the specified instruction, the start will fail.
+ *
+ * It is allowed to create breakpoints for the same location a callback is
+ * currently executing for, but it will not be invoked in the same cycle.
+ *
+ * @param debug
+ *   Debug instance.
+ * @param pc
+ *   Program counter to create breakpoint at.
+ * @param callback
+ *   Callback to invoke.
+ * @return
+ *   New breakpoint on success, NULL with rte_errno set on failure.
+ */
+__rte_experimental
+struct rte_bpf_validate_debug_point *
+rte_bpf_validate_debug_break(struct rte_bpf_validate_debug *debug, uint32_t pc,
+	const struct rte_bpf_validate_debug_callback *callback);
+
+/**
+ * Create new catchpoint for specified event.
+ *
+ * Can be called before the validation has started.
+ *
+ * It is allowed to create catchpoints for the same event a callback is
+ * currently executing for, but it will not be invoked in the same cycle.
+ *
+ * @param debug
+ *   Debug instance.
+ * @param event
+ *   Validation event to create catchpoint for.
+ * @param callback
+ *   Callback to invoke.
+ * @return
+ *   New catchpoint on success, NULL with rte_errno set on failure.
+ */
+__rte_experimental
+struct rte_bpf_validate_debug_point *
+rte_bpf_validate_debug_catch(struct rte_bpf_validate_debug *debug,
+	enum rte_bpf_validate_debug_event event,
+	const struct rte_bpf_validate_debug_callback *callback);
+
+/**
+ * Delete breakpoint or catchpoint and free all associated resources.
+ *
+ * If a callback is currently being executed, calling this API is allowed for:
+ * - breakpoint or catchpoint the callback is executed for;
+ * - breakpoints or catchpoints for other locations or events;
+ * and NOT allowed for:
+ * - other breakpoints or catchpoints for the same location or event.
+ *
+ * @param point
+ *   Breakpoint or catchpoint to destroy, or NULL.
+ */
+__rte_experimental
+void
+rte_bpf_validate_debug_point_destroy(struct rte_bpf_validate_debug_point *point);
+
+/**
+ * Get effective eBPF parameters struct.
+ *
+ * @param debug
+ *   Debug instance.
+ * @return
+ *   Parameters struct of the validated eBPF program, including code with all
+ *   patches and relocations applied.
+ */
+__rte_experimental
+const struct rte_bpf_prm_ex *
+rte_bpf_validate_debug_get_bpf_param(const struct rte_bpf_validate_debug *debug);
+
+/**
+ * Get pointer to effective eBPF program instructions.
+ *
+ * @param debug
+ *   Debug instance.
+ * @param ins
+ *   Upon return, program instructions with all patches and relocations applied.
+ * @param nb_ins
+ *   Upon return, number of program instructions.
+ * @return
+ *   Non-negative value on success, negative errno on failure.
+ */
+__rte_experimental
+int
+rte_bpf_validate_debug_get_ins(const struct rte_bpf_validate_debug *debug,
+	const struct ebpf_insn **ins, uint32_t *nb_ins);
+
+/**
+ * Get last triggered breakpoint or catchpoint.
+ *
+ * Can be used to destroy currently processed breakpoint or catchpoint.
+ *
+ * The pointer may be invalid if the breakpoint or catchpoint has already been
+ * destroyed earlier.
+ *
+ * @param debug
+ *   Debug instance.
+ * @return
+ *   Last triggered breakpoint or catchpoint, including one the callback is
+ *   currently executing for.
+ *   NULL if none were triggered in the current validation process.
+ */
+__rte_experimental
+struct rte_bpf_validate_debug_point *
+rte_bpf_validate_debug_get_last_point(const struct rte_bpf_validate_debug *debug);
+
+/**
+ * Get current instruction index, or one after last if finishing.
+ *
+ * @param debug
+ *   Debug instance.
+ * @return
+ *   Current program counter being validated, or one after last.
+ *   UINT32_MAX if no program is being validated.
+ */
+__rte_experimental
+uint32_t
+rte_bpf_validate_debug_get_pc(const struct rte_bpf_validate_debug *debug);
+
+/**
+ * Get the validation result, if it has finished.
+ *
+ * @param debug
+ *   Debug instance.
+ * @param result
+ *   Upon successful return, the validation result (negative if validation failed).
+ * @return
+ *   Non-negative value if validation has finished and result variable was written;
+ *   -EAGAIN if validation is still ongoing;
+ *   other negative errno in case of failure;
+ */
+__rte_experimental
+int
+rte_bpf_validate_debug_get_validation_result(const struct rte_bpf_validate_debug *debug,
+	int *result);
+
+/**
+ * Check if specified memory access instruction is currently valid.
+ *
+ * @param debug
+ *   Debug instance.
+ * @param access
+ *   Memory load or store eBPF instruction.
+ * @param off64
+ *   Additional 64-bit offset added to access->off.
+ * @return
+ *   1 if specified memory access is currently valid;
+ *   0 if specified memory access is currently invalid;
+ *   negative errno in case of failure;
+ */
+__rte_experimental
+int
+rte_bpf_validate_debug_can_access(const struct rte_bpf_validate_debug *debug,
+	const struct ebpf_insn *access, uint64_t off64);
+
+/**
+ * Get possible truth values of the specified jump condition.
+ *
+ * @param debug
+ *   Debug instance.
+ * @param jump
+ *   Conditional jump instruction specifying the condition.
+ * @param imm64
+ *   Additional 64-bit immediate added to the source.
+ * @return
+ *   in case of success, bitwise combination of:
+ *     RTE_BPF_VALIDATE_DEBUG_MAY_BE_FALSE if the jump condition may be false;
+ *     RTE_BPF_VALIDATE_DEBUG_MAY_BE_TRUE if the jump condition may be true;
+ *   negative errno in case of failure.
+ */
+__rte_experimental
+int
+rte_bpf_validate_debug_may_jump(const struct rte_bpf_validate_debug *debug,
+	const struct ebpf_insn *jump, uint64_t imm64);
+
+/**
+ * Format information about specified register for the user.
+ *
+ * Parameters buffer, bufsz and return value work the same way as for snprintf.
+ *
+ * @param debug
+ *   Debug instance.
+ * @param buffer
+ *   Buffer to fill with register information.
+ * @param bufsz
+ *   Buffer size (including space for terminating zero).
+ * @param reg
+ *   Register to provide information about.
+ * @return
+ *   Number of characters needed _excluding_ terminating zero.
+ */
+__rte_experimental
+int
+rte_bpf_validate_debug_format_register_info(const struct rte_bpf_validate_debug *debug,
+	char *buffer, size_t bufsz, uint8_t reg);
+
+/**
+ * Format information about specified stack frame location for the user.
+ *
+ * Parameters buffer, bufsz and return value work the same way as for snprintf.
+ *
+ * @param debug
+ *   Debug instance.
+ * @param buffer
+ *   Buffer to fill with stack frame location information.
+ * @param bufsz
+ *   Buffer size (including space for terminating zero).
+ * @param offset
+ *   Stack frame offset to provide information about, in bytes.
+ *   Typically a negative multiple of 8.
+ * @return
+ *   Number of characters needed _excluding_ terminating zero.
+ */
+__rte_experimental
+int
+rte_bpf_validate_debug_format_frame_info(const struct rte_bpf_validate_debug *debug,
+	char *buffer, size_t bufsz, int32_t offset);
+
+/**
+ * Get program stack frame size.
+ *
+ * @param debug
+ *   Debug instance.
+ * @return
+ *   Program stack frame size in bytes.
+ */
+__rte_experimental
+int32_t
+rte_bpf_validate_debug_get_frame_size(const struct rte_bpf_validate_debug *debug);
+
+/**
+ * Format value following the style of register format function.
+ *
+ * Parameters buffer, bufsz and return value work the same way as for snprintf.
+ *
+ * @param buffer
+ *   Buffer to fill with the formatted value.
+ * @param bufsz
+ *   Buffer size (including space for terminating zero).
+ * @param format
+ *   One of characters 'd' or 'x' for signed or hexadecimal format.
+ * @param value
+ *   Formatted value, can be signed typecast to unsigned.
+ * @return
+ *   Number of characters needed _excluding_ terminating zero.
+ */
+__rte_experimental
+int
+rte_bpf_validate_debug_format_value(char *buffer, size_t bufsz, char format,
+	uint64_t value);
+
+/**
+ * Format interval following the style of register format function.
+ *
+ * Parameters buffer, bufsz and return value work the same way as for snprintf.
+ *
+ * @param buffer
+ *   Buffer to fill with the formatted interval.
+ * @param bufsz
+ *   Buffer size (including space for terminating zero).
+ * @param format
+ *   One of characters 'd' or 'x' for signed or hexadecimal format.
+ * @param min
+ *   Minimum value of the interval, can be signed typecast to unsigned.
+ * @param max
+ *   Maximum value of the interval, can be signed typecast to unsigned.
+ * @return
+ *   Number of characters needed _excluding_ terminating zero.
+ */
+__rte_experimental
+int
+rte_bpf_validate_debug_format_interval(char *buffer, size_t bufsz, char format,
+	uint64_t min, uint64_t max);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_VALIDATE_DEBUG_H_ */
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 04/24] bpf/validate: expand comments in evaluate cycle
From: Marat Khalili @ 2026-06-23 14:31 UTC (permalink / raw)
  To: Konstantin Ananyev; +Cc: dev
In-Reply-To: <20260623143215.95318-1-marat.khalili@huawei.com>

Logic of execution tree traversal is not 100% obvious, and had some bugs
in the past. Add and expand comments to clarify what `next` and `node`
variables are supposed to point to at various points of the cycle.

Signed-off-by: Marat Khalili <marat.khalili@huawei.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>
---
 lib/bpf/bpf_validate.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/bpf/bpf_validate.c b/lib/bpf/bpf_validate.c
index 1619faf3604a..362d00c77095 100644
--- a/lib/bpf/bpf_validate.c
+++ b/lib/bpf/bpf_validate.c
@@ -2449,6 +2449,7 @@ evaluate(struct bpf_verifier *bvf)
 		 * each node only once.
 		 */
 		if (next != NULL) {
+			/* just started or stepped down the tree, node == next */
 
 			bvf->evin = node;
 			idx = get_node_idx(bvf, node);
@@ -2481,8 +2482,10 @@ evaluate(struct bpf_verifier *bvf)
 		next = get_next_node(bvf, node);
 
 		if (next != NULL) {
-
-			/* proceed with next child */
+			/*
+			 * proceed with next child
+			 * next points to an unwalked subtree of node
+			 */
 			if (node->cur_edge == node->nb_edge &&
 					node->evst.cur != NULL) {
 				restore_cur_eval_state(bvf, node);
@@ -2514,6 +2517,11 @@ evaluate(struct bpf_verifier *bvf)
 
 			/* first node will not have prev, signalling finish */
 		}
+
+		/*
+		 * next != NULL: stepped down the tree, node == next;
+		 * next == NULL: stepped up after processing or pruning subtree;
+		 */
 	}
 
 	RTE_LOG(DEBUG, BPF, "%s(%p) returns %d, stats:\n"
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 03/24] bpf/validate: break on error in evaluate
From: Marat Khalili @ 2026-06-23 14:31 UTC (permalink / raw)
  To: Konstantin Ananyev; +Cc: dev
In-Reply-To: <20260623143215.95318-1-marat.khalili@huawei.com>

Evaluation loop previously continued until the cycle end in case of an
evaluation error. It made reasoning about the code difficult since it
might be executing when the evaluation is already in an invalid state.

Change loop logic to break out of the loop immediately after an error.

Signed-off-by: Marat Khalili <marat.khalili@huawei.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>
---
 lib/bpf/bpf_validate.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/bpf/bpf_validate.c b/lib/bpf/bpf_validate.c
index bf8a4abb5a5a..1619faf3604a 100644
--- a/lib/bpf/bpf_validate.c
+++ b/lib/bpf/bpf_validate.c
@@ -2401,11 +2401,11 @@ prune_eval_state(struct bpf_verifier *bvf, const struct inst_node *node,
 static int
 evaluate(struct bpf_verifier *bvf)
 {
-	int32_t rc;
 	uint32_t idx, op;
 	const char *err;
 	const struct ebpf_insn *ins;
 	struct inst_node *next, *node;
+	int rc = 0;
 
 	struct {
 		uint32_t nb_eval;
@@ -2439,11 +2439,10 @@ evaluate(struct bpf_verifier *bvf)
 	ins = bvf->prm->raw.ins;
 	node = bvf->in;
 	next = node;
-	rc = 0;
 
 	memset(&stats, 0, sizeof(stats));
 
-	while (node != NULL && rc == 0) {
+	while (node != NULL) {
 
 		/*
 		 * current node evaluation, make sure we evaluate
@@ -2457,17 +2456,20 @@ evaluate(struct bpf_verifier *bvf)
 
 			/* for jcc node make a copy of evaluation state */
 			if (node->nb_edge > 1) {
-				rc |= save_cur_eval_state(bvf, node);
+				rc = save_cur_eval_state(bvf, node);
+				if (rc < 0)
+					break;
 				stats.nb_save++;
 			}
 
-			if (ins_chk[op].eval != NULL && rc == 0) {
+			if (ins_chk[op].eval != NULL) {
 				err = ins_chk[op].eval(bvf, ins + idx);
 				stats.nb_eval++;
 				if (err != NULL) {
 					RTE_BPF_LOG_FUNC_LINE(ERR,
 						"%s at pc: %u", err, idx);
 					rc = -EINVAL;
+					break;
 				}
 			}
 
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 01/24] bpf: format and dump jlt, jle, jslt, and jsle
From: Marat Khalili @ 2026-06-23 14:31 UTC (permalink / raw)
  To: Konstantin Ananyev; +Cc: dev
In-Reply-To: <20260623143215.95318-1-marat.khalili@huawei.com>

Signed and unsigned less-than and less-or-equal conditional jumps were
not supported by the eBPF format and dump functions; add these instructions.

Signed-off-by: Marat Khalili <marat.khalili@huawei.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>
---
 lib/bpf/bpf_dump.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/bpf/bpf_dump.c b/lib/bpf/bpf_dump.c
index 91bc7c0a7af1..0abaeef8ae98 100644
--- a/lib/bpf/bpf_dump.c
+++ b/lib/bpf/bpf_dump.c
@@ -42,6 +42,8 @@ static const char *const jump_tbl[16] = {
 	[BPF_JSET >> 4] = "jset",  [EBPF_JNE >> 4] = "jne",
 	[EBPF_JSGT >> 4] = "jsgt", [EBPF_JSGE >> 4] = "jsge",
 	[EBPF_CALL >> 4] = "call", [EBPF_EXIT >> 4] = "exit",
+	[EBPF_JLT >> 4] = "jlt",   [EBPF_JLE >> 4] = "jle",
+	[EBPF_JSLT >> 4] = "jslt", [EBPF_JSLE >> 4] = "jsle",
 };
 
 static inline const char *
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 02/24] bpf: add format instruction function
From: Marat Khalili @ 2026-06-23 14:31 UTC (permalink / raw)
  To: Konstantin Ananyev; +Cc: dev
In-Reply-To: <20260623143215.95318-1-marat.khalili@huawei.com>

BPF library already contains BPF instruction formatting functions, but
they could only be used via `rte_bpf_dump` to dump result into file. Add
new function `rte_bpf_format` to format an instruction in various ways
(hexadecimal, disassembly) into a user-provided buffer, as well as a
service function `rte_bpf_insn_is_wide` to detect wide instructions.

Signed-off-by: Marat Khalili <marat.khalili@huawei.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>
---
 lib/bpf/bpf_dump.c | 290 +++++++++++++++++++++++++++------------------
 lib/bpf/rte_bpf.h  |  51 ++++++++
 2 files changed, 226 insertions(+), 115 deletions(-)

diff --git a/lib/bpf/bpf_dump.c b/lib/bpf/bpf_dump.c
index 0abaeef8ae98..4fd67ad5a1df 100644
--- a/lib/bpf/bpf_dump.c
+++ b/lib/bpf/bpf_dump.c
@@ -46,6 +46,38 @@ static const char *const jump_tbl[16] = {
 	[EBPF_JSLT >> 4] = "jslt", [EBPF_JSLE >> 4] = "jsle",
 };
 
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_insn_is_wide, 26.07)
+bool
+rte_bpf_insn_is_wide(const struct ebpf_insn *ins)
+{
+	return ins->code == (BPF_LD | BPF_IMM | EBPF_DW);
+}
+
+
+/* Format one (possibly wide) eBPF command as hexadecimal in objdump format. */
+static int
+format_hexadecimal(char *buffer, size_t bufsz, const struct ebpf_insn *ins,
+	uint32_t flags)
+{
+	const char *const b = (const char *)ins;
+
+	RTE_ASSERT((flags & RTE_BPF_FORMAT_FLAG_HEXADECIMAL) != 0);
+
+	RTE_BUILD_BUG_ON(sizeof(*ins) != 8);
+
+	if ((flags & RTE_BPF_FORMAT_FLAG_NEVER_WIDE) == 0 && rte_bpf_insn_is_wide(ins))
+		return snprintf(buffer, bufsz,
+			"%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
+			b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
+			b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
+	else
+		return snprintf(buffer, bufsz,
+			"%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
+			b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]);
+}
+
+/* Return atomic subcommand mnemonic based on BPF_STX immediate. */
 static inline const char *
 atomic_op(int32_t imm)
 {
@@ -59,130 +91,158 @@ atomic_op(int32_t imm)
 	}
 }
 
-RTE_EXPORT_SYMBOL(rte_bpf_dump)
-void rte_bpf_dump(FILE *f, const struct ebpf_insn *buf, uint32_t len)
+/* Format one (possibly wide) eBPF command as assembler. */
+static int
+format_disassembly(char *buffer, size_t bufsz, const struct ebpf_insn *ins,
+	uint32_t pc, uint32_t flags)
 {
-	uint32_t i;
+	uint8_t cls = BPF_CLASS(ins->code);
+	const char *op, *postfix = "", *warning = "";
+	char jump[16];
 
-	for (i = 0; i < len; ++i) {
-		const struct ebpf_insn *ins = buf + i;
-		uint8_t cls = BPF_CLASS(ins->code);
-		const char *op, *postfix = "", *warning = "";
+	RTE_ASSERT((flags & RTE_BPF_FORMAT_FLAG_HEXADECIMAL) == 0);
 
-		fprintf(f, " L%u:\t", i);
+	switch (cls) {
+	default:
+		return snprintf(buffer, bufsz, "unimp 0x%x // class: %s",
+			ins->code, class_tbl[cls]);
+	case BPF_ALU:
+		postfix = "32";
+		/* fall through */
+	case EBPF_ALU64:
+		op = alu_op_tbl[BPF_OP_INDEX(ins->code)];
+		if (ins->off != 0)
+			/* Not yet supported variation with non-zero offset. */
+			warning = ", off != 0";
+		if (BPF_SRC(ins->code) == BPF_X)
+			return snprintf(buffer, bufsz, "%s%s r%u, r%u%s", op, postfix, ins->dst_reg,
+				ins->src_reg, warning);
+		else
+			return snprintf(buffer, bufsz, "%s%s r%u, #0x%x%s", op, postfix,
+				ins->dst_reg, ins->imm, warning);
+	case BPF_LD:
+		op = "ld";
+		postfix = size_tbl[BPF_SIZE_INDEX(ins->code)];
+		if (ins->code == (BPF_LD | BPF_IMM | EBPF_DW)) {
+			uint64_t val;
 
-		switch (cls) {
-		default:
-			fprintf(f, "unimp 0x%x // class: %s\n",
-				ins->code, class_tbl[cls]);
-			break;
-		case BPF_ALU:
-			postfix = "32";
-			/* fall through */
-		case EBPF_ALU64:
-			op = alu_op_tbl[BPF_OP_INDEX(ins->code)];
-			if (ins->off != 0)
-				/* Not yet supported variation with non-zero offset. */
-				warning = ", off != 0";
-			if (BPF_SRC(ins->code) == BPF_X)
-				fprintf(f, "%s%s r%u, r%u%s\n", op, postfix, ins->dst_reg,
-					ins->src_reg, warning);
-			else
-				fprintf(f, "%s%s r%u, #0x%x%s\n", op, postfix,
-					ins->dst_reg, ins->imm, warning);
-			break;
-		case BPF_LD:
-			op = "ld";
-			postfix = size_tbl[BPF_SIZE_INDEX(ins->code)];
-			if (ins->code == (BPF_LD | BPF_IMM | EBPF_DW)) {
-				uint64_t val;
-
-				if (ins->src_reg != 0)
-					/* Not yet supported variation with non-zero src. */
-					warning = ", src != 0";
-				val = (uint32_t)ins[0].imm |
-					(uint64_t)(uint32_t)ins[1].imm << 32;
-				fprintf(f, "%s%s r%d, #0x%"PRIx64"%s\n",
-					op, postfix, ins->dst_reg, val, warning);
-				i++;
-			} else if (BPF_MODE(ins->code) == BPF_IMM)
-				fprintf(f, "%s%s r%d, #0x%x\n", op, postfix,
-					ins->dst_reg, ins->imm);
-			else if (BPF_MODE(ins->code) == BPF_ABS)
-				fprintf(f, "%s%s r%d, [%d]\n", op, postfix,
-					ins->dst_reg, ins->imm);
-			else if (BPF_MODE(ins->code) == BPF_IND)
-				fprintf(f, "%s%s r%d, [r%u + %d]\n", op, postfix,
-					ins->dst_reg, ins->src_reg, ins->imm);
-			else
-				fprintf(f, "// BUG: LD opcode 0x%02x in eBPF insns\n",
-					ins->code);
-			break;
-		case BPF_LDX:
-			op = "ldx";
-			postfix = size_tbl[BPF_SIZE_INDEX(ins->code)];
-			if (BPF_MODE(ins->code) == BPF_MEM)
-				fprintf(f, "%s%s r%d, [r%u + %d]\n", op, postfix, ins->dst_reg,
-					ins->src_reg, ins->off);
-			else
-				fprintf(f, "// BUG: LDX opcode 0x%02x in eBPF insns\n",
-					ins->code);
-			break;
-		case BPF_ST:
-			op = "st";
-			postfix = size_tbl[BPF_SIZE_INDEX(ins->code)];
-			if (BPF_MODE(ins->code) == BPF_MEM)
-				fprintf(f, "%s%s [r%d + %d], #0x%x\n", op, postfix,
-					ins->dst_reg, ins->off, ins->imm);
-			else
-				fprintf(f, "// BUG: ST opcode 0x%02x in eBPF insns\n",
-					ins->code);
-			break;
-		case BPF_STX:
-			if (BPF_MODE(ins->code) == BPF_MEM)
-				op = "stx";
-			else if (BPF_MODE(ins->code) == EBPF_ATOMIC) {
-				op = atomic_op(ins->imm);
-				if (op == NULL) {
-					fprintf(f, "// BUG: ATOMIC operation 0x%x in eBPF insns\n",
-						ins->imm);
-					break;
-				}
-			} else {
-				fprintf(f, "// BUG: STX opcode 0x%02x in eBPF insns\n",
-					ins->code);
-				break;
-			}
-			postfix = size_tbl[BPF_SIZE_INDEX(ins->code)];
-			fprintf(f, "%s%s [r%d + %d], r%u\n", op, postfix,
-				ins->dst_reg, ins->off, ins->src_reg);
-			break;
-#define L(pc, off) ((int)(pc) + 1 + (off))
-		case BPF_JMP:
-			op = jump_tbl[BPF_OP_INDEX(ins->code)];
 			if (ins->src_reg != 0)
-				/* Not yet supported variation with non-zero src w/o condition. */
+				/* Not yet supported variation with non-zero src. */
 				warning = ", src != 0";
+			val = (uint32_t)ins[0].imm |
+				(uint64_t)(uint32_t)ins[1].imm << 32;
+			return snprintf(buffer, bufsz, "%s%s r%d, #0x%"PRIx64"%s",
+				op, postfix, ins->dst_reg, val, warning);
+		}
+		switch (BPF_MODE(ins->code)) {
+		case BPF_IMM:
+			return snprintf(buffer, bufsz, "%s%s r%d, #0x%x", op, postfix,
+				ins->dst_reg, ins->imm);
+		case BPF_ABS:
+			return snprintf(buffer, bufsz, "%s%s r%d, [%d]", op, postfix,
+				ins->dst_reg, ins->imm);
+		case BPF_IND:
+			return snprintf(buffer, bufsz, "%s%s r%d, [r%u + %d]", op, postfix,
+				ins->dst_reg, ins->src_reg, ins->imm);
+		default:
+			return snprintf(buffer, bufsz, "// BUG: LD opcode 0x%02x in eBPF insns",
+				ins->code);
+		}
+	case BPF_LDX:
+		op = "ldx";
+		postfix = size_tbl[BPF_SIZE_INDEX(ins->code)];
+		if (BPF_MODE(ins->code) == BPF_MEM)
+			return snprintf(buffer, bufsz, "%s%s r%d, [r%u + %d]", op, postfix,
+				ins->dst_reg, ins->src_reg, ins->off);
+		else
+			return snprintf(buffer, bufsz, "// BUG: LDX opcode 0x%02x in eBPF insns",
+				ins->code);
+	case BPF_ST:
+		op = "st";
+		postfix = size_tbl[BPF_SIZE_INDEX(ins->code)];
+		if (BPF_MODE(ins->code) == BPF_MEM)
+			return snprintf(buffer, bufsz, "%s%s [r%d + %d], #0x%x", op, postfix,
+				ins->dst_reg, ins->off, ins->imm);
+		else
+			return snprintf(buffer, bufsz, "// BUG: ST opcode 0x%02x in eBPF insns",
+				ins->code);
+	case BPF_STX:
+		switch (BPF_MODE(ins->code)) {
+		case BPF_MEM:
+			op = "stx";
+			break;
+		case EBPF_ATOMIC:
+			op = atomic_op(ins->imm);
 			if (op == NULL)
-				fprintf(f, "invalid jump opcode: %#x\n", ins->code);
-			else if (BPF_OP(ins->code) == BPF_JA)
-				fprintf(f, "%s L%d%s\n", op, L(i, ins->off), warning);
-			else if (BPF_OP(ins->code) == EBPF_CALL)
-				/* Call of helper function with index in immediate. */
-				fprintf(f, "%s #%u%s\n", op, ins->imm, warning);
-			else if (BPF_OP(ins->code) == EBPF_EXIT)
-				fprintf(f, "%s%s\n", op, warning);
-			else if (BPF_SRC(ins->code) == BPF_X)
-				fprintf(f, "%s r%u, r%u, L%d\n", op, ins->dst_reg,
-					ins->src_reg, L(i, ins->off));
-			else
-				fprintf(f, "%s r%u, #0x%x, L%d\n", op, ins->dst_reg,
-					ins->imm, L(i, ins->off));
+				return snprintf(buffer, bufsz,
+					"// BUG: ATOMIC operation 0x%x in eBPF insns", ins->imm);
 			break;
-		case BPF_RET:
-			fprintf(f, "// BUG: RET opcode 0x%02x in eBPF insns\n",
+		default:
+			return snprintf(buffer, bufsz, "// BUG: STX opcode 0x%02x in eBPF insns",
 				ins->code);
-			break;
 		}
+		postfix = size_tbl[BPF_SIZE_INDEX(ins->code)];
+		return snprintf(buffer, bufsz, "%s%s [r%d + %d], r%u", op, postfix,
+			ins->dst_reg, ins->off, ins->src_reg);
+	case BPF_JMP:
+		op = jump_tbl[BPF_OP_INDEX(ins->code)];
+		if (op == NULL)
+			return snprintf(buffer, bufsz, "invalid jump opcode: %#x", ins->code);
+
+		if ((flags & RTE_BPF_FORMAT_FLAG_ABSOLUTE_JUMPS) != 0)
+			snprintf(jump, sizeof(jump), "L%d", pc + 1 + ins->off);
+		else
+			snprintf(jump, sizeof(jump), "%+d", (int)ins->off);
+
+		if (ins->src_reg != 0)
+			/* Not yet supported variation with non-zero src w/o condition. */
+			warning = ", src != 0";
+		switch (BPF_OP(ins->code)) {
+		case BPF_JA:
+			return snprintf(buffer, bufsz, "%s %s%s", op, jump, warning);
+		case EBPF_CALL:
+			/* Call of helper function with index in immediate. */
+			return snprintf(buffer, bufsz, "%s #%u%s", op, ins->imm, warning);
+		case EBPF_EXIT:
+			return snprintf(buffer, bufsz, "%s%s", op, warning);
+		}
+
+		if (BPF_SRC(ins->code) == BPF_X)
+			return snprintf(buffer, bufsz, "%s r%u, r%u, %s", op, ins->dst_reg,
+				ins->src_reg, jump);
+		else
+			return snprintf(buffer, bufsz, "%s r%u, #0x%x, %s", op, ins->dst_reg,
+				ins->imm, jump);
+	case BPF_RET:
+		return snprintf(buffer, bufsz, "// BUG: RET opcode 0x%02x in eBPF insns",
+			ins->code);
+	}
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_bpf_format, 26.07)
+int
+rte_bpf_format(char *buffer, size_t bufsz, const struct ebpf_insn *ins,
+	uint32_t pc, uint32_t flags)
+{
+	if ((flags & RTE_BPF_FORMAT_FLAG_HEXADECIMAL) != 0)
+		return format_hexadecimal(buffer, bufsz, ins, flags);
+	else
+		return format_disassembly(buffer, bufsz, ins, pc, flags);
+}
+
+RTE_EXPORT_SYMBOL(rte_bpf_dump)
+void rte_bpf_dump(FILE *f, const struct ebpf_insn *buf, uint32_t len)
+{
+	uint32_t i;
+	char buffer[256];
+
+	for (i = 0; i < len; ++i) {
+		const struct ebpf_insn *ins = buf + i;
+
+		format_disassembly(buffer, sizeof(buffer), ins, i,
+			RTE_BPF_FORMAT_FLAG_DISASSEMBLY	|
+			RTE_BPF_FORMAT_FLAG_ABSOLUTE_JUMPS);
+		fprintf(f, " L%u:\t%s\n", i, buffer);
+		i += rte_bpf_insn_is_wide(ins);
 	}
 }
diff --git a/lib/bpf/rte_bpf.h b/lib/bpf/rte_bpf.h
index 413ccf049755..b6c232704a56 100644
--- a/lib/bpf/rte_bpf.h
+++ b/lib/bpf/rte_bpf.h
@@ -30,6 +30,23 @@ extern "C" {
 /** Mask with all supported `RTE_BPF_EXEC_FLAG_*` flags set. */
 #define RTE_BPF_EXEC_FLAG_MASK  RTE_BPF_EXEC_FLAG_JIT
 
+/* Format instructions as assembler. */
+#define RTE_BPF_FORMAT_FLAG_DISASSEMBLY		0
+/* Format instructions as hexadecimal. */
+#define RTE_BPF_FORMAT_FLAG_HEXADECIMAL		RTE_BIT32(0)
+
+/* Only valid in disassembly mode. */
+/* Format jump offsets relative to the next instruction. */
+#define RTE_BPF_FORMAT_FLAG_RELATIVE_JUMPS	0
+/* Format jump targets relative to the start of the program. */
+#define RTE_BPF_FORMAT_FLAG_ABSOLUTE_JUMPS	RTE_BIT32(1)
+
+/* Only valid in hexadecimal mode. */
+/* Format full hexadecimal representation of wide instructions. */
+#define RTE_BPF_FORMAT_FLAG_AUTO_WIDE		0
+/* Format as hexadecimal only first half of wide instructions. */
+#define RTE_BPF_FORMAT_FLAG_NEVER_WIDE		RTE_BIT32(2)
+
 /**
  * Possible types for function/BPF program arguments.
  */
@@ -391,6 +408,40 @@ __rte_experimental
 int
 rte_bpf_get_jit_ex(const struct rte_bpf *bpf, struct rte_bpf_jit_ex *jit);
 
+/**
+ * Determine instruction width.
+ *
+ * @return
+ *   True if ins points to a wide (128-bit) instruction.
+ */
+__rte_experimental
+bool
+rte_bpf_insn_is_wide(const struct ebpf_insn *ins);
+
+/**
+ * Print eBPF instruction into a buffer.
+ *
+ * Semantics of handling buffer size repeats those of snprintf.
+ *
+ * @param buffer
+ *   Output buffer (may be NULL if bufsz is zero).
+ * @param bufsz
+ *   Output buffer size.
+ * @param ins
+ *   Narrow or wide (depending on opcode) eBPF instruction. That is, when
+ *   `rte_bpf_insn_is_wide` is true `ins[1]` is also accessed.
+ * @param pc
+ *   Current instruction number for displaying absolute jump targets.
+ * @param flags
+ *   Bitwise-OR combination of `RTE_BPF_FORMAT_FLAG_*` values.
+ * @return
+ *   Number of characters to be written excluding terminating zero.
+ */
+__rte_experimental
+int
+rte_bpf_format(char *buffer, size_t bufsz, const struct ebpf_insn *ins,
+	uint32_t pc, uint32_t flags);
+
 /**
  * Dump epf instructions to a file.
  *
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 00/24] bpf: test and fix issues in verifier
From: Marat Khalili @ 2026-06-23 14:31 UTC (permalink / raw)
  Cc: dev
In-Reply-To: <20260612104743.6465-1-marat.khalili@huawei.com>

This patchset addresses numerous bugs in the BPF verifier's abstract
interpretation logic and introduces a new validation debugger API to
enable precise, robust testing of the verifier itself.

While the existing DPDK eBPF verifier is capable of checking basic
execution graph loops and dead code, the mathematical tracking of
register bounds (both signed and unsigned) contained flaws resulting in
false positives and false negatives, undefined behavior, and hardware
exceptions such as SIGFPE during validation.

To resolve these issues and ensure they do not regress, this patchset
first introduces the "Validation Debugger API"
(`rte_bpf_validate_debug_*`). This gdb-like interface allows setting
breakpoints and catchpoints during the validation process to inspect the
verifier's internal state.

Using this new API, a comprehensive test harness
(`app/test/test_bpf_validate.c`) was created to formally check the
abstract domains of instructions across all their valid branches. The
remainder of the patchset incrementally fixes the math and bounds logic
for individual eBPF instructions, using the new tests to prove the
correctness of the fixes.

This debugger API also lays the foundation for an interactive eBPF
validation debugger to be introduced in the future.

Series-Depends-on: series-38434 ("bpf: introduce extensible load API")


v4:
No code changes (almost).
* Removed compilation error on presence of LIST_FOREACH_SAFE in Linux.
* Rebased on fresh main with prerequisite series merged.
* Moved new API documentation changes to the introducing commit.
* Made new release notes one section instead of two.
* Corrected documentation wording slightly to prevent confusion between
  the debugging API and a debugger application (not part of this series).

v3:
* Rebased on v5 of the prerequisite series and updated Depends-on tags.
* Replaced a hardcoded compiler attribute with __rte_format_printf.

v2:
* Addressed AI reviewer comments:
  * replaced `false` and `true` with 0 and 1 in some API descriptions
    and invocations that multiplex boolean and negative error code;
  * made some previously implicit casts explicit;
  * moved new enum value to the end of the definition.
* Added Acked-by and Depends-on tags to all individual commits to
  align with patchwork requirements.
* Added Reported-by tags to fixes of issues discovered by Claudia Cauli
  using a formal methods framework.


Marat Khalili (24):
  bpf: format and dump jlt, jle, jslt, and jsle
  bpf: add format instruction function
  bpf/validate: break on error in evaluate
  bpf/validate: expand comments in evaluate cycle
  bpf/validate: introduce debugging interface
  bpf/validate: fix BPF_ADD of pointer to a scalar
  bpf/validate: fix BPF_LDX | EBPF_DW signed range
  test/bpf_validate: add setup and basic tests
  test/bpf_validate: add harness for pointer tests
  bpf/validate: fix EBPF_JSLT | BPF_X evaluation
  bpf/validate: fix BPF_NEG of INT64_MIN and 0
  bpf/validate: fix BPF_DIV and BPF_MOD signed part
  bpf/validate: fix BPF_MUL ranges minimum typo
  bpf/validate: fix BPF_MUL signed overflow UB
  bpf/validate: fix BPF_JGT/EBPF_JSGT no-jump max
  bpf/validate: fix BPF_JMP source range calculation
  bpf/validate: fix BPF_JMP empty range handling
  bpf/validate: fix BPF_AND min calculations
  bpf/validate: fix BPF_LSH shift-out-of-bounds UB
  bpf/validate: fix BPF_OR min calculations
  bpf/validate: fix BPF_SUB signed max zero case
  bpf/validate: fix BPF_XOR signed min calculation
  bpf/validate: prevent overflow when building graph
  doc: add release notes for BPF validation fixes

 app/test/meson.build                   |    1 +
 app/test/test_bpf.c                    |   99 ++
 app/test/test_bpf_validate.c           | 2271 ++++++++++++++++++++++++
 doc/guides/prog_guide/bpf_lib.rst      |   31 +
 doc/guides/rel_notes/release_26_07.rst |   14 +-
 lib/bpf/bpf_dump.c                     |  292 +--
 lib/bpf/bpf_validate.c                 |  730 +++++++-
 lib/bpf/bpf_validate.h                 |   60 +
 lib/bpf/bpf_validate_debug.c           |  659 +++++++
 lib/bpf/bpf_validate_debug.h           |   86 +
 lib/bpf/bpf_value_set.c                |  403 +++++
 lib/bpf/bpf_value_set.h                |  126 ++
 lib/bpf/meson.build                    |    9 +-
 lib/bpf/rte_bpf.h                      |   55 +
 lib/bpf/rte_bpf_validate_debug.h       |  377 ++++
 15 files changed, 5015 insertions(+), 198 deletions(-)
 create mode 100644 app/test/test_bpf_validate.c
 create mode 100644 lib/bpf/bpf_validate.h
 create mode 100644 lib/bpf/bpf_validate_debug.c
 create mode 100644 lib/bpf/bpf_validate_debug.h
 create mode 100644 lib/bpf/bpf_value_set.c
 create mode 100644 lib/bpf/bpf_value_set.h
 create mode 100644 lib/bpf/rte_bpf_validate_debug.h

-- 
2.43.0


^ permalink raw reply

* [PATCH 3/3] vhost: remove use of strncpy
From: Bruce Richardson @ 2026-06-23 14:19 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, stable, Maxime Coquelin, Chenbo Xia,
	Yuanhan Liu, David Marchand, Stephen Hemminger
In-Reply-To: <20260623141930.704771-1-bruce.richardson@intel.com>

Using strlcpy is preferred over strncpy, as it removes the need to
explicitly null-terminate some string buffers. We can also
simplify some name length handling as a result of this, as we no longer
need to use strnlen to clamp the length before calling the set_ifname
function.

Fixes: a277c7159876 ("vhost: refactor code structure")
Fixes: 0adb8eccc6a6 ("vhost: add VDUSE device creation and destruction")
Fixes: c171a2d5ff17 ("vhost: use strlcpy instead of strncpy")
Cc: stable@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 lib/vhost/socket.c |  4 +---
 lib/vhost/vduse.c  |  2 +-
 lib/vhost/vhost.c  | 12 +++---------
 lib/vhost/vhost.h  |  2 +-
 4 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index 70e582a18d..0943b3e9bb 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -207,7 +207,6 @@ static void
 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 {
 	int vid;
-	size_t size;
 	struct vhost_user_connection *conn;
 	int ret;
 	struct virtio_net *dev;
@@ -226,8 +225,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 		goto err;
 	}
 
-	size = strnlen(vsocket->path, PATH_MAX);
-	vhost_set_ifname(vid, vsocket->path, size);
+	vhost_set_ifname(vid, vsocket->path);
 
 	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
 		vsocket->net_compliant_ol_flags, vsocket->stats_enabled,
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index 0b5d158fee..f8a4a8edcb 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -796,7 +796,7 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
 		goto out_dev_destroy;
 	}
 
-	strncpy(dev->ifname, path, IF_NAME_SZ - 1);
+	strlcpy(dev->ifname, path, sizeof(dev->ifname));
 	dev->vduse_ctrl_fd = control_fd;
 	dev->vduse_dev_fd = dev_fd;
 
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 7e68b2c3be..fde8acb00c 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -776,20 +776,15 @@ vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
 }
 
 void
-vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
+vhost_set_ifname(int vid, const char *if_name)
 {
 	struct virtio_net *dev;
-	unsigned int len;
 
 	dev = get_device(vid);
 	if (dev == NULL)
 		return;
 
-	len = if_len > sizeof(dev->ifname) ?
-		sizeof(dev->ifname) : if_len;
-
-	strncpy(dev->ifname, if_name, len);
-	dev->ifname[sizeof(dev->ifname) - 1] = '\0';
+	strlcpy(dev->ifname, if_name, sizeof(dev->ifname));
 }
 
 void
@@ -915,8 +910,7 @@ rte_vhost_get_ifname(int vid, char *buf, size_t len)
 
 	len = RTE_MIN(len, sizeof(dev->ifname));
 
-	strncpy(buf, dev->ifname, len);
-	buf[len - 1] = '\0';
+	strlcpy(buf, dev->ifname, len);
 
 	return 0;
 }
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index ee61f7415e..1c957d2929 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -877,7 +877,7 @@ int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
 
 void vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *dev);
 
-void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
+void vhost_set_ifname(int, const char *if_name);
 void vhost_setup_virtio_net(int vid, bool enable, bool legacy_ol_flags, bool stats_enabled,
 	bool support_iommu);
 void vhost_enable_extbuf(int vid);
-- 
2.53.0


^ permalink raw reply related

* [PATCH 2/3] eventdev: improve bounds checks for names in adapter create
From: Bruce Richardson @ 2026-06-23 14:19 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, stable, Naga Harish K S V, Jerin Jacob,
	Nikhil Rao
In-Reply-To: <20260623141930.704771-1-bruce.richardson@intel.com>

The bounds checks for snprintf and then strncpy used different constant
defines, which happened to resolve to the same value (32). Make this
code more resilient by using sizeof() operator rather than the defines,
and replace use of strncpy with the better strlcpy.

Fixes: a3bbf2e09756 ("eventdev: add eth Tx adapter implementation")
Cc: stable@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 lib/eventdev/rte_event_eth_tx_adapter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/eventdev/rte_event_eth_tx_adapter.c b/lib/eventdev/rte_event_eth_tx_adapter.c
index 91c7be55c7..d531da5d69 100644
--- a/lib/eventdev/rte_event_eth_tx_adapter.c
+++ b/lib/eventdev/rte_event_eth_tx_adapter.c
@@ -748,7 +748,7 @@ txa_service_adapter_create_ext(uint8_t id, struct rte_eventdev *dev,
 		return -EINVAL;
 
 	socket_id = dev->data->socket_id;
-	snprintf(mem_name, TXA_MEM_NAME_LEN,
+	snprintf(mem_name, sizeof(mem_name),
 		"rte_event_eth_txa_%d",
 		id);
 
@@ -767,7 +767,7 @@ txa_service_adapter_create_ext(uint8_t id, struct rte_eventdev *dev,
 	txa->id = id;
 	txa->eventdev_id = dev->data->dev_id;
 	txa->socket_id = socket_id;
-	strncpy(txa->mem_name, mem_name, TXA_SERVICE_NAME_LEN);
+	strlcpy(txa->mem_name, mem_name, sizeof(txa->mem_name));
 	txa->conf_cb = conf_cb;
 	txa->conf_arg = conf_arg;
 	txa->service_id = TXA_INVALID_SERVICE_ID;
-- 
2.53.0


^ permalink raw reply related

* [PATCH 1/3] ethdev: remove use of strncpy
From: Bruce Richardson @ 2026-06-23 14:19 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, stable, Thomas Monjalon, Andrew Rybchenko,
	Harman Kalra, Ferruh Yigit
In-Reply-To: <20260623141930.704771-1-bruce.richardson@intel.com>

The use of strncpy is not generally recommended, so replace it in the code
tokenizing the representor list. Since its use in this function does not
involve null-terminated strings (we know that the copied block will
not contain a null character), we can replace strncpy with memcpy
rather than another string function. This keeps the original intent of the
code.

For extra safety, also add in an explicit bounds check on the length
value before doing the memcpy.

Fixes: 9a9eb104edf6 ("ethdev: parse multiple representor devargs")
Cc: stable@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 lib/ethdev/ethdev_driver.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/ethdev/ethdev_driver.c b/lib/ethdev/ethdev_driver.c
index 70ddce5bfc..4043ce898f 100644
--- a/lib/ethdev/ethdev_driver.c
+++ b/lib/ethdev/ethdev_driver.c
@@ -583,10 +583,15 @@ eth_dev_tokenise_representor_list(char *p_val, struct rte_eth_devargs *eth_devar
 		return devargs;
 	}
 
+	/* len - 2 strips the outer '[' and ']'; guard against underflow and overflow */
+	if (len < 2 || (len - 2) >= BUFSIZ) {
+		RTE_ETHDEV_LOG_LINE(ERR, "Representor list too long or malformed: %s", p_val);
+		return -EINVAL;
+	}
 	memset(str, 0, BUFSIZ);
 	memset(da_val, 0, BUFSIZ);
 	/* Remove the exterior [] of the consolidated list */
-	strncpy(str, &p_val[1], len - 2);
+	memcpy(str, &p_val[1], len - 2);
 	while (1) {
 		if (str[i] == '\0') {
 			if (da_val[0] != '\0') {
-- 
2.53.0


^ permalink raw reply related

* [PATCH 0/3] lib: remove use of strncpy
From: Bruce Richardson @ 2026-06-23 14:19 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson

Taking a lead from the kernel, which has just finished a multi-year
effort to remove use of strncpy[1], rework DPDK to remove use of the
same function. This series removes all remaining uses of strncpy
in lib directory.

[1] https://www.phoronix.com/news/Linux-7.2-Drops-strncpy

Bruce Richardson (3):
  ethdev: remove use of strncpy
  eventdev: improve bounds checks for names in adapter create
  vhost: remove use of strncpy

 lib/ethdev/ethdev_driver.c              |  7 ++++++-
 lib/eventdev/rte_event_eth_tx_adapter.c |  4 ++--
 lib/vhost/socket.c                      |  4 +---
 lib/vhost/vduse.c                       |  2 +-
 lib/vhost/vhost.c                       | 12 +++---------
 lib/vhost/vhost.h                       |  2 +-
 6 files changed, 14 insertions(+), 17 deletions(-)

--
2.53.0


^ permalink raw reply

* Re: [PATCH v5] graph: add optional profiling stats
From: saeed bishara @ 2026-06-23 14:10 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Jerin Jacob, dev, Jerin Jacob, Kiran Kumar K, Nithin Dabilpuram,
	Zhirun Yan
In-Reply-To: <98CBD80474FA8B44BF855DF32C47DC35F6593A@smartserver.smartshare.dk>

> > also, instead of adding cacheline for this profiling data, can we
> > share with line 1 that used solely for xstats?
>
> This profiling data is 4 indexes * 2 values * 8-byte fields, so one cache line in itself.
Makes sense.
By the way, the default value of RTE_GRAPH_BURST_SIZE is 256; I suspect that
real applications will enforce a smaller burst when pulling from input
devices (e.g. 32). Do you expect such cases to change
RTE_GRAPH_BURST_SIZE?

^ permalink raw reply

* DPDK Release Status Meeting 2026-06-23
From: Mcnamara, John @ 2026-06-23 14:02 UTC (permalink / raw)
  To: dev@dpdk.org; +Cc: Thomas Monjalon, David Marchand

[-- Attachment #1: Type: text/plain, Size: 2792 bytes --]

Release status meeting minutes 2026-06-23
=========================================

Agenda:
- Release Dates
- Subtrees
- Roadmaps
- LTS
- Defects
- Opens

Participants:
- ARM
- Broadcom
- Debian
- Intel
- Marvell
- Nvidia
- Red Hat
- Stephen Hemminger

Release Dates
-------------

The following are the proposed working dates for 26.07:

| Date            | Milestone        | Description                     |
|-----------------|------------------|---------------------------------|
| 30 April 2026   | RFC/v1 patches   | Proposal deadline               |
| 11 June 2026    | 26.07-rc1        | API freeze                      |
| 25 June 2026    | 26.07-rc2        | PMD features freeze             |
| 02 July 2026    | 26.07-rc3        | Builtin apps features freeze    |
| 9 July 2026     | 26.07-rc4        | Documentation ready             |
| 16 July 2026    | 26.07.0          | Release                         |


See https://core.dpdk.org/roadmap/


Subtrees
--------

- next-net
  - 76 patches ready for main.
  - 26 patches in review/waiting.

- next-net-intel
  - 6 patches merged, ready for main
  - Small backlog.

- next-net-mlx
  - Some patches getting ready for pull.

- next-broadcom
  - Most patches merged.
  - 1-2 in backlog.

- next-net-mvl
  - PR sent.

- next-eventdev
  - No change.

- next-baseband
  - Nothing pending.

- next-virtio
  - Nothing pending.

- next-crypto
  - No update.

- next-dts
  - No update.

- main
  - BUS refactoring.
  - DMA driver to review.
  - Started pulling trees for RC2.
  - RC2 targeting June 29 2026.


Other
-----
  - None.

LTS
---

See also: https://core.dpdk.org/roadmap/#stable

LTS versions ongoing/released:

- 25.11.3 - In progress.
- 24.11.7 - In progress.
- 23.11.8 - In progress.

Older releases:
- 20.11.10 - Will only be updated with CVE and critical fixes.
- 19.11.14 - Will only be updated with CVE and critical fixes.

- Distros
  - Debian 13 contains DPDK v24.11
  - Ubuntu 25.04 contains DPDK v24.11
  - Ubuntu 24.04 LTS contains DPDK v23.11
  - RHEL 9 contains DPDK 24.11

Defects
-------

- Bugzilla links, 'Bugs',  added for hosted projects
  - https://www.dpdk.org/hosted-projects/



DPDK Release Status Meetings
----------------------------

The DPDK Release Status Meeting is intended for DPDK Committers to discuss the
status of the main tree and sub-trees, and for project managers to track
progress or milestone dates.

The meeting occurs on every Tuesday at 14:30 DST over Jitsi on https://meet.jit.si/DPDK

You don't need an invite to join the meeting but if you want a calendar reminder just
send an email to "John McNamara <john.mcnamara@intel.com>" for the invite.



[-- Attachment #2: Type: text/html, Size: 35226 bytes --]

^ permalink raw reply

* RE: [PATCH v3 6/6] test/bpf: check that bpf_convert can be JIT'd
From: Marat Khalili @ 2026-06-23 13:57 UTC (permalink / raw)
  To: Stephen Hemminger, dev@dpdk.org; +Cc: Konstantin Ananyev
In-Reply-To: <20260621162524.82690-7-stephen@networkplumber.org>

Thank you for working on this, please see some comments inline.

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Sunday 21 June 2026 17:24
> To: dev@dpdk.org
> Cc: Stephen Hemminger <stephen@networkplumber.org>; Konstantin Ananyev <konstantin.ananyev@huawei.com>;
> Marat Khalili <marat.khalili@huawei.com>
> Subject: [PATCH v3 6/6] test/bpf: check that bpf_convert can be JIT'd
> 
> Add followup in bpf conversion tests to make sure resulting
> code was also run through JIT and that JIT produces
> same results as non-JIT.
> 
> Reduce log output to make it easier to match which
> expression might be causing issues.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  app/test/test_bpf.c | 94 +++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 79 insertions(+), 15 deletions(-)
> 
> diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
> index 3a88434c3c..973dd7d659 100644
> --- a/app/test/test_bpf.c
> +++ b/app/test/test_bpf.c
> @@ -8,6 +8,7 @@
>  #include <inttypes.h>
>  #include <unistd.h>
> 
> +#include <rte_byteorder.h>
>  #include <rte_memory.h>
>  #include <rte_debug.h>
>  #include <rte_hexdump.h>
> @@ -32,6 +33,7 @@ test_bpf(void)
>  #include <rte_bpf.h>
>  #include <rte_ether.h>
>  #include <rte_ip.h>
> +#include <rte_udp.h>
> 
> 
>  /* Tests of most simple BPF programs (no instructions, one instruction etc.) */
> @@ -4529,6 +4531,7 @@ test_bpf_match(pcap_t *pcap, const char *str,
>  	int ret = -1;
>  	uint64_t rc;
> 
> +	printf("%s '%s'\n", __func__, str);
>  	if (pcap_compile(pcap, &fcode, str, 1, PCAP_NETMASK_UNKNOWN)) {
>  		printf("%s@%d: pcap_compile(\"%s\") failed: %s;\n",
>  		       __func__, __LINE__,  str, pcap_geterr(pcap));
> @@ -4550,6 +4553,24 @@ test_bpf_match(pcap_t *pcap, const char *str,
>  	}
> 
>  	rc = rte_bpf_exec(bpf, mb);
> +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)

We are going to have a few of these lines, I think something like
RTE_BPF_JIT_SUPPORTED is warranted.

> +	{
> +		struct rte_bpf_jit jit;
> +
> +		rte_bpf_get_jit(bpf, &jit);

Out of an abundance of caution, I would also prefill jit with zeroes and check
the return code here.

> +		if (jit.func == NULL) {
> +			printf("%s@%d: no JIT generated\n", __func__, __LINE__);
> +			goto error;
> +		}
> +
> +		fflush(stdout);
> +		uint64_t rc_jit = jit.func(mb);
> +		if (rc_jit != rc) {
> +			printf("%s@%d: JIT return code does not match\n", __func__, __LINE__);
> +			goto error;
> +		}
> +	}
> +#endif
>  	/* The return code from bpf capture filter is non-zero if matched */
>  	ret = (rc == 0);
>  error:
> @@ -4560,23 +4581,16 @@ test_bpf_match(pcap_t *pcap, const char *str,
>  	return ret;
>  }
> 
> -/* Basic sanity test can we match a IP packet */
> -static int
> -test_bpf_filter_sanity(pcap_t *pcap)
> +/* Setup mbuf for filter test */
> +static void
> +dummy_ip_prep(void *data, uint16_t plen)
>  {
> -	const uint32_t plen = 100;
> -	struct rte_mbuf mb, *m;
> -	uint8_t tbuf[RTE_MBUF_DEFAULT_BUF_SIZE];
>  	struct {
>  		struct rte_ether_hdr eth_hdr;
>  		struct rte_ipv4_hdr ip_hdr;
> -	} *hdr;
> +		struct rte_udp_hdr udp_hdr;
> +	} *hdr = data;
> 
> -	memset(&mb, 0, sizeof(mb));
> -	dummy_mbuf_prep(&mb, tbuf, sizeof(tbuf), plen);
> -	m = &mb;
> -
> -	hdr = rte_pktmbuf_mtod(m, typeof(hdr));
>  	hdr->eth_hdr = (struct rte_ether_hdr) {
>  		.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
>  		.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4),
> @@ -4589,13 +4603,32 @@ test_bpf_filter_sanity(pcap_t *pcap)
>  		.src_addr = rte_cpu_to_be_32(RTE_IPV4_LOOPBACK),
>  		.dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST),
>  	};
> +	hdr->udp_hdr = (struct rte_udp_hdr) {
> +		.src_port = rte_rand_max(UINT16_MAX),
> +		.dst_port = rte_cpu_to_be_16(9),	/* discard port */
> +		.dgram_len = rte_cpu_to_be_16(plen - sizeof(struct rte_ipv4_hdr)),
> +		.dgram_cksum = 0,
> +	};
> +}
> +
> +
> +/* Basic sanity test can we match a IP packet */
> +static int
> +test_bpf_filter_sanity(pcap_t *pcap)
> +{
> +	struct rte_mbuf mb = { 0 };
> +	uint8_t tbuf[RTE_MBUF_DEFAULT_BUF_SIZE];
> +	const uint32_t plen = 100;
> +
> +	dummy_mbuf_prep(&mb, tbuf, sizeof(tbuf), plen);
> +	dummy_ip_prep(rte_pktmbuf_mtod(&mb, void *), plen);
> 
> -	if (test_bpf_match(pcap, "ip", m) != 0) {
> +	if (test_bpf_match(pcap, "ip", &mb) != 0) {
>  		printf("%s@%d: filter \"ip\" doesn't match test data\n",
>  		       __func__, __LINE__);
>  		return -1;
>  	}
> -	if (test_bpf_match(pcap, "not ip", m) == 0) {
> +	if (test_bpf_match(pcap, "not ip", &mb) == 0) {

Not a new bug, but this condition should be for non-positive, not just zero.

>  		printf("%s@%d: filter \"not ip\" does match test data\n",
>  		       __func__, __LINE__);
>  		return -1;
> @@ -4648,10 +4681,15 @@ static const char * const sample_filters[] = {
>  static int
>  test_bpf_filter(pcap_t *pcap, const char *s)
>  {
> +	struct rte_mbuf mb = { 0 };
> +	uint8_t tbuf[RTE_MBUF_DEFAULT_BUF_SIZE];
> +	const uint32_t plen = 100;
>  	struct bpf_program fcode;
>  	struct rte_bpf_prm *prm = NULL;
>  	struct rte_bpf *bpf = NULL;
> +	int ret = -1;
> 
> +	printf("%s '%s'\n", __func__, s);
>  	if (pcap_compile(pcap, &fcode, s, 1, PCAP_NETMASK_UNKNOWN)) {
>  		printf("%s@%d: pcap_compile('%s') failed: %s;\n",
>  		       __func__, __LINE__, s, pcap_geterr(pcap));
> @@ -4665,8 +4703,10 @@ test_bpf_filter(pcap_t *pcap, const char *s)
>  		goto error;
>  	}
> 
> +#ifdef DEBUG
>  	printf("bpf convert for \"%s\" produced:\n", s);
>  	rte_bpf_dump(stdout, prm->ins, prm->nb_ins);
> +#endif
> 
>  	bpf = rte_bpf_load(prm);
>  	if (bpf == NULL) {
> @@ -4675,6 +4715,30 @@ test_bpf_filter(pcap_t *pcap, const char *s)
>  		goto error;
>  	}
> 
> +	dummy_mbuf_prep(&mb, tbuf, sizeof(tbuf), plen);
> +	dummy_ip_prep(rte_pktmbuf_mtod(&mb, void *), plen);
> +
> +	uint64_t rc = rte_bpf_exec(bpf, &mb);

Would it be hard to check the result against a known correct answer?

> +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
> +	{
> +		struct rte_bpf_jit jit;
> +
> +		rte_bpf_get_jit(bpf, &jit);

Same suggestion regarding zeroing jit and check return code here.

> +		if (jit.func == NULL) {
> +			printf("%s@%d: no JIT generated\n", __func__, __LINE__);
> +			goto error;
> +		}
> +
> +		fflush(stdout);
> +		uint64_t rc_jit = jit.func(&mb);
> +		if (rc_jit != rc) {
> +			printf("%s@%d: JIT return code does not match\n", __func__, __LINE__);
> +			goto error;
> +		}
> +	}
> +#endif
> +	ret = 0;
> +

Are `test_bpf_filter` and `test_bpf_filter_sanity` substantially different any
more, or could they just be merged?

>  error:
>  	if (bpf)
>  		rte_bpf_destroy(bpf);
> @@ -4685,7 +4749,7 @@ test_bpf_filter(pcap_t *pcap, const char *s)
> 
>  	rte_free(prm);
>  	pcap_freecode(&fcode);
> -	return (bpf == NULL) ? -1 : 0;
> +	return ret;
>  }
> 
>  static int
> --
> 2.53.0


^ permalink raw reply

* Re: [PATCH v4] pcapng: add user-supplied timestamp support
From: Stephen Hemminger @ 2026-06-23 13:53 UTC (permalink / raw)
  To: Dawid Wesierski; +Cc: dev, mb, Marek Kasiewicz
In-Reply-To: <20260623141302.486601-1-dawid.wesierski@intel.com>

On Tue, 23 Jun 2026 10:10:11 -0400
Dawid Wesierski <dawid.wesierski@intel.com> wrote:

> +/*
> + * Compatibility wrapper: captures current TSC (converted at write time).
> + * Equivalent to rte_pcapng_copy_ts(..., 0).
> + */
> +RTE_EXPORT_SYMBOL(rte_pcapng_copy)
> +struct rte_mbuf *
> +rte_pcapng_copy(uint16_t port_id, uint32_t queue,
> +		const struct rte_mbuf *md,
> +		struct rte_mempool *mp,
> +		uint32_t length,
> +		enum rte_pcapng_direction direction,
> +		const char *comment)
> +{
> +	return rte_pcapng_copy_ts(port_id, queue, md, mp, length, direction,
> +				  comment, 0);
> +}
> +
> +/*
> + * Convert a TSC value to nanoseconds since the Unix epoch using the
> + * calibrated clock of the capture file. Uses the same pre-computed
> + * reciprocal multiplier as the internal write path (no integer division).
> + */
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pcapng_tsc_to_ns, 26.07)
> +uint64_t
> +rte_pcapng_tsc_to_ns(const rte_pcapng_t *self, uint64_t tsc)
> +{
> +	return tsc_to_ns_epoch(&self->clock, tsc);
> +}

Why not just use function versioning on rte_pcapng_copy() to add new parameter?

Also, a coverage test should be added to app/test/test_pcapng.c

^ permalink raw reply

* Re: [PATCH 0/5] add versioned symbols for recently stabilized APIs
From: David Marchand @ 2026-06-23 13:50 UTC (permalink / raw)
  To: Dariusz Sosnowski, Thomas Monjalon, dpdk-techboard
  Cc: Bruce Richardson, Andrew Rybchenko, Viacheslav Ovsiienko,
	Bing Zhao, Ori Kam, Suanming Mou, Matan Azrad, dev
In-Reply-To: <20260623113752.1100072-1-dsosnowski@nvidia.com>

Hello Dariusz,

On Tue, 23 Jun 2026 at 13:38, Dariusz Sosnowski <dsosnowski@nvidia.com> wrote:
>
> Main goal of this patchset is to address https://bugs.dpdk.org/show_bug.cgi?id=1957

It is expected that experimental symbols may disappear overnight, and
this bug could also be closed as NOTABUG.

On the other hand, we do state in the doc that compatibility could be
provided when stabilising an experimental API, so ok.. let's try.

> but it also handles other recently stabilized symbols and has some minor fixes:
>
> - Patch 1 - Fix RTE_VERSION_EXPERIMENTAL_SYMBOL macro on clang.

Ouch... /me hides.


> - Patch 2 - Allow function versioning inside drivers.
> - Patch 3 - Version the function symbols stabilized in
>   https://git.dpdk.org/dpdk/commit/?id=e8cab133645f5466ef75e511629add43b68a5027
> - Patch 4 - Introduce versioning macros for global variable symbols.
> - Patch 5 - Version the function and variable symbols stabilized in
>   https://git.dpdk.org/dpdk/commit/?id=4ee2f5c1cedf9ee7f39afa667f71b07f4004ba5c
>
> Issue is still not fully fixed for stabilized global variables:
> rte_flow_dynf_metadata_offs and rte_flow_dynf_metadata_mask.

Well, symbol versioning is not something for variables.
Exposing global variables was a mistake from the start...
Those were exported for "performance" reasons as those are accessed
via inline helpers (but I am not sure there were benchmarks showing
the benefits).

I am for forbidding exports of global variables from now on, unless some
really good performance benchmark is provided (@techboard for info).


Now, in practice for your issue, rather than reintroducing symbol
aliases (technical solution that I dropped when refactoring the
macros), I think we can do with some middle ground approach:
- leaving the inline helpers as "stable" (not __rte_experimental),
- restoring the EXPERIMENTAL version on the global variables, this
will restore the location of those symbols from the previous ABI pov,
and the checks won't catch this discrepancy anyway,
- during 26.11, drop the EXPERIMENTAL version on those variables,


In other words, stopping at your patch 3 of the series, then adding:

$ git diff
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index ec0fe08355..8bd21ccd31 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -23,11 +23,11 @@
 #define FLOW_LOG RTE_ETHDEV_LOG_LINE

 /* Mbuf dynamic field name for metadata. */
-RTE_EXPORT_SYMBOL(rte_flow_dynf_metadata_offs)
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_flow_dynf_metadata_offs, 19.11)
 int32_t rte_flow_dynf_metadata_offs = -1;

 /* Mbuf dynamic field flag bit number for metadata. */
-RTE_EXPORT_SYMBOL(rte_flow_dynf_metadata_mask)
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_flow_dynf_metadata_mask, 19.11)
 uint64_t rte_flow_dynf_metadata_mask;

 /**

> Patch 4 and 5 address the bug for these global variables,
> by providing a single storage for both EXPERIMENTAL and
> DPDK_26 variable symbol versions.
> This is achieved through symbol aliasing.
> But this solution is limited only to executables compiled with clang.
>
> clang and gcc have a different default behavior regarding relocations
> of global variables exposed by shared libraries.
>

Yeah... not even thinking about adding MSVC in the list...


-- 
David Marchand


^ permalink raw reply related

* Re: [PATCH 1/5] eal: fix macro for versioned experimental symbol
From: Stephen Hemminger @ 2026-06-23 13:50 UTC (permalink / raw)
  To: Dariusz Sosnowski; +Cc: David Marchand, dev, Bruce Richardson
In-Reply-To: <20260623113752.1100072-2-dsosnowski@nvidia.com>

On Tue, 23 Jun 2026 13:37:47 +0200
Dariusz Sosnowski <dsosnowski@nvidia.com> wrote:

> Add a missing semicolon after __asm__ block in
> RTE_VERSION_EXPERIMENTAL_SYMBOL macro.
> Its lack triggers the following compilation error with clang:
> 
>     ../lib/ethdev/rte_flow.c:320:1: error: expected ';' after top-level asm block
>       320 | RTE_VERSION_EXPERIMENTAL_SYMBOL(int, rte_flow_dynf_metadata_register, (void))
>           | ^
>     ../lib/eal/common/eal_export.h:75:74: note: expanded from macro 'RTE_VERSION_EXPERIMENTAL_SYMBOL'
>        75 | __asm__(".symver " RTE_STR(name) "_exp, " RTE_STR(name) "@EXPERIMENTAL") \
>           |                                                                          ^
>     ../lib/eal/include/rte_common.h:237:20: note: expanded from macro '\
>     __rte_used'
>       237 | #define __rte_used __attribute__((used))
>           |                    ^
> 
> Fixes: e30e194c4d06 ("eal: rework function versioning macros")
> Cc: david.marchand@redhat.com
> 
> Signed-

I didn't see this because clang doesn't have symver support.
Which version of clang is this?

^ permalink raw reply

* Re: [PATCH 0/5] add versioned symbols for recently stabilized APIs
From: Stephen Hemminger @ 2026-06-23 13:48 UTC (permalink / raw)
  To: Dariusz Sosnowski
  Cc: Thomas Monjalon, David Marchand, Bruce Richardson,
	Andrew Rybchenko, Viacheslav Ovsiienko, Bing Zhao, Ori Kam,
	Suanming Mou, Matan Azrad, dev
In-Reply-To: <20260623113752.1100072-1-dsosnowski@nvidia.com>

On Tue, 23 Jun 2026 13:37:46 +0200
Dariusz Sosnowski <dsosnowski@nvidia.com> wrote:

> Main goal of this patchset is to address https://bugs.dpdk.org/show_bug.cgi?id=1957
> but it also handles other recently stabilized symbols and has some minor fixes:
> 
> - Patch 1 - Fix RTE_VERSION_EXPERIMENTAL_SYMBOL macro on clang.
> - Patch 2 - Allow function versioning inside drivers.
> - Patch 3 - Version the function symbols stabilized in
>   https://git.dpdk.org/dpdk/commit/?id=e8cab133645f5466ef75e511629add43b68a5027
> - Patch 4 - Introduce versioning macros for global variable symbols.
> - Patch 5 - Version the function and variable symbols stabilized in
>   https://git.dpdk.org/dpdk/commit/?id=4ee2f5c1cedf9ee7f39afa667f71b07f4004ba5c
> 
> Issue is still not fully fixed for stabilized global variables:
> rte_flow_dynf_metadata_offs and rte_flow_dynf_metadata_mask.
> Patch 4 and 5 address the bug for these global variables,
> by providing a single storage for both EXPERIMENTAL and
> DPDK_26 variable symbol versions.
> This is achieved through symbol aliasing.
> But this solution is limited only to executables compiled with clang.
> 
> clang and gcc have a different default behavior regarding relocations
> of global variables exposed by shared libraries.
> 
> With clang, R_X86_64_GLOB_DAT relocations are generated for executables:
> 
>    $ readelf -sW build-26.07/lib/librte_ethdev.so | grep rte_flow_dynf_metadata_offs
>        113: 00000000000ea4c0     4 OBJECT  GLOBAL DEFAULT   24 rte_flow_dynf_metadata_offs@@DPDK_26
>        116: 00000000000ea4c0     4 OBJECT  GLOBAL DEFAULT   24 rte_flow_dynf_metadata_offs@EXPERIMENTAL
>        970: 00000000000ea4c0     4 OBJECT  LOCAL  DEFAULT   24 rte_flow_dynf_metadata_offs_impl
>       1212: 00000000000ea4c0     4 OBJECT  LOCAL  DEFAULT   24 rte_flow_dynf_metadata_offs_v26
>       1325: 00000000000ea4c0     4 OBJECT  LOCAL  DEFAULT   24 rte_flow_dynf_metadata_offs_exp
>       1415: 00000000000ea4c0     4 OBJECT  GLOBAL DEFAULT   24 rte_flow_dynf_metadata_offs@@DPDK_26
>       1705: 00000000000ea4c0     4 OBJECT  GLOBAL DEFAULT   24 rte_flow_dynf_metadata_offs@EXPERIMENTAL
> 
>     $ readelf -rW build-26.07/drivers/librte_net_mlx5.so | grep rte_flow_dynf_metadata_offs
>     0000000003ed5f18  0000001600000006 R_X86_64_GLOB_DAT      0000000000000000 rte_flow_dynf_metadata_offs@DPDK_26 + 0
> 
>     $ readelf -rW build-25.11/app/dpdk-testpmd | grep rte_flow_dynf_metadata_offs
> --> 000000000028ef70  0000011300000006 R_X86_64_GLOB_DAT      0000000000000000 rte_flow_dynf_metadata_offs@EXPERIMENTAL + 0  
> 
> With gcc, R_X86_64_COPY relocations are generated:
> 
>     $ readelf -sW build-26.07/lib/librte_ethdev.so | grep rte_flow_dynf_metadata_offs
>        113: 00000000000e74e0     4 OBJECT  GLOBAL DEFAULT   24 rte_flow_dynf_metadata_offs@@DPDK_26
>        116: 00000000000e74e0     4 OBJECT  GLOBAL DEFAULT   24 rte_flow_dynf_metadata_offs@EXPERIMENTAL
>       1471: 00000000000e74e0     4 OBJECT  LOCAL  DEFAULT   24 rte_flow_dynf_metadata_offs_impl
>       2134: 00000000000e74e0     4 OBJECT  LOCAL  DEFAULT   24 rte_flow_dynf_metadata_offs_v26
>       2247: 00000000000e74e0     4 OBJECT  LOCAL  DEFAULT   24 rte_flow_dynf_metadata_offs_exp
>       2337: 00000000000e74e0     4 OBJECT  GLOBAL DEFAULT   24 rte_flow_dynf_metadata_offs@@DPDK_26
>       2627: 00000000000e74e0     4 OBJECT  GLOBAL DEFAULT   24 rte_flow_dynf_metadata_offs@EXPERIMENTAL
> 
>     $ readelf -rW build-26.07/drivers/librte_net_mlx5.so | grep rte_flow_dynf_metadata_offs
>     00000000046dbef0  0000001600000006 R_X86_64_GLOB_DAT      0000000000000000 rte_flow_dynf_metadata_offs@DPDK_26 + 0
> 
>     $ readelf -rW build-25.11/app/dpdk-testpmd | grep rte_flow_dynf_metadata_offs
> --> 000000000029b540  000001d200000005 R_X86_64_COPY          000000000029b540 rte_flow_dynf_metadata_offs@EXPERIMENTAL + 0  
> 
> With copy relocations (testpmd linked through gcc) the following happens:
> 
> - When variable symbol (with EXPERIMENTAL version) gets resolved inside executable,
>   global variable gets copied from read-only data to executable's BSS section.
>   Executable will access this variable through BSS.
> - When variable symbol (with DPDK_26 version) gets resolved inside a library,
>   global variable is accessed indirectly through GOT.
>   It is stored inside BSS section of the shared library.
> 
> So executable and libraries refer to different storage,
> eventually leading to inconsistent runtime behavior.
> The problem only appears when executable and library require
> different versions of global variable symbol.
> If testpmd from 26.07 is used with libraries from 26.07,
> GOT entry for these variables will point to copied variable.
> 
> Without copy relocations (testpmd linked through clang) both
> executable and libraries access the global variable indirectly through GOT.
> Runtime behavior is consistent, regardless of the mix of variable symbol versions.
> 
> The only other solution I could find was to use dlsym() inside libraries
> to dynamically resolve the location of rte_flow_dynf_metadata_offs and rte_flow_dynf_metadata_mask,
> but this solution sounds like overkill.
> Essentially this would require moving to getter/setter functions for these variables
> inside the library.
> 
> I would appreciate any feedback or suggestions if anybody had encountered a similar issue before.
> 
> Dariusz Sosnowski (5):
>   eal: fix macro for versioned experimental symbol
>   drivers: support function versioning
>   net/mlx5: fix stabilized function versions
>   eal: support aliases for versioned variable symbols
>   ethdev: fix promoted flow metadata symbols
> 
>  buildtools/gen-version-map.py        | 11 ++++++++++
>  drivers/meson.build                  |  8 +++++++
>  drivers/net/mlx5/meson.build         |  2 ++
>  drivers/net/mlx5/mlx5_driver_event.c | 22 ++++++++++++++-----
>  drivers/net/mlx5/mlx5_flow.c         | 18 ++++++++++-----
>  lib/eal/common/eal_export.h          | 24 +++++++++++++++++++-
>  lib/ethdev/meson.build               |  2 ++
>  lib/ethdev/rte_flow.c                | 33 ++++++++++++++++++----------
>  8 files changed, 96 insertions(+), 24 deletions(-)
> 
> --
> 2.47.3
> 

The bugfix is good, but not sure the rest is needed right now.
It is getting late to add more stuff for 26.07 and in 26.11 function versioning
will not be needed.

^ permalink raw reply

* [PATCH v4] pcapng: add user-supplied timestamp support
From: Dawid Wesierski @ 2026-06-23 14:10 UTC (permalink / raw)
  To: dev; +Cc: dawid.wesierski, stephen, mb, Marek Kasiewicz
In-Reply-To: <20260618143819.310046-1-dawid.wesierski@intel.com>

From: "Wesierski, Dawid" <dawid.wesierski@intel.com>

Introduce rte_pcapng_copy_ts() alongside the existing rte_pcapng_copy()
so that callers with a hardware PTP or pre-captured timestamp can inject
an exact epoch-ns value directly into the packet record.

Timestamp handling in rte_pcapng_copy_ts():
 - ts != 0: caller-supplied nanoseconds since the Unix epoch, stored as-is.
 - ts == 0: TSC captured at copy time with bit 63 set as a sentinel.
   rte_pcapng_write_packets() detects the sentinel and converts the TSC to
   epoch ns using the file's calibrated clock.  The TSC will not reach
   bit 63 for centuries, and epoch-ns values stay below bit 63 until 2262,
   so the bit is safe to use as a disambiguation flag.

rte_pcapng_copy() is retained as a real exported function (not an inline
wrapper) so the stable ABI symbol is preserved.  It simply calls
rte_pcapng_copy_ts(..., 0) to capture the current TSC.

rte_pcapng_tsc_to_ns() is added as a new experimental helper (addressing
review requests from Stephen Hemminger and Morten Brørup).  It exposes the
same calibrated, drift-compensated, divide-free TSC-to-epoch-ns conversion
used internally by rte_pcapng_write_packets(), allowing callers to convert
a TSC captured at packet arrival time before passing it to
rte_pcapng_copy_ts().

Signed-off-by: Marek Kasiewicz <marek.kasiewicz@intel.com>
Signed-off-by: Dawid Wesierski <dawid.wesierski@intel.com>
---
Hi Stephen, Morten,
Thank you very much for your review and comments.

I have prepared a v4 patch.

ABI failure > I have restored rte_pcapng_copy() as a real exported function instead of a static inline wrapper.
This should fix the iol-abi-testing failure. It now simply calls rte_pcapng_copy_ts(..., 0) internally.

As suggested, I've added a new experimental function uint64_t rte_pcapng_tsc_to_ns(const rte_pcapng_t *self, uint64_t tsc);
I exposed the internal calibrated clock state maintained by the pcapng.

Regards,
Dawid Węsierski.

 .mailmap                |  2 ++
 lib/pcapng/rte_pcapng.c | 71 +++++++++++++++++++++++++++++++++--------
 lib/pcapng/rte_pcapng.h | 64 +++++++++++++++++++++++++++++++++++++
 3 files changed, 124 insertions(+), 13 deletions(-)

diff --git a/.mailmap b/.mailmap
index 4001e5fb0e..a7d97a631e 100644
--- a/.mailmap
+++ b/.mailmap
@@ -366,6 +366,7 @@ David Zeng <zengxhsh@cn.ibm.com>
 Davide Caratti <dcaratti@redhat.com>
 Dawid Gorecki <dgr@semihalf.com>
 Dawid Jurczak <dawid_jurek@vp.pl>
+Dawid Wesierski <dawid.wesierski@intel.com> Wesierski, Dawid <dawid.wesierski@intel.com>
 Dawid Zielinski <dawid.zielinski@intel.com>
 Dawid Łukwiński <dawid.lukwinski@intel.com>
 Daxue Gao <daxuex.gao@intel.com>
@@ -1014,6 +1015,7 @@ Marcin Wilk <marcin.wilk@caviumnetworks.com>
 Marcin Wojtas <mw@semihalf.com>
 Marcin Zapolski <marcinx.a.zapolski@intel.com>
 Marco Varlese <mvarlese@suse.de>
+Marek Kasiewicz <marek.kasiewicz@intel.com>
 Marek Mical <marekx.mical@intel.com>
 Marek Zalfresso-jundzillo <marekx.zalfresso-jundzillo@intel.com>
 Maria Lingemark <maria.lingemark@ericsson.com>
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index b5d1026891..f583fae995 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -546,14 +546,14 @@ pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
  */
 
 /* Make a copy of original mbuf with pcapng header and options */
-RTE_EXPORT_SYMBOL(rte_pcapng_copy)
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pcapng_copy_ts, 26.07)
 struct rte_mbuf *
-rte_pcapng_copy(uint16_t port_id, uint32_t queue,
+rte_pcapng_copy_ts(uint16_t port_id, uint32_t queue,
 		const struct rte_mbuf *md,
 		struct rte_mempool *mp,
 		uint32_t length,
 		enum rte_pcapng_direction direction,
-		const char *comment)
+		const char *comment, uint64_t ts)
 {
 	struct pcapng_enhance_packet_block *epb;
 	uint32_t orig_len, pkt_len, padding, flags;
@@ -690,8 +690,20 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
 	/* Interface index is filled in later during write */
 	mc->port = port_id;
 
-	/* Put timestamp in cycles here - adjust in packet write */
-	timestamp = rte_get_tsc_cycles();
+	/*
+	 * Timestamp handling:
+	 *  - If the caller supplied an explicit timestamp (ts != 0), it is
+	 *    already in nanoseconds since the Unix epoch, so store it as-is.
+	 *  - If the caller did not (ts == 0), store the current TSC and set
+	 *    the high bit as a sentinel so rte_pcapng_write_packets() knows
+	 *    it must convert TSC -> epoch ns at write time. The TSC counter
+	 *    will not reach bit 63 for centuries, and epoch-ns values stay
+	 *    below bit 63 until the year 2262, so the bit is safe to use.
+	 */
+	if (ts != 0)
+		timestamp = ts;
+	else
+		timestamp = rte_get_tsc_cycles() | (UINT64_C(1) << 63);
 	epb->timestamp_hi = timestamp >> 32;
 	epb->timestamp_lo = (uint32_t)timestamp;
 	epb->capture_length = pkt_len;
@@ -707,6 +719,35 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
 	return NULL;
 }
 
+/*
+ * Compatibility wrapper: captures current TSC (converted at write time).
+ * Equivalent to rte_pcapng_copy_ts(..., 0).
+ */
+RTE_EXPORT_SYMBOL(rte_pcapng_copy)
+struct rte_mbuf *
+rte_pcapng_copy(uint16_t port_id, uint32_t queue,
+		const struct rte_mbuf *md,
+		struct rte_mempool *mp,
+		uint32_t length,
+		enum rte_pcapng_direction direction,
+		const char *comment)
+{
+	return rte_pcapng_copy_ts(port_id, queue, md, mp, length, direction,
+				  comment, 0);
+}
+
+/*
+ * Convert a TSC value to nanoseconds since the Unix epoch using the
+ * calibrated clock of the capture file. Uses the same pre-computed
+ * reciprocal multiplier as the internal write path (no integer division).
+ */
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pcapng_tsc_to_ns, 26.07)
+uint64_t
+rte_pcapng_tsc_to_ns(const rte_pcapng_t *self, uint64_t tsc)
+{
+	return tsc_to_ns_epoch(&self->clock, tsc);
+}
+
 /* Write pre-formatted packets to file. */
 RTE_EXPORT_SYMBOL(rte_pcapng_write_packets)
 ssize_t
@@ -720,7 +761,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
 	for (i = 0; i < nb_pkts; i++) {
 		struct rte_mbuf *m = pkts[i];
 		struct pcapng_enhance_packet_block *epb;
-		uint64_t cycles, timestamp;
+		uint64_t timestamp;
 
 		/* sanity check that is really a pcapng mbuf */
 		epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
@@ -738,14 +779,18 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
 		}
 
 		/*
-		 * When data is captured by pcapng_copy the current TSC is stored.
-		 * Adjust the value recorded in file to PCAP epoch units.
+		 * If rte_pcapng_copy[_ts]() stored a TSC value (high bit set
+		 * as sentinel), convert it to nanoseconds since the Unix epoch
+		 * using the per-file clock. Otherwise the timestamp is already
+		 * in epoch ns and is written unchanged.
 		 */
-		cycles = (uint64_t)epb->timestamp_hi << 32;
-		cycles += epb->timestamp_lo;
-		timestamp = tsc_to_ns_epoch(&self->clock, cycles);
-		epb->timestamp_hi = timestamp >> 32;
-		epb->timestamp_lo = (uint32_t)timestamp;
+		timestamp = ((uint64_t)epb->timestamp_hi << 32) | epb->timestamp_lo;
+		if (timestamp & (UINT64_C(1) << 63)) {
+			timestamp &= ~(UINT64_C(1) << 63);
+			timestamp = tsc_to_ns_epoch(&self->clock, timestamp);
+			epb->timestamp_hi = timestamp >> 32;
+			epb->timestamp_lo = (uint32_t)timestamp;
+		}
 
 		/*
 		 * Handle case of highly fragmented and large burst size
diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h
index d8d328f710..6eeaeada05 100644
--- a/lib/pcapng/rte_pcapng.h
+++ b/lib/pcapng/rte_pcapng.h
@@ -108,9 +108,50 @@ enum rte_pcapng_direction {
 	RTE_PCAPNG_DIRECTION_OUT = 2,
 };
 
+/**
+ * Format an mbuf with a caller-supplied timestamp for writing to file.
+ *
+ * @param port_id
+ *   The Ethernet port on which packet was received
+ *   or is going to be transmitted.
+ * @param queue
+ *   The queue on the Ethernet port where packet was received
+ *   or is going to be transmitted.
+ * @param mp
+ *   The mempool from which the "clone" mbufs are allocated.
+ * @param m
+ *   The mbuf to copy
+ * @param length
+ *   The upper limit on bytes to copy.  Passing UINT32_MAX
+ *   means all data (after offset).
+ * @param direction
+ *   The direction of the packet: receive, transmit or unknown.
+ * @param comment
+ *   Optional per packet comment.
+ *   Truncated to UINT16_MAX characters.
+ * @param ts
+ *   Packet timestamp in nanoseconds since the Unix epoch. If zero, the
+ *   current TSC is captured and converted to epoch ns by
+ *   rte_pcapng_write_packets() when the packet is written.
+ *
+ * @return
+ *   - The pointer to the new mbuf formatted for pcapng_write
+ *   - NULL on error such as invalid port or out of memory.
+ */
+__rte_experimental
+struct rte_mbuf *
+rte_pcapng_copy_ts(uint16_t port_id, uint32_t queue,
+		const struct rte_mbuf *m, struct rte_mempool *mp,
+		uint32_t length,
+		enum rte_pcapng_direction direction, const char *comment,
+		uint64_t ts);
+
 /**
  * Format an mbuf for writing to file.
  *
+ * Equivalent to rte_pcapng_copy_ts() with ts=0: the current TSC is
+ * captured at copy time and converted to epoch ns at write time.
+ *
  * @param port_id
  *   The Ethernet port on which packet was received
  *   or is going to be transmitted.
@@ -153,6 +194,29 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
 uint32_t
 rte_pcapng_mbuf_size(uint32_t length);
 
+/**
+ * Convert a TSC value to nanoseconds since the Unix epoch.
+ *
+ * Uses the same calibrated clock reference as the capture file so that
+ * the result is consistent with timestamps written by
+ * rte_pcapng_write_packets(). The conversion is drift-compensated and
+ * uses a pre-computed reciprocal multiplier (no integer division).
+ *
+ * Typical use: convert a TSC timestamp captured close to packet arrival
+ * (e.g., from a PMD or hardware register) to an epoch-ns value before
+ * passing it to rte_pcapng_copy_ts().
+ *
+ * @param self
+ *   The handle to the packet capture file.
+ * @param tsc
+ *   TSC value to convert.
+ * @return
+ *   Nanoseconds since the Unix epoch corresponding to @p tsc.
+ */
+__rte_experimental
+uint64_t
+rte_pcapng_tsc_to_ns(const rte_pcapng_t *self, uint64_t tsc);
+
 /**
  * Write packets to the capture file.
  *
-- 
2.47.3

---------------------------------------------------------------------
Intel Technology Poland sp. z o.o.
ul. Slowackiego 173 | 80-298 Gdansk | Sad Rejonowy Gdansk Polnoc | VII Wydzial Gospodarczy Krajowego Rejestru Sadowego - KRS 101882 | NIP 957-07-52-316 | Kapital zakladowy 200.000 PLN.
Spolka oswiadcza, ze posiada status duzego przedsiebiorcy w rozumieniu ustawy z dnia 8 marca 2013 r. o przeciwdzialaniu nadmiernym opoznieniom w transakcjach handlowych.

Ta wiadomosc wraz z zalacznikami jest przeznaczona dla okreslonego adresata i moze zawierac informacje poufne. W razie przypadkowego otrzymania tej wiadomosci, prosimy o powiadomienie nadawcy oraz trwale jej usuniecie; jakiekolwiek przegladanie lub rozpowszechnianie jest zabronione.
This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). If you are not the intended recipient, please contact the sender and delete all copies; any review or distribution by others is strictly prohibited.

^ permalink raw reply related

* Re: [PATCH] common/cnxk: fix inline dev null dereference
From: Jerin Jacob @ 2026-06-23 13:35 UTC (permalink / raw)
  To: Aarnav JP
  Cc: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Rakesh Kudurumalla, jerinj, rbhansali,
	stable
In-Reply-To: <20260623085433.3190541-1-ajp@marvell.com>

On Tue, Jun 23, 2026 at 2:31 PM Aarnav JP <ajp@marvell.com> wrote:
>
> inl_dev is initialized to NULL and only assigned within the
> if (idev && idev->nix_inl_dev) block.
> Move inl_dev->res_addr_offset and inl_dev->cpt_cq_ena
> accesses inside this null-guarded block in
> nix_inl_inb_ipsec_sa_tbl_setup() and nix_inl_reass_inb_sa_tbl_setup()
> to avoid dereferencing a null pointer.
>
> Fixes: 3fdf3e53f3c4 ("common/cnxk: enable CPT CQ for inline IPsec inbound")
> Cc: stable@dpdk.org
>
> Signed-off-by: Aarnav JP <ajp@marvell.com>


Applied to dpdk-next-net-mrvl/for-main. Thanks

^ permalink raw reply

* Re: [PATCH] net/mlx5: fix double free in vectorized Rx recovery
From: Dariusz Sosnowski @ 2026-06-23 12:50 UTC (permalink / raw)
  To: Borys Tsyrulnikov
  Cc: Thomas Monjalon, Viacheslav Ovsiienko, Bing Zhao, Ori Kam,
	Suanming Mou, Matan Azrad, Alexander Kozyrev, dev, stable
In-Reply-To: <20260617134301.798213-1-tsyrulnikov.borys@gmail.com>

On Wed, Jun 17, 2026 at 04:43:01PM +0300, Borys Tsyrulnikov wrote:
> During Rx queue error recovery, the vectorized path in
> mlx5_rx_err_handle() reallocates an mbuf for every queue element. When
> rte_mbuf_raw_alloc() fails (for example, the mempool is exhausted), the
> rollback loop frees the mbufs allocated so far, but masks the element
> ring index with "& elts_n" instead of "& (elts_n - 1)".
> 
> elts_n is a power-of-two element count, so "x & elts_n" isolates a
> single bit and can only evaluate to 0 or elts_n, regardless of the loop
> counter. The rollback therefore never frees the mbufs just allocated in
> this pass (they are leaked); instead it repeatedly frees elts[0], a live
> mbuf still posted to the NIC (use-after-free / double free), and
> elts[elts_n], the fake_mbuf padding entry used by the vector datapath.
> 
> Mask with the existing e_mask (elts_n - 1), as already done in the
> matching forward allocation loop just above.
> 
> Fixes: 0f20acbf5eda ("net/mlx5: implement vectorized MPRQ burst")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Borys Tsyrulnikov <tsyrulnikov.borys@gmail.com>

Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>

^ permalink raw reply

* Re: [PATCH v3 05/25] bpf/validate: introduce debugging interface
From: Thomas Monjalon @ 2026-06-23 12:29 UTC (permalink / raw)
  To: Marat Khalili; +Cc: Konstantin Ananyev, dev@dpdk.org
In-Reply-To: <84ce7f7669404239864c61819267d9b6@huawei.com>

23/06/2026 12:29, Marat Khalili:
> > -----Original Message-----
> > From: Thomas Monjalon <thomas@monjalon.net>
> > Sent: Tuesday 23 June 2026 11:19
> > To: Marat Khalili <marat.khalili@huawei.com>
> > Cc: Konstantin Ananyev <konstantin.ananyev@huawei.com>; dev@dpdk.org
> > Subject: Re: [PATCH v3 05/25] bpf/validate: introduce debugging interface
> > 
> > 12/06/2026 12:47, Marat Khalili:
> > > +#ifndef LIST_FOREACH_SAFE
> > > +/* We need this macro which neither Linux nor EAL for Linux include yet. */
> > > +#define        LIST_FOREACH_SAFE(var, head, field, tvar)                       \
> > > +       for ((var) = LIST_FIRST((head));                                \
> > > +           (var) && ((tvar) = LIST_NEXT((var), field), 1);             \
> > > +           (var) = (tvar))
> > > +#else
> > > +#ifdef RTE_EXEC_ENV_LINUX
> > > +#error "Don't need LIST_FOREACH_SAFE in this version of DPDK anymore, remove it."
> > > +#endif
> > > +#endif
> > 
> > It fails on Alpine Linux.
> > Why adding this #error?
> > 
> 
> This is interesting. My mental model was that Linux is never going to have
> LIST_FOREACH_SAFE, but DPDK will eventually gain its own polyfill. I was
> actually expecting it to happen before my patch is published, so this was a
> reminder to remove my own definition since it clearly belongs to some common
> library. Turns out I was wrong on both accounts: there are Linuxes that define
> LIST_FOREACH_SAFE, and I managed to submit faster. Apart from these
> organizational issues the whole else branch can be safely removed. Do you want
> me to submit an updated version?

Yes would be nice so we will have a full CI run on it
now that the dependency is merged in main.



^ permalink raw reply

* RE: [PATCH v5] graph: add optional profiling stats
From: Morten Brørup @ 2026-06-23 12:04 UTC (permalink / raw)
  To: saeed bishara
  Cc: Jerin Jacob, dev, Jerin Jacob, Kiran Kumar K, Nithin Dabilpuram,
	Zhirun Yan
In-Reply-To: <CAHfVqdWKoDqb0uD_HrF8e=GqadThPhZj0vZnRYDW=KMPei0mXQ@mail.gmail.com>

> From: saeed bishara [mailto:saeed.bishara.os@gmail.com]
> Sent: Tuesday, 23 June 2026 10.34
> 
> > > > > +               /** Fast path area cache line 3. */
> > > > > +#ifdef RTE_GRAPH_PROFILE
> > > > > +               struct {
> > > > > +                       uint64_t calls;     /**< Calls
> processing
> > > > resp. 0 or 1 objects. */
> > > > > +                       uint64_t cycles;    /**< Cycles spent
> > > > processing resp. 0 or 1 objects. */
> > > > > +               } usage_stats[2];       /**< Usage when this
> node
> > > > processed 0 or 1 objects. */
> > > > > +               uint64_t full_burst_calls;  /**< Calls
> processing a
> > > > full burst of objects. */
> > > > > +               uint64_t full_burst_cycles; /**< Cycles spent
> > > > processing a full burst of objects. */
> > > > > +               uint64_t half_burst_calls;  /**< Calls
> processing a
> > > > half burst of objects. */
> > > > > +               uint64_t half_burst_cycles; /**< Cycles spent
> > > > processing a half burst of objects. */
> > > > > +               /** Fast path area cache line 4. */
> > > > > +#endif
> > > >
> > > > Is it an ABI breakage?
> Can you consider one array for all cases?

Ack.

> also, instead of adding cacheline for this profiling data, can we
> share with line 1 that used solely for xstats?

This profiling data is 4 indexes * 2 values * 8-byte fields, so one cache line in itself.


^ permalink raw reply

* Re: [PATCH v1 0/5] prefix lcore role enum values
From: lihuisong (C) @ 2026-06-23 11:52 UTC (permalink / raw)
  To: David Marchand
  Cc: Stephen Hemminger, Morten Brørup, thomas, andrew.rybchenko,
	dev, zhanjie9
In-Reply-To: <CAJFAV8yNsZ_SLcG-ukzmDTQXRXDsGVtf-9szwSc6T2GM+fhE_Q@mail.gmail.com>


On 6/22/2026 4:18 PM, David Marchand wrote:
> Hello all,
>
> On Mon, 22 Jun 2026 at 03:23, lihuisong (C) <lihuisong@huawei.com> wrote:
>> On 6/19/2026 10:03 AM, Stephen Hemminger wrote:
>>> On Wed, 17 Jun 2026 13:48:37 +0200
>>> Morten Brørup <mb@smartsharesystems.com> wrote:
>>>
>>>>> From: Huisong Li [mailto:lihuisong@huawei.com]
>>>>> Sent: Wednesday, 17 June 2026 12.28
>>>>>
>>>>> Add the RTE_LCORE_ prefix to the lcore role enum values in
>>>>> rte_lcore_role_t
>>>>> to follow DPDK naming conventions.
>>>>>
>>>>> - ROLE_RTE      -> RTE_LCORE_ROLE_RTE
>>>>> - ROLE_OFF      -> RTE_LCORE_ROLE_OFF
>>>>> - ROLE_SERVICE  -> RTE_LCORE_ROLE_SERVICE
>>>>> - ROLE_NON_EAL  -> RTE_LCORE_ROLE_NON_EAL
>>>>>
>>>>> Old names are kept as macros aliasing to the new names to preserve
>>>>> backward compatibility.
>>>>>
>>>> Series-Acked-by: Morten Brørup <mb@smartsharesystems.com>
>>>>
>>> The problem with this patch is that it causes build failures now with abi diff.
>>>
>>> Example build log...
>>>
>>>
>>> 2 functions with some indirect sub-type change:
>>>
>>>
>>>
>>>
>>>
>>>    [C] 'function rte_lcore_role_t rte_eal_lcore_role(unsigned int)' at eal_common_lcore.c:74:1 has some indirect sub-type changes:
>>>
>>>    return type changed:
>>>
>>>    type size hasn't changed
>>>
>>>    4 enumerator deletions:
>>>
>>>    'rte_lcore_role_t::ROLE_RTE' value '0'
>>>
>>>    'rte_lcore_role_t::ROLE_OFF' value '1'
>>>
>>>    'rte_lcore_role_t::ROLE_SERVICE' value '2'
>>>
>>>    'rte_lcore_role_t::ROLE_NON_EAL' value '3'
>>>
>>>    4 enumerator insertions:
>>>
>>>    'rte_lcore_role_t::RTE_LCORE_ROLE_RTE' value '0'
>>>
>>>    'rte_lcore_role_t::RTE_LCORE_ROLE_OFF' value '1'
>>>
>>>    'rte_lcore_role_t::RTE_LCORE_ROLE_SERVICE' value '2'
>>>
>>>    'rte_lcore_role_t::RTE_LCORE_ROLE_NON_EAL' value '3'
>>>
>>>
>>>
>>>
>>>
>>>    [C] 'function int rte_lcore_has_role(unsigned int, rte_lcore_role_t)' at eal_common_lcore.c:85:1 has some indirect sub-type changes:
>>>
>>>    parameter 2 of type 'enum rte_lcore_role_t' has sub-type changes:
>>>
>>>    enum type 'enum rte_lcore_role_t' changed at rte_lcore.h:33:1, as reported earlier
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>> Error: ABI issue reported for abidiff --suppr /home/runner/work/dpdk/dpdk/devtools/libabigail.abignore --no-added-syms --headers-dir1 reference/usr/local/include --headers-dir2 install/usr/local/include reference/usr/local/lib/librte_eal.so.26.1 install/usr/local/lib/librte_eal.so.26.2
>> We just came back from the Dragon Boat Festival.
>> I also received this ABI change warning. But I didn't have any good
>> ideas yet.
>> Thanks for helping to handle this.
>> Sorry for the inconvenience.
> There is nothing broken from a ABI pov.
> This is a limitation in earlier versions of libabigail.
> I can't reproduce with libabigail 2.9 (update in progress as I see
> 2.10 is available now).
>
> I think it was solved in libabigail 2.8
> (https://sourceware.org/git/?p=libabigail.git;a=commit;h=6f5f91564bdd).
This seems to solve the problem.
>
> If we want to go with the enum renaming before 26.11, bumping
> libabigail to 2.10 in the CI is an option (latest upstream version,
> and this is the version in f43 and f44).
> I tried it in GHA:
> https://github.com/david-marchand/dpdk/actions/runs/27937595115/job/82662953500
I also tested it with libabigail version 2.9.0. There were no warnings.
-->
abidiff build-ref/lib/librte_eal.so build-new/lib/librte_eal.so
Functions changes summary: 0 Removed, 0 Changed (2 filtered out), 0 
Added functions
Variables changes summary: 0 Removed, 0 Changed, 0 Added variable

>
>

^ permalink raw reply

* [PATCH 5/5] ethdev: fix promoted flow metadata symbols
From: Dariusz Sosnowski @ 2026-06-23 11:37 UTC (permalink / raw)
  To: Thomas Monjalon, Andrew Rybchenko, Ori Kam
  Cc: dev, David Marchand, Bruce Richardson, Yu Jiang
In-Reply-To: <20260623113752.1100072-1-dsosnowski@nvidia.com>

Offending patch stabilized the following symbols:

- 1 function symbol:
    - rte_flow_dynf_metadata_register
- 2 global variable symbols:
    - rte_flow_dynf_metadata_offs
    - rte_flow_dynf_metadata_mask

Any application using these flow metadata symbols,
which was linked dynamically against 25.11 version of ethdev
library and using current version of ethdev library
would fail on symbol resolution, because EXPERIMENTAL versions
were not exported.
Specifically, on application start up
variable symbol lookup error happens:

/tmp/dpdk-25.11/usr/local/bin/dpdk-testpmd:
  symbol lookup error: /tmp/dpdk-25.11/usr/local/bin/dpdk-testpmd:
    undefined symbol: rte_flow_dynf_metadata_offs, version EXPERIMENTAL

This error occurs because symbol lookup for global variables
happens on application startup.

This patch addresses that by adding versioned aliases
for the following variable symbols:

- rte_flow_dynf_metadata_offs
- rte_flow_dynf_metadata_mask

Versioned function symbols are also added
for rte_flow_dynf_metadata_register().

Bugzilla ID: 1957
Fixes: 4ee2f5c1cedf ("ethdev: promote flow metadata API to stable")

Reported-by: Yu Jiang <yux.jiang@intel.com>
Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
 lib/ethdev/meson.build |  2 ++
 lib/ethdev/rte_flow.c  | 33 ++++++++++++++++++++++-----------
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index 8ba6c708a2..63fd866af9 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
+use_function_versioning = true
+
 sources = files(
         'ethdev_driver.c',
         'ethdev_private.c',
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index ec0fe08355..a8c01ffe8a 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -23,12 +23,20 @@
 #define FLOW_LOG RTE_ETHDEV_LOG_LINE
 
 /* Mbuf dynamic field name for metadata. */
-RTE_EXPORT_SYMBOL(rte_flow_dynf_metadata_offs)
-int32_t rte_flow_dynf_metadata_offs = -1;
+static int32_t rte_flow_dynf_metadata_offs_impl = -1;
+
+RTE_DEFAULT_SYMBOL_ALIAS(26, int32_t, rte_flow_dynf_metadata_offs,
+			 rte_flow_dynf_metadata_offs_impl);
+RTE_VERSION_EXPERIMENTAL_SYMBOL_ALIAS(int32_t, rte_flow_dynf_metadata_offs,
+				      rte_flow_dynf_metadata_offs_impl);
 
 /* Mbuf dynamic field flag bit number for metadata. */
-RTE_EXPORT_SYMBOL(rte_flow_dynf_metadata_mask)
-uint64_t rte_flow_dynf_metadata_mask;
+static uint64_t rte_flow_dynf_metadata_mask_impl = 0;
+
+RTE_DEFAULT_SYMBOL_ALIAS(26, uint64_t, rte_flow_dynf_metadata_mask,
+			 rte_flow_dynf_metadata_mask_impl);
+RTE_VERSION_EXPERIMENTAL_SYMBOL_ALIAS(uint64_t, rte_flow_dynf_metadata_mask,
+				      rte_flow_dynf_metadata_mask_impl);
 
 /**
  * Flow elements description tables.
@@ -281,9 +289,7 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = {
 	MK_FLOW_ACTION(JUMP_TO_TABLE_INDEX, sizeof(struct rte_flow_action_jump_to_table_index)),
 };
 
-RTE_EXPORT_SYMBOL(rte_flow_dynf_metadata_register)
-int
-rte_flow_dynf_metadata_register(void)
+RTE_DEFAULT_SYMBOL(26, int, rte_flow_dynf_metadata_register, (void))
 {
 	int offset;
 	int flag;
@@ -303,19 +309,24 @@ rte_flow_dynf_metadata_register(void)
 	flag = rte_mbuf_dynflag_register(&desc_flag);
 	if (flag < 0)
 		goto error;
-	rte_flow_dynf_metadata_offs = offset;
-	rte_flow_dynf_metadata_mask = RTE_BIT64(flag);
+	rte_flow_dynf_metadata_offs_impl = offset;
+	rte_flow_dynf_metadata_mask_impl = RTE_BIT64(flag);
 
 	rte_flow_trace_dynf_metadata_register(offset, RTE_BIT64(flag));
 
 	return 0;
 
 error:
-	rte_flow_dynf_metadata_offs = -1;
-	rte_flow_dynf_metadata_mask = UINT64_C(0);
+	rte_flow_dynf_metadata_offs_impl = -1;
+	rte_flow_dynf_metadata_mask_impl = UINT64_C(0);
 	return -rte_errno;
 }
 
+RTE_VERSION_EXPERIMENTAL_SYMBOL(int, rte_flow_dynf_metadata_register, (void))
+{
+	return rte_flow_dynf_metadata_register();
+}
+
 static inline void
 fts_enter(struct rte_eth_dev *dev)
 {
-- 
2.47.3


^ permalink raw reply related

* [PATCH 4/5] eal: support aliases for versioned variable symbols
From: Dariusz Sosnowski @ 2026-06-23 11:37 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev, David Marchand
In-Reply-To: <20260623113752.1100072-1-dsosnowski@nvidia.com>

Existing symbol versioning macros are not suitable for versioning
exported global variables.

Specifically, if the existing macros are used for versioning
a global variable symbol promoted from experimental to stable,
the result would be multiple variables defined with separate storage.
If an application was linked against an older DPDK and had copy
relocations, this would yield inconsistent behavior:

- Application would use experimental symbol version,
  with storage set up in BSS section in application.
- Library would use latest symbol version,
  with storage set up in BSS section of shared object.

This patch adds versioning macros which utilize symbol aliasing.
Specifically, a new variable (with a version suffix) is defined
as an alias to a private (static) variable inside the library.
Variable symbol versions are attached to these alias variables.

The following macros are added:

- RTE_VERSION_EXPERIMENTAL_SYMBOL_ALIAS
- RTE_DEFAULT_SYMBOL_ALIAS

Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
 buildtools/gen-version-map.py | 11 +++++++++++
 lib/eal/common/eal_export.h   | 22 ++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/buildtools/gen-version-map.py b/buildtools/gen-version-map.py
index 57e08a8c0f..aa88e69179 100755
--- a/buildtools/gen-version-map.py
+++ b/buildtools/gen-version-map.py
@@ -14,8 +14,12 @@
 export_int_sym_regexp = re.compile(r"^RTE_EXPORT_INTERNAL_SYMBOL\(([^)]+)\)")
 export_sym_regexp = re.compile(r"^RTE_EXPORT_SYMBOL\(([^)]+)\)")
 ver_sym_regexp = re.compile(r"^RTE_VERSION_SYMBOL\(([^,]+), [^,]+, ([^,]+),")
+
 ver_exp_sym_regexp = re.compile(r"^RTE_VERSION_EXPERIMENTAL_SYMBOL\([^,]+, ([^,]+),")
+ver_exp_sym_alias_regexp = re.compile(r"^RTE_VERSION_EXPERIMENTAL_SYMBOL_ALIAS\([^,]+, ([^,]+),")
+
 default_sym_regexp = re.compile(r"^RTE_DEFAULT_SYMBOL\(([^,]+), [^,]+, ([^,]+),")
+default_sym_alias_regexp = re.compile(r"^RTE_DEFAULT_SYMBOL_ALIAS\(([^,]+), [^,]+, ([^,]+),")
 
 parser = argparse.ArgumentParser(
     description=__doc__,
@@ -73,10 +77,17 @@
         elif ver_exp_sym_regexp.match(ln):
             node = "EXPERIMENTAL"
             symbol = ver_exp_sym_regexp.match(ln).group(1)
+        elif ver_exp_sym_alias_regexp.match(ln):
+            node = "EXPERIMENTAL"
+            symbol = ver_exp_sym_alias_regexp.match(ln).group(1)
         elif default_sym_regexp.match(ln):
             abi = default_sym_regexp.match(ln).group(1)
             node = f"DPDK_{abi}"
             symbol = default_sym_regexp.match(ln).group(2)
+        elif default_sym_alias_regexp.match(ln):
+            abi = default_sym_alias_regexp.match(ln).group(1)
+            node = f"DPDK_{abi}"
+            symbol = default_sym_alias_regexp.match(ln).group(2)
 
         if not symbol:
             continue
diff --git a/lib/eal/common/eal_export.h b/lib/eal/common/eal_export.h
index 7971bf8d7a..5b458f81c6 100644
--- a/lib/eal/common/eal_export.h
+++ b/lib/eal/common/eal_export.h
@@ -63,6 +63,14 @@ __attribute__((__symver__(RTE_STR(name) "@@DPDK_" RTE_STR(ver)))) \
 type name ## _v ## ver args; \
 type name ## _v ## ver args
 
+#define RTE_VERSION_EXPERIMENTAL_SYMBOL_ALIAS(type, name, orig) VERSIONING_WARN \
+extern type name ## _exp __attribute((alias(RTE_STR(orig)), \
+				      __symver__(RTE_STR(name) "@EXPERIMENTAL")))
+
+#define RTE_DEFAULT_SYMBOL_ALIAS(ver, type, name, orig) VERSIONING_WARN \
+extern type name ## _v ## ver __attribute((alias(RTE_STR(orig)), \
+					   __symver__(RTE_STR(name) "@@DPDK_" RTE_STR(ver))))
+
 #else /* !__has_attribute(symver) */
 
 /* Use asm tag to create symbol table entry */
@@ -81,6 +89,14 @@ __asm__(".symver " RTE_STR(name) "_v" RTE_STR(ver) ", " RTE_STR(name) "@@DPDK_"
 __rte_used type name ## _v ## ver args; \
 type name ## _v ## ver args
 
+#define RTE_DEFAULT_SYMBOL_ALIAS(ver, type, name, orig) VERSIONING_WARN \
+extern type name ## _v ## ver __attribute__((alias(RTE_STR(orig)))); \
+__asm__(".symver " RTE_STR(name) "_v" RTE_STR(ver) ", " RTE_STR(name) "@@DPDK_" RTE_STR(ver));
+
+#define RTE_VERSION_EXPERIMENTAL_SYMBOL_ALIAS(type, name, orig) VERSIONING_WARN \
+extern type name ## _exp __attribute__((alias(RTE_STR(orig)))); \
+__asm__(".symver " RTE_STR(name) "_exp, " RTE_STR(name) "@EXPERIMENTAL");
+
 #endif /* __has_attribute(symver) */
 
 #else /* !RTE_BUILD_SHARED_LIB */
@@ -97,6 +113,12 @@ type name ## _exp args
 #define RTE_DEFAULT_SYMBOL(ver, type, name, args) VERSIONING_WARN \
 type name args
 
+#define RTE_VERSION_EXPERIMENTAL_SYMBOL_ALIAS(type, name, orig) VERSIONING_WARN \
+extern type name ## _exp __attribute__((alias(RTE_STR(orig))));
+
+#define RTE_DEFAULT_SYMBOL_ALIAS(ver, type, name, orig) VERSIONING_WARN \
+extern type name __attribute__((alias(RTE_STR(orig))));
+
 #endif /* RTE_BUILD_SHARED_LIB */
 
 #endif /* EAL_EXPORT_H */
-- 
2.47.3


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox