DPDK-dev Archive on lore.kernel.org

DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* RE: [PATCH v5 4/9] bpf/arm64: mask shift count per RFC 9669
From: Marat Khalili @ 2026-06-25 15:40 UTC (permalink / raw)
  To: Stephen Hemminger, dev@dpdk.org
  Cc: stable@dpdk.org, Wathsala Vithanage, Konstantin Ananyev,
	Jerin Jacob
In-Reply-To: <20260624175815.673064-5-stephen@networkplumber.org>

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Wednesday 24 June 2026 18:55
> To: dev@dpdk.org
> Cc: Stephen Hemminger <stephen@networkplumber.org>; stable@dpdk.org; Wathsala Vithanage
> <wathsala.vithanage@arm.com>; Konstantin Ananyev <konstantin.ananyev@huawei.com>; Marat Khalili
> <marat.khalili@huawei.com>; Jerin Jacob <jerinj@marvell.com>
> Subject: [PATCH v5 4/9] bpf/arm64: mask shift count per RFC 9669
> 
> The ARM JIT was not masking the shift count as required by RFC 9669
> (0x3f for 64-bit, 0x1f for 32-bit), so large immediate shift counts
> overflowed the UBFM/SBFM encoding and failed the JIT. Mask the
> immediate in emit_lsl/emit_lsr/emit_asr.
> 
> Fixes: 9f4469d9e83a ("bpf/arm: add logical operations")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Acked-by: Marat Khalili <marat.khalili@huawei.com>

> ---
>  lib/bpf/bpf_jit_arm64.c | 12 ++++++++++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/bpf/bpf_jit_arm64.c b/lib/bpf/bpf_jit_arm64.c
> index ba7ae4d680..7582370062 100644
> --- a/lib/bpf/bpf_jit_arm64.c
> +++ b/lib/bpf/bpf_jit_arm64.c
> @@ -545,12 +545,14 @@ emit_bitfield(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
>  	emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) ||
>  		  check_immr_imms(is64, immr, imms));
>  }
> +
>  static void
>  emit_lsl(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
>  {
>  	const unsigned int width = is64 ? 64 : 32;
>  	uint8_t imms, immr;
> 
> +	imm &= width - 1;
>  	immr = (width - imm) & (width - 1);
>  	imms = width - 1 - imm;
> 
> @@ -560,13 +562,19 @@ emit_lsl(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
>  static void
>  emit_lsr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
>  {
> -	emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_UBFM);
> +	const unsigned int width = is64 ? 64 : 32;
> +
> +	imm &= width - 1;
> +	emit_bitfield(ctx, is64, rd, rd, imm, width - 1, A64_UBFM);
>  }
> 
>  static void
>  emit_asr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
>  {
> -	emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_SBFM);
> +	const unsigned int width = is64 ? 64 : 32;
> +
> +	imm &= width - 1;
> +	emit_bitfield(ctx, is64, rd, rd, imm, width - 1, A64_SBFM);
>  }
> 
>  #define A64_AND 0
> --
> 2.53.0


^ permalink raw reply

* RE: [PATCH v5 5/9] test/bpf: add test for large shift
From: Marat Khalili @ 2026-06-25 15:38 UTC (permalink / raw)
  To: Stephen Hemminger, dev@dpdk.org; +Cc: Konstantin Ananyev
In-Reply-To: <20260624175815.673064-6-stephen@networkplumber.org>

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Wednesday 24 June 2026 18:55
> To: dev@dpdk.org
> Cc: Stephen Hemminger <stephen@networkplumber.org>; Konstantin Ananyev <konstantin.ananyev@huawei.com>;
> Marat Khalili <marat.khalili@huawei.com>
> Subject: [PATCH v5 5/9] test/bpf: add test for large shift
> 
> There were multiple bugs with immediate values in shift instructions.
> The code was not masking the shift count as required by RFC 9669.
> 
> Add new tests that cover these instructions.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Acked-by: Marat Khalili <marat.khalili@huawei.com>

> ---
>  app/test/test_bpf.c | 59 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 59 insertions(+)
> 
> diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
> index 232e9e2a98..0e5894a532 100644
> --- a/app/test/test_bpf.c
> +++ b/app/test/test_bpf.c
> @@ -2005,6 +2005,51 @@ test_div1_check(uint64_t rc, const void *arg)
>  	return cmp_res(__func__, 0, rc, dve.out, dvt->out, sizeof(dve.out));
>  }
> 
> +/*
> + * Shift counts are masked to the operand width (RFC 9669: 0x3f for 64-bit,
> + * 0x1f for 32-bit). Counts >= 128 also exercise the x86 imm_size() path that
> + * used to desync the stream, and the arm64 UBFM/SBFM immediate encoding.
> + */
> +static const struct ebpf_insn test_shift_big_imm_prog[] = {
> +	{
> +		.code = (EBPF_ALU64 | EBPF_MOV | BPF_K),
> +		.dst_reg = EBPF_REG_0,
> +		.imm = 1
> +	},
> +	{
> +		.code = (EBPF_ALU64 | BPF_LSH | BPF_K),
> +		.dst_reg = EBPF_REG_0,
> +		.imm = 191
> +	},
> +	{
> +		.code = (EBPF_ALU64 | EBPF_ARSH | BPF_K),
> +		.dst_reg = EBPF_REG_0,
> +		.imm = 200
> +	},
> +	{
> +		.code = (EBPF_ALU64 | BPF_RSH | BPF_K),
> +		.dst_reg = EBPF_REG_0,
> +		.imm = 130
> +	},
> +	{
> +		.code = (BPF_JMP | EBPF_EXIT)
> +	},
> +};
> +
> +static void
> +test_shift_big_imm_prepare(void *arg)
> +{
> +	memset(arg, 0, sizeof(struct dummy_offset));
> +}
> +
> +static int
> +test_shift_big_imm_check(uint64_t rc, const void *arg)
> +{
> +	uint64_t expect = 0x3FE0000000000000ULL;
> +
> +	return cmp_res(__func__, expect, rc, arg, arg, 0);
> +}
> +
>  /* call test-cases */
>  static const struct ebpf_insn test_call1_prog[] = {
> 
> @@ -3409,6 +3454,20 @@ static const struct bpf_test tests[] = {
>  		.prepare = test_mul1_prepare,
>  		.check_result = test_div1_check,
>  	},
> +	{
> +		.name = "test_shift_big_imm",
> +		.arg_sz = sizeof(struct dummy_offset),
> +		.prm = {
> +			.ins = test_shift_big_imm_prog,
> +			.nb_ins = RTE_DIM(test_shift_big_imm_prog),
> +			.prog_arg = {
> +				.type = RTE_BPF_ARG_PTR,
> +				.size = sizeof(struct dummy_offset),
> +			},
> +		},
> +		.prepare = test_shift_big_imm_prepare,
> +		.check_result = test_shift_big_imm_check,
> +	},
>  	{
>  		.name = "test_call1",
>  		.arg_sz = sizeof(struct dummy_offset),
> --
> 2.53.0


^ permalink raw reply

* RE: [PATCH v5 3/9] bpf: mask shift count in interpreter per RFC 9669
From: Marat Khalili @ 2026-06-25 15:35 UTC (permalink / raw)
  To: Stephen Hemminger, dev@dpdk.org
  Cc: stable@dpdk.org, Konstantin Ananyev, Ferruh Yigit
In-Reply-To: <20260624175815.673064-4-stephen@networkplumber.org>

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Wednesday 24 June 2026 18:55
> To: dev@dpdk.org
> Cc: Stephen Hemminger <stephen@networkplumber.org>; stable@dpdk.org; Konstantin Ananyev
> <konstantin.ananyev@huawei.com>; Marat Khalili <marat.khalili@huawei.com>; Ferruh Yigit
> <ferruh.yigit@amd.com>
> Subject: [PATCH v5 3/9] bpf: mask shift count in interpreter per RFC 9669
> 
> The interpreter shifted by the raw immediate or register value, which
> is undefined behavior in C when the count is >= the operand width and
> trips UBSan. RFC 9669 masks shift counts (0x3f for 64-bit, 0x1f for
> 32-bit); mask the count in the LSH/RSH/ARSH cases.
> 
> Fixes: 94972f35a02e ("bpf: add BPF loading and execution framework")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Acked-by: Marat Khalili <marat.khalili@huawei.com>

> ---
>  lib/bpf/bpf_exec.c | 31 +++++++++++++++++++++----------
>  1 file changed, 21 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/bpf/bpf_exec.c b/lib/bpf/bpf_exec.c
> index d423ef28f5..bb03c9cc2c 100644
> --- a/lib/bpf/bpf_exec.c
> +++ b/lib/bpf/bpf_exec.c
> @@ -4,6 +4,7 @@
> 
>  #include <stdio.h>
>  #include <stdint.h>
> +#include <limits.h>
> 
>  #include <eal_export.h>
>  #include <rte_common.h>
> @@ -43,6 +44,16 @@
>  	((reg)[(ins)->dst_reg] = \
>  		(type)(reg)[(ins)->dst_reg] op (type)(ins)->imm)
> 
> +#define BPF_OP_SHIFT_IMM(reg, ins, op, type)	\
> +	((reg)[(ins)->dst_reg] =		\
> +		(type)(reg)[(ins)->dst_reg] op	\
> +		((ins)->imm & (sizeof(type) * CHAR_BIT - 1)))
> +
> +#define BPF_OP_SHIFT_REG(reg, ins, op, type)	\
> +	((reg)[(ins)->dst_reg] =		\
> +		(type)(reg)[(ins)->dst_reg] op	\
> +		((reg)[(ins)->src_reg] & (sizeof(type) * CHAR_BIT - 1)))
> +
>  #define BPF_DIV_ZERO_CHECK(bpf, reg, ins, type) do { \
>  	if ((type)(reg)[(ins)->src_reg] == 0) { \
>  		RTE_BPF_LOG_LINE(ERR, \
> @@ -183,10 +194,10 @@ bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
>  			BPF_OP_ALU_IMM(reg, ins, |, uint32_t);
>  			break;
>  		case (BPF_ALU | BPF_LSH | BPF_K):
> -			BPF_OP_ALU_IMM(reg, ins, <<, uint32_t);
> +			BPF_OP_SHIFT_IMM(reg, ins, <<, uint32_t);
>  			break;
>  		case (BPF_ALU | BPF_RSH | BPF_K):
> -			BPF_OP_ALU_IMM(reg, ins, >>, uint32_t);
> +			BPF_OP_SHIFT_IMM(reg, ins, >>, uint32_t);
>  			break;
>  		case (BPF_ALU | BPF_XOR | BPF_K):
>  			BPF_OP_ALU_IMM(reg, ins, ^, uint32_t);
> @@ -217,10 +228,10 @@ bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
>  			BPF_OP_ALU_REG(reg, ins, |, uint32_t);
>  			break;
>  		case (BPF_ALU | BPF_LSH | BPF_X):
> -			BPF_OP_ALU_REG(reg, ins, <<, uint32_t);
> +			BPF_OP_SHIFT_REG(reg, ins, <<, uint32_t);
>  			break;
>  		case (BPF_ALU | BPF_RSH | BPF_X):
> -			BPF_OP_ALU_REG(reg, ins, >>, uint32_t);
> +			BPF_OP_SHIFT_REG(reg, ins, >>, uint32_t);
>  			break;
>  		case (BPF_ALU | BPF_XOR | BPF_X):
>  			BPF_OP_ALU_REG(reg, ins, ^, uint32_t);
> @@ -262,13 +273,13 @@ bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
>  			BPF_OP_ALU_IMM(reg, ins, |, uint64_t);
>  			break;
>  		case (EBPF_ALU64 | BPF_LSH | BPF_K):
> -			BPF_OP_ALU_IMM(reg, ins, <<, uint64_t);
> +			BPF_OP_SHIFT_IMM(reg, ins, <<, uint64_t);
>  			break;
>  		case (EBPF_ALU64 | BPF_RSH | BPF_K):
> -			BPF_OP_ALU_IMM(reg, ins, >>, uint64_t);
> +			BPF_OP_SHIFT_IMM(reg, ins, >>, uint64_t);
>  			break;
>  		case (EBPF_ALU64 | EBPF_ARSH | BPF_K):
> -			BPF_OP_ALU_IMM(reg, ins, >>, int64_t);
> +			BPF_OP_SHIFT_IMM(reg, ins, >>, int64_t);
>  			break;
>  		case (EBPF_ALU64 | BPF_XOR | BPF_K):
>  			BPF_OP_ALU_IMM(reg, ins, ^, uint64_t);
> @@ -299,13 +310,13 @@ bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
>  			BPF_OP_ALU_REG(reg, ins, |, uint64_t);
>  			break;
>  		case (EBPF_ALU64 | BPF_LSH | BPF_X):
> -			BPF_OP_ALU_REG(reg, ins, <<, uint64_t);
> +			BPF_OP_SHIFT_REG(reg, ins, <<, uint64_t);
>  			break;
>  		case (EBPF_ALU64 | BPF_RSH | BPF_X):
> -			BPF_OP_ALU_REG(reg, ins, >>, uint64_t);
> +			BPF_OP_SHIFT_REG(reg, ins, >>, uint64_t);
>  			break;
>  		case (EBPF_ALU64 | EBPF_ARSH | BPF_X):
> -			BPF_OP_ALU_REG(reg, ins, >>, int64_t);
> +			BPF_OP_SHIFT_REG(reg, ins, >>, int64_t);
>  			break;
>  		case (EBPF_ALU64 | BPF_XOR | BPF_X):
>  			BPF_OP_ALU_REG(reg, ins, ^, uint64_t);
> --
> 2.53.0


^ permalink raw reply

* Re: [PATCH v4] dts: report dut/NIC info during DTS run
From: Koushik Bhargav Nimoji @ 2026-06-25 15:30 UTC (permalink / raw)
  To: Patrick Robb; +Cc: luca.vizzarro, dev, abailey, ahassick, lylavoie
In-Reply-To: <CAK6DuxucO6-D0qTeTb=mdpWWP48j9rDyvbwfHGgjfwQXTM93Gg@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 7237 bytes --]

On Wed, Jun 24, 2026 at 10:06 PM Patrick Robb <patrickrobb1997@gmail.com>
wrote:

>
>
> On Wed, Jun 24, 2026 at 5:33 PM Koushik Bhargav Nimoji <
> knimoji@iol.unh.edu> wrote:
>
>> This patch gathers NIC info during a DTS run and writes it to an output
>> json file. This allows the json file to be used when reporting results
>> on the DTS results dashboard.
>>
>> Signed-off-by: Koushik Bhargav Nimoji <knimoji@iol.unh.edu>
>> ---
>> v2:
>>     *Resolved merge conflicts
>> v3:
>>     *Fixed an issue with retrieving
>>      the NIC's hardware version
>> v4:
>>     *Moved nic info gathering step before the nics get
>>      bound to their respective drivers
>>     *Condensed some areas of code in order to make them
>>      more readable
>>     *Removed redundant None checks and added some where
>>      required
>>     *Fixed LshwOutput class to better reflect the lshw
>>      command output
>> ---
>>  dts/framework/test_run.py                    |  8 +++
>>  dts/framework/testbed_model/linux_session.py | 68 ++++++++++++++++++++
>>  dts/framework/testbed_model/os_session.py    | 11 ++++
>>  3 files changed, 87 insertions(+)
>>
>> diff --git a/dts/framework/test_run.py b/dts/framework/test_run.py
>> index 94dc6023a7..c92fe90f2e 100644
>> --- a/dts/framework/test_run.py
>> +++ b/dts/framework/test_run.py
>> @@ -98,6 +98,7 @@
>>          "InternalError" -> "exit":ew
>>  """
>>
>> +import json
>>  import random
>>  from collections import deque
>>  from collections.abc import Iterable
>> @@ -347,6 +348,12 @@ def next(self) -> State | None:
>>          test_run.ctx.dpdk.setup()
>>          test_run.ctx.topology.setup()
>>
>> +        used_nic_info: list[dict[str, str]] =
>> self.test_run.ctx.sut_node.main_session.get_nic_info()
>>
>
> drop "used" for nic_info or change to testrun_nic_info?
>
>
>> +        with open(f"{SETTINGS.output_dir}/dut_info.json", "w") as file:
>> +            json.dump(used_nic_info, file, indent=3)
>> +
>> +        self.logger.info(f"DUT NIC info written to:
>> {SETTINGS.output_dir}/dut_info.json")
>> +
>>          if test_run.config.use_virtual_functions:
>>              test_run.ctx.topology.instantiate_vf_ports()
>>          if test_run.ctx.sut_node.cryptodevs and test_run.config.crypto:
>> @@ -370,6 +377,7 @@ def next(self) -> State | None:
>>          test_run.supported_capabilities = get_supported_capabilities(
>>              test_run.ctx.sut_node, test_run.ctx.topology,
>> test_run.required_capabilities
>>          )
>> +
>>          return TestRunExecution(test_run, self.result)
>>
>>      def on_error(self, ex: BaseException) -> State | None:
>> diff --git a/dts/framework/testbed_model/linux_session.py
>> b/dts/framework/testbed_model/linux_session.py
>> index 3a6e97974b..9e9146c372 100644
>> --- a/dts/framework/testbed_model/linux_session.py
>> +++ b/dts/framework/testbed_model/linux_session.py
>> @@ -38,6 +38,8 @@ class LshwConfigurationOutput(TypedDict):
>>      driver: str
>>      #:
>>      link: str
>> +    #:
>> +    firmware: str
>>
>>
>>  class LshwOutput(TypedDict):
>> @@ -61,6 +63,12 @@ class LshwOutput(TypedDict):
>>              ...
>>      """
>>
>> +    #:
>> +    vendor: NotRequired[str]
>> +    #:
>> +    product: NotRequired[str]
>> +    #:
>> +    version: NotRequired[str]
>>      #:
>>      businfo: str
>>      #:
>> @@ -197,6 +205,66 @@ def unbind_ports(self, ports: list[Port]):
>>          if self._lshw_net_info:
>>              del self._lshw_net_info
>>
>> +    def get_nic_info(self) -> list[dict[str, str]]:
>> +        """Overrides :meth`~.os_session.OSSession.get_nic_info`.
>> +
>> +        Raises:
>> +            ConfigurationError: If the NIC info could not be found.
>> +        """
>> +        port_data = {
>> +            port.get("businfo"): port for port in self._lshw_net_info if
>> port.get("businfo")
>> +        }
>> +
>> +        all_nic_info: list[dict[str, str]] = []
>> +        for port in self._config.ports:
>> +            pci_addr = port.pci
>> +
>> +            command_result = self.send_command(
>>
>
> rename to lshw_result please.
>
>
>> +                f"sudo lshw -c network -businfo | grep '{pci_addr}' |
>> cut -d'@' -f1"
>> +            )
>> +            if command_result.return_code != 0 and command_result.stdout
>> == "":
>> +                raise ConfigurationError(f"Unable to get bus type for
>> port {pci_addr}.")
>> +            bus_type = command_result.stdout
>> +
>> +            bus_info = f"{bus_type}@{pci_addr}"
>> +            nic_port: LshwOutput | None = port_data[bus_info]
>> +            if nic_port is None:
>> +                raise ConfigurationError(f"Port {pci_addr} could not be
>> found on the node.")
>> +
>> +            config: LshwConfigurationOutput | None =
>> nic_port["configuration"]
>> +            if config is None:
>> +                raise ConfigurationError(
>> +                    f"Configuration info for port {pci_addr} could not
>> be found on the node."
>> +                )
>> +
>> +            if "logicalname" not in nic_port:
>> +                raise ConfigurationError(
>> +                    f"Logical name for port {pci_addr} could not be
>> found on the node."
>> +                )
>> +
>> +            command_result = self.send_command(
>>
>
> ethtool_result
>
>
>> +                f"ethtool {nic_port['logicalname']} | grep 'Speed:' |
>> awk '{{print $2}}'"
>> +            )
>
> +            if command_result.return_code == 0 and command_result.stdout:
>> +                nic_speed = command_result.stdout
>> +            else:
>> +                self._logger.error(f"Unable to get speed for NIC:
>> {pci_addr}")
>> +                nic_speed = None
>> +
>> +            dut_json = {
>> +                "make": nic_port["vendor"] if "vendor" in nic_port else
>> "Unknown",
>> +                "model": nic_port["product"] if "product" in nic_port
>> else "Unknown",
>> +                "hardware version": nic_port["version"] if "version" in
>> nic_port else "Unknown",
>> +                "firmware version": config["firmware"] if "firmware" in
>> config else "Unknown",
>> +                "deviceBusType": bus_type,
>> +                "deviceId": nic_port["serial"] if "serial" in nic_port
>> else "Unknown",
>> +                "pmd": config["driver"] if "driver" in config else
>> "Unknown",
>> +                "speed": nic_speed or "Unknown",
>> +            }
>> +            all_nic_info.append(dut_json)
>> +
>> +        return all_nic_info
>> +
>>
>
> What is the intended behavior for cryptodev tests? I realize the ports
> list will be empty and we will not enter the initial loop, but is this
> intended? Do we want to gather cryptodev info too?
>
>
The intended behavior here is to skip cryptodev devices. Not entering the
initial loop, and therefore returning an empty list is the expected
behavior when running cryptodev tests.

>

>      def bind_ports_to_driver(self, ports: list[Port], driver_name: str)
>> -> None:
>>
>>
> Reviewed-by: Patrick Robb <patrickrobb1997@gmail.com>
>

[-- Attachment #2: Type: text/html, Size: 10501 bytes --]

^ permalink raw reply

* Re: [PATCH v3 2/4] build: support function versioning for drivers
From: David Marchand @ 2026-06-25 14:45 UTC (permalink / raw)
  To: Dariusz Sosnowski; +Cc: Bruce Richardson, dev, Yu Jiang
In-Reply-To: <20260625133311.1299705-3-dsosnowski@nvidia.com>

On Thu, 25 Jun 2026 at 15:34, Dariusz Sosnowski <dsosnowski@nvidia.com> wrote:
>
> Add support for enabling function versioning
> (through use_function_versioning meson variable) for drivers,
> similar to libraries.
>
> Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
> ---
>  drivers/meson.build | 21 ++++++++++++++++++++-
>  1 file changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/meson.build b/drivers/meson.build
> index 4d95604ecd..8f3ab490ee 100644
> --- a/drivers/meson.build
> +++ b/drivers/meson.build
> @@ -171,6 +171,7 @@ foreach subpath:subdirs
>          pkgconfig_extra_libs = []
>          testpmd_sources = []
>          require_iova_in_mbuf = true
> +        use_function_versioning = false
>          # for handling base code files which may need extra cflags
>          base_sources = []
>          base_cflags = []
> @@ -273,6 +274,13 @@ foreach subpath:subdirs
>          endif
>          dpdk_conf.set(lib_name.to_upper(), 1)
>
> +        if developer_mode and is_windows and use_function_versioning
> +            message('@0@: Function versioning is not supported by Windows.'.format(name))
> +        endif
> +        if use_function_versioning
> +            cflags += '-DRTE_USE_FUNCTION_VERSIONING'
> +        endif
> +
>          dpdk_extra_ldflags += pkgconfig_extra_libs
>
>          dpdk_headers += headers
> @@ -363,7 +371,18 @@ foreach subpath:subdirs
>                      depends: [version_map])
>          endif
>
> -        shared_lib = shared_library(lib_name, sources_pmd_info,
> +        if not use_function_versioning or is_windows
> +            # Use pre-built objects and pmdinfo sources to build shared library.
> +            shared_sources = sources_pmd_info
> +        else
> +            # For compat we need to rebuild with RTE_BUILD_SHARED_LIB defined.
> +            # Use original sources and pmdinfo sources.
> +            cflags += '-DRTE_BUILD_SHARED_LIB'
> +            shared_sources = sources + sources_pmd_info
> +            objs = []
> +        endif
> +
> +        shared_lib = shared_library(lib_name, shared_sources,
>                  objects: objs,
>                  include_directories: includes,
>                  dependencies: shared_deps,

Older meson versions don't like this form:

drivers/meson.build:381:12: ERROR: Invalid use of addition: can only
concatenate list (not "CustomTargetHolder") to list

It seems to work with something like:

diff --git a/drivers/meson.build b/drivers/meson.build
index 8f3ab490ee..79c215a7c8 100644
--- a/drivers/meson.build
+++ b/drivers/meson.build
@@ -373,12 +373,12 @@ foreach subpath:subdirs

         if not use_function_versioning or is_windows
             # Use pre-built objects and pmdinfo sources to build
shared library.
-            shared_sources = sources_pmd_info
+            shared_sources = [sources_pmd_info]
         else
             # For compat we need to rebuild with RTE_BUILD_SHARED_LIB defined.
             # Use original sources and pmdinfo sources.
             cflags += '-DRTE_BUILD_SHARED_LIB'
-            shared_sources = sources + sources_pmd_info
+            shared_sources = sources + [sources_pmd_info]
             objs = []
         endif



-- 
David Marchand


^ permalink raw reply related

* RE: [PATCH v5 02/24] bpf: add format instruction function
From: Marat Khalili @ 2026-06-25 14:22 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Konstantin Ananyev, dev@dpdk.org
In-Reply-To: <20260624100934.00e99af1@phoenix.local>

> On Wed, 24 Jun 2026 13:17:35 +0100
> Marat Khalili <marat.khalili@huawei.com> wrote:
> 
> > BPF library already contains BPF instruction formatting functions, but
> > they could only be used via `rte_bpf_dump` to dump result into file. Add
> > new function `rte_bpf_format` to format instruction in various ways
> > (hexadecimal, disassembly) into a user-provided buffer, as well as a
> > service function `rte_bpf_insn_is_wide` to detect wide instructions.
> >
> > Signed-off-by: Marat Khalili <marat.khalili@huawei.com>
> > Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> > ---
> 
> Is this format similar to what tcpdump -d and objdump produce?

Closest I could find is bpf_dbg, the rest are slightly different.

Git log says it was originally added by you :)

^ permalink raw reply

* Re: [PATCH v1 2/2] dts: add latency coverage for cryptodev testing
From: Patrick Robb @ 2026-06-25 14:17 UTC (permalink / raw)
  To: Andrew Bailey; +Cc: luca.vizzarro, dev, lylavoie, ahassick, knimoji
In-Reply-To: <20260513152715.133381-2-abailey@iol.unh.edu>

[-- Attachment #1: Type: text/plain, Size: 11431 bytes --]

On Wed, May 13, 2026 at 11:27 AM Andrew Bailey <abailey@iol.unh.edu> wrote:

> Currently, next DTS only has cryptodev testing coverage for throughput
> metrics. This patch adds a test suite to include latency testing for
> crypto devices.
>
> Signed-off-by: Andrew Bailey <abailey@iol.unh.edu>
> ---
>  .../dts/tests.TestSuite_cryptodev_latency.rst |   8 +
>  dts/tests/TestSuite_cryptodev_latency.py      | 695 ++++++++++++++++++
>  2 files changed, 703 insertions(+)
>  create mode 100644 doc/api/dts/tests.TestSuite_cryptodev_latency.rst
>  create mode 100644 dts/tests/TestSuite_cryptodev_latency.py
>
>
> +    @crypto_test
> +    def aesni_gcm_vdev(self) -> None:
> +        """aesni_gcm virtual device latency test.
> +
> +        Steps:
> +            * Create a cryptodev instance with provided device type and
> buffer sizes.
> +        Verify:
> +            * The latency is below or within delta of provided baseline.
> +
> +        Raises:
> +            SkippedTestException: When configuration is not provided.
> +        """
> +        if "aesni_gcm_vdev" not in self.latency_test_parameters:
> +            skip("test not configured")
> +        app = Cryptodev(
> +            ptest=TestType.latency,
> +            vdevs=[VirtualDevice("crypto_aesni_gcm0")],
> +            devtype=DeviceType.crypto_aesni_gcm,
> +            optype=OperationType.aead,
> +            aead_op=EncryptDecryptSwitch.encrypt,
> +            aead_key_sz=16,
> +            aead_iv_sz=12,
> +            aead_aad_sz=16,
> +            digest_sz=16,
> +            burst_sz=32,
> +            total_ops=TOTAL_OPS,
> +            buffer_sz=self.buffer_sizes["aesni_gcm_vdev"],
> +        )
> +        results = self._verify_latency(app.run_app(), "aesni_gcm_vdev")
> +        self._print_stats(results)
> +        for result in results:
> +            verify(result["passed"] == "PASS", "latency fell more than
> the delta tolerance")
>

Why does this one not have "below baseline" in the string like the other verify
assertions in this suite?


> +
> +    @crypto_test
> +    def aesni_mb_cipher_then_auth_vdev(self) -> None:
> +        """aesni_mb vdev cipher and auth latency test.
> +
> +        Steps:
> +            * Create a cryptodev instance with provided device type and
> buffer sizes.
> +        Verify:
> +            * The latency is below or within delta of provided baseline.
> +
> +        Raises:
> +            SkippedTestException: When configuration is not provided.
> +        """
> +        if "aesni_mb_cipher_then_auth_vdev" not in
> self.latency_test_parameters:
> +            skip("test not configured")
> +        app = Cryptodev(
> +            ptest=TestType.latency,
> +            vdevs=[VirtualDevice("crypto_aesni_mb0")],
> +            devtype=DeviceType.crypto_aesni_mb,
> +            optype=OperationType.cipher_then_auth,
> +            cipher_algo=CipherAlgorithm.aes_cbc,
> +            cipher_op=EncryptDecryptSwitch.encrypt,
> +            cipher_key_sz=16,
> +            auth_algo=AuthenticationAlgorithm.sha1_hmac,
> +            auth_op=AuthenticationOpMode.generate,
> +            auth_key_sz=64,
> +            digest_sz=12,
> +            burst_sz=32,
> +            total_ops=TOTAL_OPS,
> +            buffer_sz=self.buffer_sizes["aesni_mb_cipher_then_auth_vdev"],
> +        )
> +        results = self._verify_latency(app.run_app(),
> "aesni_mb_cipher_then_auth_vdev")
> +        self._print_stats(results)
> +        for result in results:
> +            verify(
> +                result["passed"] == "PASS",
> +                "latency fell more than the delta tolerance below
> baseline",
> +            )
> +
> +    @crypto_test
> +    def aesni_mb_vdev(self) -> None:
> +        """aesni_mb vdev latency test.
> +
> +        Steps:
> +            * Create a cryptodev instance with provided device type and
> buffer sizes.
> +        Verify:
> +            * The latency is below or within delta of provided baseline.
> +
> +        Raises:
> +            SkippedTestException: When configuration is not provided.
> +        """
> +        if "aesni_mb_vdev" not in self.latency_test_parameters:
> +            skip("test not configured")
> +        app = Cryptodev(
> +            ptest=TestType.latency,
> +            vdevs=[VirtualDevice("crypto_aesni_mb0")],
> +            devtype=DeviceType.crypto_aesni_mb,
> +            optype=OperationType.cipher_only,
> +            cipher_algo=CipherAlgorithm.aes_cbc,
> +            cipher_op=EncryptDecryptSwitch.encrypt,
> +            cipher_key_sz=16,
> +            cipher_iv_sz=16,
> +            burst_sz=32,
> +            total_ops=TOTAL_OPS,
> +            buffer_sz=self.buffer_sizes["aesni_mb_vdev"],
> +        )
> +        results = self._verify_latency(app.run_app(), "aesni_mb_vdev")
> +        self._print_stats(results)
> +        for result in results:
> +            verify(result["passed"] == "PASS", "Gbps fell below delta
> tolerance")
> +
> +    @crypto_test
> +    def kasumi_vdev(self) -> None:
> +        """Kasumi vdev latency test.
> +
> +        Steps:
> +            * Create a cryptodev instance with provided device type and
> buffer sizes.
> +        Verify:
> +            * The latency is below or within delta of provided baseline.
> +
> +        Raises:
> +            SkippedTestException: When configuration is not provided.
> +        """
> +        if "kasumi_vdev" not in self.latency_test_parameters:
> +            skip("test not configured")
> +        app = Cryptodev(
> +            ptest=TestType.latency,
> +            vdevs=[VirtualDevice("crypto_kasumi0")],
> +            devtype=DeviceType.crypto_kasumi,
> +            optype=OperationType.cipher_then_auth,
> +            cipher_algo=CipherAlgorithm.kasumi_f8,
> +            cipher_op=EncryptDecryptSwitch.encrypt,
> +            cipher_key_sz=16,
> +            cipher_iv_sz=8,
> +            auth_algo=AuthenticationAlgorithm.kasumi_f9,
> +            auth_op=AuthenticationOpMode.generate,
> +            auth_key_sz=16,
> +            digest_sz=4,
> +            burst_sz=32,
> +            total_ops=TOTAL_OPS,
> +            buffer_sz=self.buffer_sizes["kasumi_vdev"],
> +        )
> +        results = self._verify_latency(app.run_app(), "kasmui_vdev")
> +        self._print_stats(results)
> +        for result in results:
> +            verify(result["passed"] == "PASS", "Gbps fell below delta
> tolerance")
>

Should this be latency instead of Gbps?


> +
> +    @crypto_test
> +    def open_ssl_vdev(self) -> None:
> +        """open_ssl vdev latency test.
> +
> +        Steps:
> +            * Create a cryptodev instance with provided device type and
> buffer sizes.
> +        Verify:
> +            * The latency is below or within delta of provided baseline.
> +
> +        Raises:
> +            SkippedTestException: When configuration is not provided.
> +        """
> +        if "open_ssl_vdev" not in self.latency_test_parameters:
> +            skip("test not configured")
> +        app = Cryptodev(
> +            ptest=TestType.latency,
> +            vdevs=[VirtualDevice("crypto_openssl0")],
> +            devtype=DeviceType.crypto_openssl,
> +            optype=OperationType.aead,
> +            aead_algo=AeadAlgName.aes_gcm,
> +            aead_op=EncryptDecryptSwitch.encrypt,
> +            aead_key_sz=16,
> +            aead_iv_sz=16,
> +            aead_aad_sz=16,
> +            digest_sz=16,
> +            total_ops=TOTAL_OPS,
> +            buffer_sz=self.buffer_sizes["open_ssl_vdev"],
> +        )
> +        results = self._verify_latency(app.run_app(), "open_ssl_vdev")
> +        self._print_stats(results)
> +        for result in results:
> +            verify(result["passed"] == "PASS", "Gbps fell below delta
> tolerance")
>
Same


> +
> +    @crypto_test
> +    def snow3g_vdev(self) -> None:
> +        """snow3g vdev latency test.
> +
> +        Steps:
> +            * Create a cryptodev instance with provided device type and
> buffer sizes.
> +        Verify:
> +            * The latency is below or within delta of provided baseline.
> +
> +        Raises:
> +            SkippedTestException: When configuration is not provided.
> +        """
> +        if "snow3g_vdev" not in self.latency_test_parameters:
> +            skip("test not configured")
> +        app = Cryptodev(
> +            ptest=TestType.latency,
> +            vdevs=[VirtualDevice("crypto_snow3g0")],
> +            devtype=DeviceType.crypto_snow3g,
> +            optype=OperationType.cipher_then_auth,
> +            cipher_algo=CipherAlgorithm.snow3g_uea2,
> +            cipher_op=EncryptDecryptSwitch.encrypt,
> +            cipher_key_sz=16,
> +            cipher_iv_sz=16,
> +            auth_algo=AuthenticationAlgorithm.snow3g_uia2,
> +            auth_op=AuthenticationOpMode.generate,
> +            auth_key_sz=16,
> +            auth_iv_sz=16,
> +            digest_sz=16,
> +            burst_sz=32,
> +            total_ops=TOTAL_OPS,
> +            buffer_sz=self.buffer_sizes["open_ssl_vdev"],
> +        )
> +        results = self._verify_latency(app.run_app(), "open_ssl_vdev")
> +        self._print_stats(results)
> +        for result in results:
> +            verify(result["passed"] == "PASS", "Gbps fell below delta
> tolerance")
>
Same


> +
> +    @crypto_test
> +    def zuc_vdev(self) -> None:
> +        """Zuc vdev latency test.
> +
> +        Steps:
> +            * Create a cryptodev instance with provided device type and
> buffer sizes.
> +        Verify:
> +            * The latency is below or within delta of provided baseline.
> +
> +        Raises:
> +            SkippedTestException: When configuration is not provided.
> +        """
> +        if "zuc_vdev" not in self.latency_test_parameters:
> +            skip("test not configured")
> +        app = Cryptodev(
> +            ptest=TestType.latency,
> +            vdevs=[VirtualDevice("crypto_zuc0")],
> +            devtype=DeviceType.crypto_zuc,
> +            optype=OperationType.cipher_then_auth,
> +            cipher_algo=CipherAlgorithm.zuc_eea3,
> +            cipher_op=EncryptDecryptSwitch.encrypt,
> +            cipher_key_sz=16,
> +            cipher_iv_sz=16,
> +            auth_algo=AuthenticationAlgorithm.zuc_eia3,
> +            auth_op=AuthenticationOpMode.generate,
> +            auth_key_sz=16,
> +            auth_iv_sz=16,
> +            digest_sz=4,
> +            burst_sz=32,
> +            total_ops=TOTAL_OPS,
> +            buffer_sz=self.buffer_sizes["zuc_vdev"],
> +        )
> +        results = self._verify_latency(app.run_app(), "zuc_vdev")
> +        self._print_stats(results)
> +        for result in results:
> +            verify(result["passed"] == "PASS", "Gbps fell below delta
> tolerance")
>

Same

> --
> 2.50.1
>
>
Make sure you also see the AI code review's mention of the kasumi and snow3g typo
or misassignment
https://mails.dpdk.org/archives/test-report/2026-May/990932.html

The suggestions about safety when reading index 0 of a list are worth
implementing too.

Reviewed-by: Patrick Robb <patrickrobb1997@gmail.com>

[-- Attachment #2: Type: text/html, Size: 15261 bytes --]

^ permalink raw reply

* [PATCH v6 6/6] eal: fix async IPC callback not fired when no peers
From: Anatoly Burakov @ 2026-06-25 14:01 UTC (permalink / raw)
  To: dev, Jianfeng Tan
In-Reply-To: <cover.1782395581.git.anatoly.burakov@intel.com>

Currently, when rte_mp_request_async() is called and no peer processes
are connected (nb_sent == 0), the user callback is never invoked.

The original implementation used a dedicated background thread and
pthread_cond_signal() to wake it after queuing the dummy request. When
that thread was replaced with per-message alarms, no alarm was set for
the dummy request, silently breaking the nb_sent == 0 path.

This was not noticed because async requests are usually used while handling
secondary process requests, where peers are typically already present.

Fix it by setting a 1us alarm on the dummy request, so the callback path
immediately triggers and processes it.

Fixes: daf9bfca717e ("ipc: remove thread for async requests")
Cc: stable@dpdk.org

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/eal/common/eal_common_proc.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 235687ab84..2b8874e416 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -1197,11 +1197,22 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
 		ret = mp_request_async(eal_mp_socket_path(), copy, param, ts);
 
-		/* if we didn't send anything, put dummy request on the queue */
+		/* if we didn't send anything, put dummy request on the queue
+		 * and set a minimum-delay alarm so the callback fires immediately.
+		 */
 		if (ret == 0 && reply->nb_sent == 0) {
 			TAILQ_INSERT_TAIL(&pending_requests.requests, dummy,
 					next);
 			dummy_used = true;
+			if (rte_eal_alarm_set(1, async_reply_handle,
+					(void *)(uintptr_t)dummy->id) < 0) {
+				EAL_LOG(ERR, "Fail to set alarm for dummy request");
+				/* roll back the changes */
+				TAILQ_REMOVE(&pending_requests.requests, dummy, next);
+				dummy_used = false;
+				ret = -1;
+				goto unlock_fail;
+			}
 		}
 
 		pthread_mutex_unlock(&pending_requests.lock);
@@ -1275,6 +1286,16 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	if (ret == 0 && reply->nb_sent == 0) {
 		TAILQ_INSERT_HEAD(&pending_requests.requests, dummy, next);
 		dummy_used = true;
+
+		if (rte_eal_alarm_set(1, async_reply_handle,
+				(void *)(uintptr_t)dummy->id) < 0) {
+			EAL_LOG(ERR, "Fail to set alarm for dummy request");
+			/* roll back the changes */
+			TAILQ_REMOVE(&pending_requests.requests, dummy, next);
+			dummy_used = false;
+			ret = -1;
+			goto closedir_fail;
+		}
 	}
 
 	/* finally, unlock the queue */
-- 
2.47.3


^ permalink raw reply related

* [PATCH v6 5/6] eal: fix memory leak in async IPC secondary path
From: Anatoly Burakov @ 2026-06-25 14:01 UTC (permalink / raw)
  To: dev, Jianfeng Tan
In-Reply-To: <cover.1782395581.git.anatoly.burakov@intel.com>

When rte_mp_request_async() succeeds on the secondary process path, the
dummy request is freed only if it was inserted into the queue. However,
when the actual request was sent successfully (nb_sent > 0), the dummy is
not used and the function returns without freeing it.

Free dummy before returning on the success path when it was not inserted
into the queue.

Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
Cc: stable@dpdk.org

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/eal/common/eal_common_proc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 5cd1bb8d13..235687ab84 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -1209,6 +1209,8 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 		/* if we couldn't send anything, clean up */
 		if (ret != 0)
 			goto fail;
+		if (!dummy_used)
+			free(dummy);
 		return 0;
 	}
 
-- 
2.47.3


^ permalink raw reply related

* [PATCH v6 4/6] eal: fix async IPC memory leaks on partial failure
From: Anatoly Burakov @ 2026-06-25 14:01 UTC (permalink / raw)
  To: dev, Jianfeng Tan
In-Reply-To: <cover.1782395581.git.anatoly.burakov@intel.com>

When rte_mp_request_async() fails to send requests to all peers,
copy and param can lose ownership and leak.

However, we cannot simply free them unconditionally, as "partial failure"
means some requests were already queued and thus still reference `copy` and
`param`, so freeing them directly on the error path can cause
use-after-free when those requests are later handled by the async timeout.

Fix this by rolling back queued requests from the current batch, and reset
nb_sent to 0. Freeing the requests is now safe even if some requests were
sent, as any responses or timeouts will not find the request ID in the
queue and will safely exit without doing anything.

Coverity issue: 501503
Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
Cc: stable@dpdk.org

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/eal/common/eal_common_proc.c | 34 +++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 869ce99bf9..5cd1bb8d13 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -1242,7 +1242,34 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 		} else if (mp_request_async(path, copy, param, ts))
 			ret = -1;
 	}
-	/* if we didn't send anything, put dummy request on the queue */
+
+	/*
+	 * On partial failure, roll back all queued requests. We hold the lock
+	 * so no one else touches the queue. All requests in this batch share
+	 * the same param pointer. Stale alarms will fire and harmlessly find
+	 * nothing via ID-based lookup.
+	 */
+	if (ret != 0 && reply->nb_sent > 0) {
+		struct pending_request *r, *next;
+
+		for (r = TAILQ_FIRST(&pending_requests.requests);
+				r != NULL; r = next) {
+			next = TAILQ_NEXT(r, next);
+			if (r->type == REQUEST_TYPE_ASYNC &&
+					r->async.param == param) {
+				TAILQ_REMOVE(&pending_requests.requests,
+						r, next);
+				free(r->reply);
+				/* r->request == copy, freed below after the loop */
+				free(r);
+			}
+		}
+		reply->nb_sent = 0;
+	}
+
+	/* if we didn't send anything, put dummy request on the queue
+	 * and set a minimum-delay alarm so the callback fires immediately.
+	 */
 	if (ret == 0 && reply->nb_sent == 0) {
 		TAILQ_INSERT_HEAD(&pending_requests.requests, dummy, next);
 		dummy_used = true;
@@ -1260,6 +1287,11 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	/* if dummy was unused, free it */
 	if (!dummy_used)
 		free(dummy);
+	/* if nothing was sent, nobody owns copy/param */
+	if (ret != 0) {
+		free(param);
+		free(copy);
+	}
 
 	return ret;
 closedir_fail:
-- 
2.47.3


^ permalink raw reply related

* [PATCH v6 3/6] eal: avoid deadlock in async IPC alarm callback
From: Anatoly Burakov @ 2026-06-25 14:01 UTC (permalink / raw)
  To: dev, Jianfeng Tan
In-Reply-To: <cover.1782395581.git.anatoly.burakov@intel.com>

async_reply_handle_thread_unsafe() can run while holding
pending_requests.lock and currently calls rte_eal_alarm_cancel().

rte_eal_alarm_cancel() may spin-wait for an executing callback, which can
deadlock if that callback is blocked on the same lock.

Remove callback-side alarm cancellation. It is safe to do so, because any
callback triggered without a pending request becomes a noop due to the
async request lookup now using numerical ID.

Fixes: daf9bfca717e ("ipc: remove thread for async requests")
Cc: stable@dpdk.org

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/eal/common/eal_common_proc.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 3e32ee5027..869ce99bf9 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -549,19 +549,6 @@ async_reply_handle_thread_unsafe(struct pending_request *req)
 
 	TAILQ_REMOVE(&pending_requests.requests, req, next);
 
-	if (rte_eal_alarm_cancel(async_reply_handle,
-			(void *)(uintptr_t)req->id) < 0) {
-		/* if we failed to cancel the alarm because it's already in
-		 * progress, don't proceed because otherwise we will end up
-		 * handling the same message twice.
-		 */
-		if (rte_errno == EINPROGRESS) {
-			EAL_LOG(DEBUG, "Request handling is already in progress");
-			goto no_trigger;
-		}
-		EAL_LOG(ERR, "Failed to cancel alarm");
-	}
-
 	if (action == ACTION_TRIGGER)
 		return req;
 no_trigger:
-- 
2.47.3


^ permalink raw reply related

* [PATCH v6 2/6] eal: use request ID instead of pointers
From: Anatoly Burakov @ 2026-06-25 14:01 UTC (permalink / raw)
  To: dev, Jianfeng Tan
In-Reply-To: <cover.1782395581.git.anatoly.burakov@intel.com>

The initial implementation of async IPC request handling used request
pointers directly. Because of how IPC is meant to work, and because
request ownership is disconnected from request creation (as in, freeing
a request may happen due to timeout, or due to received response, or due
to rollback because of a later failure), using pointers as identity is not
safe.

Use numeric request ID for async request lookup instead. This way, we can
safely free requests even if we are already waiting on responses/timeouts
for them, as the pointers themselves will not be referenced directly by
the response/timeout.

Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
Cc: stable@dpdk.org

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/eal/common/eal_common_proc.c | 63 ++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 799c6e81b0..3e32ee5027 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -74,6 +74,7 @@ struct async_request_param {
 
 struct pending_request {
 	TAILQ_ENTRY(pending_request) next;
+	unsigned long id;
 	enum {
 		REQUEST_TYPE_SYNC,
 		REQUEST_TYPE_ASYNC
@@ -92,6 +93,8 @@ struct pending_request {
 	};
 };
 
+static unsigned long next_request_id;
+
 TAILQ_HEAD(pending_request_list, pending_request);
 
 static struct {
@@ -111,15 +114,15 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type);
 static void
 async_reply_handle(void *arg);
 
-/* for use with process_msg */
+/* for use with alarm callback and process_msg */
 static struct pending_request *
-async_reply_handle_thread_unsafe(void *arg);
+async_reply_handle_thread_unsafe(struct pending_request *req);
 
 static void
 trigger_async_action(struct pending_request *req);
 
 static struct pending_request *
-find_pending_request(const char *dst, const char *act_name)
+find_request_by_name(const char *dst, const char *act_name)
 {
 	struct pending_request *r;
 
@@ -132,6 +135,19 @@ find_pending_request(const char *dst, const char *act_name)
 	return r;
 }
 
+static struct pending_request *
+find_async_request_by_id(unsigned long id)
+{
+	struct pending_request *r;
+
+	TAILQ_FOREACH(r, &pending_requests.requests, next) {
+		if (r->id == id && r->type == REQUEST_TYPE_ASYNC)
+			return r;
+	}
+
+	return NULL;
+}
+
 /*
  * Combine prefix and name(optional) to return unix domain socket path
  * return the number of characters that would have been put into buffer.
@@ -354,7 +370,7 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 		struct pending_request *req = NULL;
 
 		pthread_mutex_lock(&pending_requests.lock);
-		pending_req = find_pending_request(s->sun_path, msg->name);
+		pending_req = find_request_by_name(s->sun_path, msg->name);
 		if (pending_req) {
 			memcpy(pending_req->reply, msg, sizeof(*msg));
 			/* -1 indicates that we've been asked to ignore */
@@ -519,9 +535,8 @@ trigger_async_action(struct pending_request *sr)
 }
 
 static struct pending_request *
-async_reply_handle_thread_unsafe(void *arg)
+async_reply_handle_thread_unsafe(struct pending_request *req)
 {
-	struct pending_request *req = (struct pending_request *)arg;
 	enum async_action action;
 	struct timespec ts_now;
 
@@ -534,7 +549,8 @@ async_reply_handle_thread_unsafe(void *arg)
 
 	TAILQ_REMOVE(&pending_requests.requests, req, next);
 
-	if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) {
+	if (rte_eal_alarm_cancel(async_reply_handle,
+			(void *)(uintptr_t)req->id) < 0) {
 		/* if we failed to cancel the alarm because it's already in
 		 * progress, don't proceed because otherwise we will end up
 		 * handling the same message twice.
@@ -557,9 +573,13 @@ static void
 async_reply_handle(void *arg)
 {
 	struct pending_request *req;
+	/* alarm arg carries the request ID packed into a void * via uintptr_t */
+	unsigned long id = (uintptr_t)arg;
 
 	pthread_mutex_lock(&pending_requests.lock);
-	req = async_reply_handle_thread_unsafe(arg);
+	req = find_async_request_by_id(id);
+	if (req != NULL)
+		req = async_reply_handle_thread_unsafe(req);
 	pthread_mutex_unlock(&pending_requests.lock);
 
 	if (req != NULL)
@@ -878,8 +898,19 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
 {
 	struct rte_mp_msg *reply_msg;
 	struct pending_request *pending_req, *exist;
+	unsigned long id;
 	int ret = -1;
 
+	/* queue already locked by caller */
+
+	exist = find_request_by_name(dst, req->name);
+	if (exist) {
+		EAL_LOG(ERR, "A pending request %s:%s", dst, req->name);
+		rte_errno = EEXIST;
+		return -1;
+	}
+
+	id = ++next_request_id;
 	pending_req = calloc(1, sizeof(*pending_req));
 	reply_msg = calloc(1, sizeof(*reply_msg));
 	if (pending_req == NULL || reply_msg == NULL) {
@@ -890,21 +921,12 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
 	}
 
 	pending_req->type = REQUEST_TYPE_ASYNC;
+	pending_req->id = id;
 	strlcpy(pending_req->dst, dst, sizeof(pending_req->dst));
 	pending_req->request = req;
 	pending_req->reply = reply_msg;
 	pending_req->async.param = param;
 
-	/* queue already locked by caller */
-
-	exist = find_pending_request(dst, req->name);
-	if (exist) {
-		EAL_LOG(ERR, "A pending request %s:%s", dst, req->name);
-		rte_errno = EEXIST;
-		ret = -1;
-		goto fail;
-	}
-
 	ret = send_msg(dst, req, MP_REQ);
 	if (ret < 0) {
 		EAL_LOG(ERR, "Fail to send request %s:%s",
@@ -919,7 +941,7 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
 
 	/* if alarm set fails, we simply ignore the reply */
 	if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
-			      async_reply_handle, pending_req) < 0) {
+			async_reply_handle, (void *)(uintptr_t)id) < 0) {
 		EAL_LOG(ERR, "Fail to set alarm for request %s:%s",
 			dst, req->name);
 		ret = -1;
@@ -952,7 +974,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
 	pthread_cond_init(&pending_req.sync.cond, &attr);
 
-	exist = find_pending_request(dst, req->name);
+	exist = find_request_by_name(dst, req->name);
 	if (exist) {
 		EAL_LOG(ERR, "A pending request %s:%s", dst, req->name);
 		rte_errno = EEXIST;
@@ -1178,6 +1200,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	 * it, and put it on the queue if we don't send any requests.
 	 */
 	dummy->type = REQUEST_TYPE_ASYNC;
+	dummy->id = ++next_request_id;
 	dummy->request = copy;
 	dummy->reply = NULL;
 	dummy->async.param = param;
-- 
2.47.3


^ permalink raw reply related

* [PATCH v6 1/6] eal: fix wrong log message in async IPC request
From: Anatoly Burakov @ 2026-06-25 14:01 UTC (permalink / raw)
  To: dev, Jianfeng Tan
In-Reply-To: <cover.1782395581.git.anatoly.burakov@intel.com>

The allocation failure log message in mp_request_async() says "sync
request" but the function handles asynchronous requests.

Fix the log to say "async request".

Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
Cc: stable@dpdk.org

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/eal/common/eal_common_proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 06f151818c..799c6e81b0 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -883,7 +883,7 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
 	pending_req = calloc(1, sizeof(*pending_req));
 	reply_msg = calloc(1, sizeof(*reply_msg));
 	if (pending_req == NULL || reply_msg == NULL) {
-		EAL_LOG(ERR, "Could not allocate space for sync request");
+		EAL_LOG(ERR, "Could not allocate space for async request");
 		rte_errno = ENOMEM;
 		ret = -1;
 		goto fail;
-- 
2.47.3


^ permalink raw reply related

* [PATCH v6 0/6] IPC fixes
From: Anatoly Burakov @ 2026-06-25 14:01 UTC (permalink / raw)
  To: dev
In-Reply-To: <740b39c5098b4d40cafb9881ad70865a3c889012.1773936429.git.anatoly.burakov@intel.com>

Coverity has reported (issue ID 501503) a memory leak, but there
actually were a few more problems with IPC than that. This patchset
addresses said problems.

1. Using pointer as async request identity is unsafe

Because asynchronous requests can fail at arbitrary points while
having arbitrary number of requests or alarms already in flight,
using pointer as request identity can create use-after-free risks.
Patchset replaces this with using numeric request ID instead.

2. Alarm cancel can deadlock

Async request handler may attempt to cancel the alarm, but an alarm
might have already been in progress blocking on the same lock that
is held by async request, leading to a deadlock. Patchset removes
the alarm cancel call, and allows the alarm to fire. This is fine,
because due to fix #1 the worst that can happen from calling stale
alarm is a noop, as request ID would not be found.

3. Memory leaks

There are a couple of memory leaks in failure paths. Patchset fixes
those.

4. Zero-peer async request does not trigger alarm

When async requests are performed but no peers exist, we create
a dummy request and put it on the queue, but we never set the
alarm that is supposed to handle that dummy request. Patchset adds
the alarm set in dummy paths where none was present before.

v6:

Moved pieces around, namely:

1) apply request ID refactor first as a standalone patch
2) fix the deadlock immediately after
3) fix memory leaks next
4) add missing callback as a final step

Contents of the patchset remain the same.

Anatoly Burakov (6):
  eal: fix wrong log message in async IPC request
  eal: use request ID instead of pointers
  eal: avoid deadlock in async IPC alarm callback
  eal: fix async IPC memory leaks on partial failure
  eal: fix memory leak in async IPC secondary path
  eal: fix async IPC callback not fired when no peers

 lib/eal/common/eal_common_proc.c | 133 +++++++++++++++++++++++--------
 1 file changed, 99 insertions(+), 34 deletions(-)

-- 
2.47.3

^ permalink raw reply

* RE: [PATCH v5 7/9] bpf/arm64: add BPF_ABS/BPF_IND packet load support
From: Marat Khalili @ 2026-06-25 13:59 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Wathsala Vithanage, Konstantin Ananyev, dev@dpdk.org
In-Reply-To: <20260624175815.673064-8-stephen@networkplumber.org>

Below is what gdb shows actually generated for instruction 15 of
test_ld_mbuf1_prog (with minimal changes and comments for readability). I
suggest adding this to the comments or (if we don't feel like keeping it
updated) the commit message, it helps analyzing the code a bit.

(Also, stack drawings in the file do not include the buffer we use here.)

     0: 0x92800069      mov     x9, #-4         // mov x9, <imm>
     1: 0x8b150129      add     x9, x9, x21     // add x9, src_reg
     2: 0xd280050a      mov     x10, #40        // mov x10, <&::data_len>
     3: 0x786a6a6a      ldrh    w10, [x19, x10]
     4: 0xcb09014a      sub     x10, x10, x9
     5: 0xd280008b      mov     x11, #4         // mov x11, <sz>
     6: 0xeb0b014f      subs    x15, x10, x11
     7: 0x5400010b      b.lt    +8              // b.lt slow
     8: 0xd280020a      mov     x10, #16        // mov x10, <&::data_off>
     9: 0x786a6a6a      ldrh    w10, [x19, x10]
    10: 0xd2800007      mov     x7, #0          // mov x7, <&::buf_addr>
    11: 0xf8676a67      ldr     x7, [x19, x7]
    12: 0x8b0a00e7      add     x7, x7, x10
    13: 0x8b0900e7      add     x7, x7, x9
    14: 0x1400000c      b       +12             // b load
                        slow:
    15: 0x91000121      add     x1, x9, #0      // mov x1, x9
    16: 0x91000260      add     x0, x19, #0     // mov x0, x19
    17: 0x52800082      mov     w2, #4          // mov w2, <sz>
    18: 0xd1002323      sub     x3, x25, #8     // sub x3, x25, <stack_ofs>
    19: 0xd2a04d49      mov     x9, #0x26a0000  // mov x9,
    20: 0xf29d3409      movk    x9, #0xe9a0     //   __rte_pktmbuf_read
    21: 0xd63f0120      blr     x9
    22: 0x91000007      add     x7, x0, #0      // mov x7, x0
    23: 0xb5000067      cbnz    x7, +3          // cbnz load
    24: 0xd2800007      mov     x7, #0x0
    25: 0x17ffff88      b       -120            // b epilogue
                        load:
    26: 0xb87f68e7      ldr     w7, [x7, xzr]
    27: 0xdac008e7      rev32   x7, x7

Opcode variations:
* Instruction 1 is omitted for BPF_ABS.
* Instruction 26 varies depending on sz.
* Instruction 27 varies or is omitted depending on sz.

Some benign nits:
* Instruction 6 should probably be `subs xzr, x10, x11`, a slight 1-bit error in
  the existing code, though x15 is unused.
* Instructions 5 and 17 use different encoding for the same operation, would be
  nice to keep them consistent, though operand never exceeds INT32_MAX.
* Instruction 10 is redundant.

I see two problems:
* We never check that x9 is non-negative. We could either add one more check,
  or rearrange the code and use unsigned comparison at 7: (currently b.lt).
  (There was some discussion previously regarding the special meaning of
  negative BPF_ABS immediate, but I believe this is out of scope of this patch,
  here we should just fail on negative _effective_ offset regardless of opcode.)
* Second argument of __rte_pktmbuf_read is `uint32_t off`, and we are trying to
  pass 64-bit offset in x1. We need a check that it does not exceed UINT32_MAX.

Otherwise looks good to me.

^ permalink raw reply

* [PATCH v3 4/4] ethdev: fix promoted flow metadata symbols
From: Dariusz Sosnowski @ 2026-06-25 13:33 UTC (permalink / raw)
  To: David Marchand, Bruce Richardson, Thomas Monjalon,
	Andrew Rybchenko, Ori Kam
  Cc: dev, Yu Jiang
In-Reply-To: <20260625133311.1299705-1-dsosnowski@nvidia.com>

Offending commit stabilized the following symbols
related to flow metadata:

- 1 function symbol:
    - rte_flow_dynf_metadata_register
- 2 variable symbols:
    - rte_flow_dynf_metadata_offs
    - rte_flow_dynf_metadata_mask

Any application that uses the experimental flow metadata symbols and
was dynamically linked against the 25.11 version of the ethdev library
would fail to start with a symbol lookup error when run against
the current version of the ethdev library:

/tmp/dpdk-25.11/usr/local/bin/dpdk-testpmd:
  symbol lookup error: /tmp/dpdk-25.11/usr/local/bin/dpdk-testpmd:
    undefined symbol: rte_flow_dynf_metadata_offs, version EXPERIMENTAL

This patch addresses that issue by restoring EXPERIMENTAL version
on the global variables to keep ABI compatibility [1].
Related inline helpers and variable declarations are kept as stable
(i.e., no __rte_experimental marker).
EXPERIMENTAL version will be removed from these global variables
in 26.11 release cycle on next ABI version bump.

Standard function symbol versioning is also applied on
rte_flow_dynf_metadata_register() function.

[1]: https://inbox.dpdk.org/dev/m7s3jl2566kibbapr2mfa2ic2opuc6b4ok2g67j3il5dgduzih@cz5wcdstb75n/

Bugzilla ID: 1957
Fixes: 4ee2f5c1cedf ("ethdev: promote flow metadata API to stable")

Reported-by: Yu Jiang <yux.jiang@intel.com>
Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
Acked-by: David Marchand <david.marchand@redhat.com>
---
 lib/ethdev/meson.build |  2 ++
 lib/ethdev/rte_flow.c  | 13 ++++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index 8ba6c708a2..63fd866af9 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
+use_function_versioning = true
+
 sources = files(
         'ethdev_driver.c',
         'ethdev_private.c',
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index ec0fe08355..24eb5a95b0 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -23,11 +23,11 @@
 #define FLOW_LOG RTE_ETHDEV_LOG_LINE
 
 /* Mbuf dynamic field name for metadata. */
-RTE_EXPORT_SYMBOL(rte_flow_dynf_metadata_offs)
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_flow_dynf_metadata_offs, 19.11)
 int32_t rte_flow_dynf_metadata_offs = -1;
 
 /* Mbuf dynamic field flag bit number for metadata. */
-RTE_EXPORT_SYMBOL(rte_flow_dynf_metadata_mask)
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_flow_dynf_metadata_mask, 19.11)
 uint64_t rte_flow_dynf_metadata_mask;
 
 /**
@@ -281,9 +281,7 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = {
 	MK_FLOW_ACTION(JUMP_TO_TABLE_INDEX, sizeof(struct rte_flow_action_jump_to_table_index)),
 };
 
-RTE_EXPORT_SYMBOL(rte_flow_dynf_metadata_register)
-int
-rte_flow_dynf_metadata_register(void)
+RTE_DEFAULT_SYMBOL(26, int, rte_flow_dynf_metadata_register, (void))
 {
 	int offset;
 	int flag;
@@ -316,6 +314,11 @@ rte_flow_dynf_metadata_register(void)
 	return -rte_errno;
 }
 
+RTE_VERSION_EXPERIMENTAL_SYMBOL(int, rte_flow_dynf_metadata_register, (void))
+{
+	return rte_flow_dynf_metadata_register();
+}
+
 static inline void
 fts_enter(struct rte_eth_dev *dev)
 {
-- 
2.47.3


^ permalink raw reply related

* [PATCH v3 3/4] net/mlx5: fix stabilized function versions
From: Dariusz Sosnowski @ 2026-06-25 13:33 UTC (permalink / raw)
  To: David Marchand, Bruce Richardson, Viacheslav Ovsiienko, Bing Zhao,
	Ori Kam, Suanming Mou, Matan Azrad
  Cc: dev, Yu Jiang
In-Reply-To: <20260625133311.1299705-1-dsosnowski@nvidia.com>

Offending patch stabilized the following function symbols:

- rte_pmd_mlx5_driver_event_cb_register
- rte_pmd_mlx5_driver_event_cb_unregister
- rte_pmd_mlx5_enable_steering
- rte_pmd_mlx5_disable_steering

These function symbols were introduced in 25.11.
Any application using these functions, linked against 25.11 version,
would fail when used with 26.07 libraries, because only DPDK_26 versions
of these symbols were exported.

This patch fixes that by adding proper function symbol versioning
to these symbols.

Fixes: e8cab133645f ("net/mlx5: promote some private API to stable")

Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
Acked-by: David Marchand <david.marchand@redhat.com>
---
 drivers/net/mlx5/meson.build         |  2 ++
 drivers/net/mlx5/mlx5_driver_event.c | 22 ++++++++++++++++------
 drivers/net/mlx5/mlx5_flow.c         | 18 ++++++++++++------
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 82a7dfe782..0fa6322779 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -2,6 +2,8 @@
 # Copyright 2018 6WIND S.A.
 # Copyright 2018 Mellanox Technologies, Ltd
 
+use_function_versioning = true
+
 if not (is_linux or is_windows)
     build = false
     reason = 'only supported on Linux and Windows'
diff --git a/drivers/net/mlx5/mlx5_driver_event.c b/drivers/net/mlx5/mlx5_driver_event.c
index 89e49331c8..d0e22d6151 100644
--- a/drivers/net/mlx5/mlx5_driver_event.c
+++ b/drivers/net/mlx5/mlx5_driver_event.c
@@ -236,9 +236,8 @@ notify_existing_devices(rte_pmd_mlx5_driver_event_callback_t cb, void *opaque)
 		notify_existing_queues(port_id, cb, opaque);
 }
 
-RTE_EXPORT_SYMBOL(rte_pmd_mlx5_driver_event_cb_register)
-int
-rte_pmd_mlx5_driver_event_cb_register(rte_pmd_mlx5_driver_event_callback_t cb, void *opaque)
+RTE_DEFAULT_SYMBOL(26, int, rte_pmd_mlx5_driver_event_cb_register,
+		   (rte_pmd_mlx5_driver_event_callback_t cb, void *opaque))
 {
 	struct registered_cb *r;
 
@@ -264,9 +263,14 @@ rte_pmd_mlx5_driver_event_cb_register(rte_pmd_mlx5_driver_event_callback_t cb, v
 	return 0;
 }
 
-RTE_EXPORT_SYMBOL(rte_pmd_mlx5_driver_event_cb_unregister)
-int
-rte_pmd_mlx5_driver_event_cb_unregister(rte_pmd_mlx5_driver_event_callback_t cb)
+RTE_VERSION_EXPERIMENTAL_SYMBOL(int, rte_pmd_mlx5_driver_event_cb_register,
+				(rte_pmd_mlx5_driver_event_callback_t cb, void *opaque))
+{
+	return rte_pmd_mlx5_driver_event_cb_register(cb, opaque);
+}
+
+RTE_DEFAULT_SYMBOL(26, int, rte_pmd_mlx5_driver_event_cb_unregister,
+		   (rte_pmd_mlx5_driver_event_callback_t cb))
 {
 	struct registered_cb *r;
 	bool found = false;
@@ -289,6 +293,12 @@ rte_pmd_mlx5_driver_event_cb_unregister(rte_pmd_mlx5_driver_event_callback_t cb)
 	return 0;
 }
 
+RTE_VERSION_EXPERIMENTAL_SYMBOL(int, rte_pmd_mlx5_driver_event_cb_unregister,
+				(rte_pmd_mlx5_driver_event_callback_t cb))
+{
+	return rte_pmd_mlx5_driver_event_cb_unregister(cb);
+}
+
 RTE_FINI(rte_pmd_mlx5_driver_event_cb_cleanup) {
 	struct registered_cb *r;
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a95dd9dc94..4b984df892 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -12506,9 +12506,7 @@ flow_disable_steering_run_on_related(struct rte_eth_dev *dev,
 	}
 }
 
-RTE_EXPORT_SYMBOL(rte_pmd_mlx5_disable_steering)
-void
-rte_pmd_mlx5_disable_steering(void)
+RTE_DEFAULT_SYMBOL(26, void, rte_pmd_mlx5_disable_steering, (void))
 {
 	uint16_t port_id;
 
@@ -12532,9 +12530,12 @@ rte_pmd_mlx5_disable_steering(void)
 	mlx5_steering_disabled = true;
 }
 
-RTE_EXPORT_SYMBOL(rte_pmd_mlx5_enable_steering)
-int
-rte_pmd_mlx5_enable_steering(void)
+RTE_VERSION_EXPERIMENTAL_SYMBOL(void, rte_pmd_mlx5_disable_steering, (void))
+{
+	rte_pmd_mlx5_disable_steering();
+}
+
+RTE_DEFAULT_SYMBOL(26, int, rte_pmd_mlx5_enable_steering, (void))
 {
 	uint16_t port_id;
 
@@ -12551,6 +12552,11 @@ rte_pmd_mlx5_enable_steering(void)
 	return 0;
 }
 
+RTE_VERSION_EXPERIMENTAL_SYMBOL(int, rte_pmd_mlx5_enable_steering, (void))
+{
+	return rte_pmd_mlx5_enable_steering();
+}
+
 bool
 mlx5_vport_rx_metadata_passing_enabled(const struct mlx5_dev_ctx_shared *sh)
 {
-- 
2.47.3


^ permalink raw reply related

* [PATCH v3 2/4] build: support function versioning for drivers
From: Dariusz Sosnowski @ 2026-06-25 13:33 UTC (permalink / raw)
  To: David Marchand, Bruce Richardson; +Cc: dev, Yu Jiang
In-Reply-To: <20260625133311.1299705-1-dsosnowski@nvidia.com>

Add support for enabling function versioning
(through use_function_versioning meson variable) for drivers,
similar to libraries.

Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
 drivers/meson.build | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/meson.build b/drivers/meson.build
index 4d95604ecd..8f3ab490ee 100644
--- a/drivers/meson.build
+++ b/drivers/meson.build
@@ -171,6 +171,7 @@ foreach subpath:subdirs
         pkgconfig_extra_libs = []
         testpmd_sources = []
         require_iova_in_mbuf = true
+        use_function_versioning = false
         # for handling base code files which may need extra cflags
         base_sources = []
         base_cflags = []
@@ -273,6 +274,13 @@ foreach subpath:subdirs
         endif
         dpdk_conf.set(lib_name.to_upper(), 1)
 
+        if developer_mode and is_windows and use_function_versioning
+            message('@0@: Function versioning is not supported by Windows.'.format(name))
+        endif
+        if use_function_versioning
+            cflags += '-DRTE_USE_FUNCTION_VERSIONING'
+        endif
+
         dpdk_extra_ldflags += pkgconfig_extra_libs
 
         dpdk_headers += headers
@@ -363,7 +371,18 @@ foreach subpath:subdirs
                     depends: [version_map])
         endif
 
-        shared_lib = shared_library(lib_name, sources_pmd_info,
+        if not use_function_versioning or is_windows
+            # Use pre-built objects and pmdinfo sources to build shared library.
+            shared_sources = sources_pmd_info
+        else
+            # For compat we need to rebuild with RTE_BUILD_SHARED_LIB defined.
+            # Use original sources and pmdinfo sources.
+            cflags += '-DRTE_BUILD_SHARED_LIB'
+            shared_sources = sources + sources_pmd_info
+            objs = []
+        endif
+
+        shared_lib = shared_library(lib_name, shared_sources,
                 objects: objs,
                 include_directories: includes,
                 dependencies: shared_deps,
-- 
2.47.3


^ permalink raw reply related

* [PATCH v3 1/4] eal: fix macro for versioned experimental symbol
From: Dariusz Sosnowski @ 2026-06-25 13:33 UTC (permalink / raw)
  To: David Marchand, Bruce Richardson; +Cc: dev, Yu Jiang
In-Reply-To: <20260625133311.1299705-1-dsosnowski@nvidia.com>

Add a missing semicolon after __asm__ block in
RTE_VERSION_EXPERIMENTAL_SYMBOL macro.
Its absence triggers the following compilation error with clang:

    ../lib/ethdev/rte_flow.c:320:1: error: expected ';' after top-level asm block
      320 | RTE_VERSION_EXPERIMENTAL_SYMBOL(int, rte_flow_dynf_metadata_register, (void))
          | ^
    ../lib/eal/common/eal_export.h:75:74: note: expanded from macro 'RTE_VERSION_EXPERIMENTAL_SYMBOL'
       75 | __asm__(".symver " RTE_STR(name) "_exp, " RTE_STR(name) "@EXPERIMENTAL") \
          |                                                                          ^
    ../lib/eal/include/rte_common.h:237:20: note: expanded from macro '\
    __rte_used'
      237 | #define __rte_used __attribute__((used))
          |                    ^

Fixes: e30e194c4d06 ("eal: rework function versioning macros")
Cc: david.marchand@redhat.com

Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
Reviewed-by: David Marchand <david.marchand@redhat.com>
---
 lib/eal/common/eal_export.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/eal/common/eal_export.h b/lib/eal/common/eal_export.h
index 888fd9f9ed..7971bf8d7a 100644
--- a/lib/eal/common/eal_export.h
+++ b/lib/eal/common/eal_export.h
@@ -72,7 +72,7 @@ __rte_used type name ## _v ## ver args; \
 type name ## _v ## ver args
 
 #define RTE_VERSION_EXPERIMENTAL_SYMBOL(type, name, args) VERSIONING_WARN \
-__asm__(".symver " RTE_STR(name) "_exp, " RTE_STR(name) "@EXPERIMENTAL") \
+__asm__(".symver " RTE_STR(name) "_exp, " RTE_STR(name) "@EXPERIMENTAL"); \
 __rte_used type name ## _exp args; \
 type name ## _exp args
 
-- 
2.47.3


^ permalink raw reply related

* [PATCH v3 0/4] add versioned symbols for recently stabilized APIs
From: Dariusz Sosnowski @ 2026-06-25 13:33 UTC (permalink / raw)
  To: David Marchand, Bruce Richardson, Thomas Monjalon,
	Andrew Rybchenko, Viacheslav Ovsiienko, Bing Zhao, Ori Kam,
	Suanming Mou, Matan Azrad
  Cc: dev, Yu Jiang
In-Reply-To: <20260624131337.1127323-1-dsosnowski@nvidia.com>

The main goal of this patchset is to address https://bugs.dpdk.org/show_bug.cgi?id=1957,
but it also handles other recently stabilized symbols and includes some minor fixes:

- Patch 1 - Fix RTE_VERSION_EXPERIMENTAL_SYMBOL macro on clang.
- Patch 2 - Allow function versioning inside drivers.
- Patch 3 - Version the function symbols stabilized in
  https://git.dpdk.org/dpdk/commit/?id=e8cab133645f5466ef75e511629add43b68a5027
- Patch 4 - Version the rte_flow_dynf_metadata_register() function stabilized in
  https://git.dpdk.org/dpdk/commit/?id=4ee2f5c1cedf9ee7f39afa667f71b07f4004ba5c
  Also restore the EXPERIMENTAL version on the global variable symbols
  rte_flow_dynf_metadata_offs and rte_flow_dynf_metadata_mask.

v3:
- Added rebuilding of drivers with -DRTE_BUILD_SHARED_LIB
  whenever function versioning is enabled.

v2:
- https://inbox.dpdk.org/dev/20260624131337.1127323-1-dsosnowski@nvidia.com/
- Drop patches introducing versioning macros for symbol aliases
  and their usage (patch 4 and 5 from v1)
- Restore EXPERIMENTAL version on global variable symbols
  rte_flow_dynf_metadata_offs and rte_flow_dynf_metadata_mask,
  as discussed under v1.
- Change commit title prefix in patch (2) from "drivers" to "build".

v1: https://inbox.dpdk.org/dev/20260623113752.1100072-1-dsosnowski@nvidia.com/

Dariusz Sosnowski (4):
  eal: fix macro for versioned experimental symbol
  build: support function versioning for drivers
  net/mlx5: fix stabilized function versions
  ethdev: fix promoted flow metadata symbols

 drivers/meson.build                  | 21 ++++++++++++++++++++-
 drivers/net/mlx5/meson.build         |  2 ++
 drivers/net/mlx5/mlx5_driver_event.c | 22 ++++++++++++++++------
 drivers/net/mlx5/mlx5_flow.c         | 18 ++++++++++++------
 lib/eal/common/eal_export.h          |  2 +-
 lib/ethdev/meson.build               |  2 ++
 lib/ethdev/rte_flow.c                | 13 ++++++++-----
 7 files changed, 61 insertions(+), 19 deletions(-)

--
2.47.3


^ permalink raw reply

* [PATCH v8 23/23] net/sxe2: update sxe2 feature matrix docs
From: liujie5 @ 2026-06-25 13:31 UTC (permalink / raw)
  To: stephen; +Cc: dev, Jie Liu
In-Reply-To: <20260625055021.63243-1-liujie5@linkdatatechnology.com>

From: Jie Liu <liujie5@linkdatatechnology.com>

Update the sxe2.ini feature sheet to accurately reflect the recently
implemented hardware capabilities in the sxe2 PMD.

Signed-off-by: Jie Liu <liujie5@linkdatatechnology.com>
---
 doc/guides/nics/features/sxe2.ini |  56 ++++++++++
 doc/guides/nics/sxe2.rst          | 168 ++++++++++++++++++++++++++++++
 2 files changed, 224 insertions(+)

diff --git a/doc/guides/nics/features/sxe2.ini b/doc/guides/nics/features/sxe2.ini
index 09ba2f558c..3c1e6a8a39 100644
--- a/doc/guides/nics/features/sxe2.ini
+++ b/doc/guides/nics/features/sxe2.ini
@@ -7,17 +7,73 @@
 ; is selected.
 ;
 [Features]
+Speed capabilities   = Y
+Link status          = Y
+Link status event    = Y
+Rx interrupt         = Y
 Fast mbuf free       = P
 Free Tx mbuf on demand = Y
 Burst mode info      = Y
 Queue start/stop     = Y
+Power mgmt address monitor = Y
 Buffer split on Rx   = P
 Scattered Rx         = Y
+Traffic manager      = Y
 CRC offload          = Y
+VLAN offload         = Y
+QinQ offload         = P
 L3 checksum offload  = Y
 L4 checksum offload  = Y
+Timestamp offload    = P
+Inner L3 checksum    = P
+Inner L4 checksum    = P
 Rx descriptor status = Y
 Tx descriptor status = Y
+MTU update           = Y
+TSO                  = P
+Promiscuous mode     = Y
+Allmulticast mode    = Y
+Unicast MAC filter   = Y
+RSS hash             = Y
+RSS key update       = Y
+RSS reta update      = Y
+VLAN filter          = Y
+Inline crypto        = Y
+Packet type parsing  = Y
+Timesync             = Y
+Basic stats          = Y
+Extended stats       = Y
+FW version           = Y
+Module EEPROM dump   = Y
+Multiprocess aware   = Y
 Linux                = Y
 x86-32               = Y
 x86-64               = Y
+
+[rte_flow items]
+eth                  = P
+geneve               = Y
+gre                  = Y
+gtpu                 = Y
+ipv4                 = Y
+ipv6                 = Y
+ipv6_frag_ext        = Y
+nvgre                = Y
+sctp                 = Y
+tcp                  = Y
+udp                  = Y
+vlan                 = P
+vxlan                = Y
+vxlan_gpe            = Y
+
+[rte_flow actions]
+count                = Y
+drop                 = Y
+mark                 = Y
+passthru             = Y
+port_representor     = Y
+queue                = Y
+represented_port     = Y
+rss                  = Y
+send_to_kernel       = Y
+port_id              = Y
diff --git a/doc/guides/nics/sxe2.rst b/doc/guides/nics/sxe2.rst
index 539072b076..51110629d8 100644
--- a/doc/guides/nics/sxe2.rst
+++ b/doc/guides/nics/sxe2.rst
@@ -35,3 +35,171 @@ preventing unauthorized access to random physical memory.
 This capability allows the PMD to coexist with kernel network interfaces
 which remain functional, although they stop receiving unicast packets
 as long as they share the same MAC address.
+
+Configuration
+-------------
+
+Runtime Configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+- ``Traffic Management Scheduling Levels``
+
+  The DPDK Traffic Management (rte_tm) APIs can be used to configure the Tx scheduler on the NIC.
+  The ``sched-layer-mode`` parameter can be used to set the number of scheduling levels
+  in the transmit scheduling hierarchy.
+  The provided value must be between 0 and 3.
+  If the value provided is greater than the number of levels supported by the HW,
+  the driver will use the hardware maximum value.
+
+- ``flow-duplicate-pattern`` parameter [int]
+
+  There are three options to choose from:
+
+  - 0. Prevent insertion of flow rules with the same pattern items.
+    In this case, duplicate rules are rejected and error code EEXIST is returned.
+
+  - 1. Allow duplicate rules with the same pattern items.
+    The last added rule takes effect.
+    If the current active rule is deleted, the second-to-last added rule takes effect, and so on.
+
+  - 2. Allow duplicate rules with the same pattern items.
+    The first added rule takes effect.
+    If the current active rule is deleted, the second added rule takes effect, and so on.
+
+  This option only applies to the switch engine flow type.
+  For the Fnav flow engine type, duplicate rules are always rejected.
+
+  By default, the PMD will set this value to 1 (last added rule takes effect).
+
+
+- ``fnav-stat-type`` parameter [int]
+
+  This parameter controls the Fnav flow engine statistics type used
+  for flow rule hit counting (via ``rte_flow_query``).
+
+  - 1: Only count the number of packets.
+  - 2: Only count the number of bytes.
+  - 3: Count both packets and bytes (default).
+
+  Default value is 3 (count both packets and bytes).
+
+- ``drv-sw-stats`` parameter [int]
+
+  This parameter controls whether per-packet software statistics
+  (SW stats) are collected in the Rx data path.
+
+  Hardware packet statistic counters may be inaccurate for certain
+  packet types due to hardware design limitations.
+  When accuracy of Rx packet classification statistics is critical,
+  enabling this parameter allows the driver to accumulate statistics
+  in software as packets are received, providing an alternative
+  statistical path that bypasses hardware counter inaccuracies.
+
+  - 0: Disable software statistics collection (default).
+    The basic port statistics (``ipackets``, ``ibytes``) are reported
+    from the hardware counters.
+  - 1: Enable software statistics collection.
+    Per-packet software statistics are accumulated for unicast,
+    multicast, broadcast, and dropped packets in the Rx data path.
+
+  When enabled, the following extended statistics (xstats) are available:
+  ``rx_sw_unicast_packets``, ``rx_sw_multicast_packets``,
+  ``rx_sw_broadcast_packets``, ``rx_sw_drop_packets``,
+  and ``rx_sw_drop_bytes``.
+
+- ``no-sched-mode`` parameter [int]
+
+  This parameter enables non-scheduling mode (no-sched mode).
+  When enabled, the transmit path bypasses the hardware scheduling module
+  and packets are sent directly out through the port.
+  This results in lower transmit latency and higher throughput,
+  but Traffic Management (rte_tm) APIs are not supported in this mode.
+
+  - 0: Disable non-scheduling mode (default).
+    The transmit path goes through the hardware scheduling hierarchy.
+    Traffic Management (rte_tm) APIs can be used to configure the Tx scheduler.
+  - 1: Enable non-scheduling mode.
+    The transmit path bypasses the hardware scheduling module.
+    Packets are sent directly from the port at full speed without scheduling.
+    Traffic Management (rte_tm) APIs are not available in this mode.
+
+- ``rx-low-latency`` parameter [int]
+
+  This parameter controls the interrupt throttling (ITR) interval
+  for Rx queue interrupts.
+
+  When enabled, the driver sets a shorter interrupt coalescing timeout
+  (``SXE2_ITR_INTERVAL_LOW``, approximately 1 μs),
+  reducing the time between packet arrival and interrupt delivery to the CPU.
+  This lowers receive latency at the cost of increased CPU interrupt rate.
+
+  When disabled (default), the driver uses the normal interrupt throttling
+  interval (``SXE2_ITR_INTERVAL_NORMAL``, approximately 20 μs),
+  which reduces the CPU interrupt rate at the expense of higher receive latency.
+
+  - 0: Disable Rx low latency (default).
+    Normal interrupt throttling interval (~20 μs) is used.
+  - 1: Enable Rx low latency.
+    Low interrupt throttling interval (~1 μs) is used
+    for reduced receive latency.
+
+- ``function-flow-direct`` parameter [int]
+
+  This parameter controls whether flow rules from different functional units
+  (DPDK vs kernel driver) are isolated or combined when both drivers
+  control the same physical port.
+
+  When the DPDK PMD and the kernel network driver coexist on the same port,
+  flow rules may originate from either driver.
+  This parameter determines how the source VSI (Virtual Switch Interface)
+  of each flow rule is handled during hardware programming.
+
+  - 0 (default): Isolate flow rules between DPDK and kernel.
+    When ``flow_isolated`` is enabled (``rte_flow_isolate()`` called),
+    kernel-side flow rules take priority and DPDK-side flow rules are suppressed.
+    When ``flow_isolated`` is disabled, DPDK-side flow rules take priority
+    and kernel-side flow rules are suppressed.
+    Only one functional unit's flows are active at a time.
+
+  - 1: Allow direct flow rules from both DPDK and kernel simultaneously.
+    Both DPDK and kernel source VSIs are preserved in the hardware flow table.
+    Flow rules from both sides are programmed without isolation.
+
+  This option only applies to FNAV and ACL flow engine types
+  and does not apply to PF bond devices.
+
+Extended Statistics (xstats)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The PMD provides the following extended statistics (xstats) for detailed
+monitoring of receive-side packet classification and software-level accounting.
+The software statistics path is provided as a workaround for hardware
+counter inaccuracies on certain packet types --- it accumulates per-packet
+statistics directly in the Rx data path, ensuring that unicast, multicast,
+broadcast, and drop counts reflect the actual packets processed by the driver.
+
+Receive Software Statistics
+  These counters are collected in the Rx data path when ``drv-sw-stats=1``
+  is configured (see the ``drv-sw-stats`` devarg above).
+  When ``drv-sw-stats`` is disabled (default), these xstats report zero.
+
+  - ``rx_sw_unicast_packets``: Number of unicast packets received.
+  - ``rx_sw_multicast_packets``: Number of multicast packets received.
+  - ``rx_sw_broadcast_packets``: Number of broadcast packets received.
+  - ``rx_sw_drop_packets``: Number of packets dropped in the Rx data path.
+  - ``rx_sw_drop_bytes``: Number of bytes dropped in the Rx data path.
+
+  When ``drv-sw-stats`` is enabled, the basic counters ``ipackets`` and
+  ``ibytes`` (from ``rte_eth_stats``) also reflect the software-accumulated
+  packet and byte counts. Otherwise, they are reported from hardware counters.
+
+Fnav Flow Engine Statistics
+  The Fnav flow engine statistics type is controlled by the ``fnav-stat-type``
+  devarg (see above). Depending on the configuration:
+
+  - ``fnav-stat-type=1``: Only packet count is available.
+  - ``fnav-stat-type=2``: Only byte count is available.
+  - ``fnav-stat-type=3`` (default): Both packet and byte counts are available.
+
+  Flow query results (via ``rte_flow_query``) expose these per-flow counters
+  through the query API, not via xstats.
-- 
2.52.0


^ permalink raw reply related

* [PATCH v8 22/23] net/sxe2: implement private dump info
From: liujie5 @ 2026-06-25 13:31 UTC (permalink / raw)
  To: stephen; +Cc: dev, Jie Liu
In-Reply-To: <20260625055021.63243-1-liujie5@linkdatatechnology.com>

From: Jie Liu <liujie5@linkdatatechnology.com>

This patch implements the 'eth_dev_priv_dump' ops for the sxe2 PMD.
This interface allows applications to dump driver-specific internal
state and configuration information to a file stream.

The output includes:
- capabilities.
- device base info.
- device args info.
- device filter info.
- representor info.

Signed-off-by: Jie Liu <liujie5@linkdatatechnology.com>
---
 drivers/net/sxe2/meson.build        |   1 +
 drivers/net/sxe2/sxe2_dump.c        | 287 ++++++++++++++++++++++++++++
 drivers/net/sxe2/sxe2_dump.h        |  12 ++
 drivers/net/sxe2/sxe2_ethdev.c      |   3 +
 drivers/net/sxe2/sxe2_ethdev_repr.c |   3 +
 5 files changed, 306 insertions(+)
 create mode 100644 drivers/net/sxe2/sxe2_dump.c
 create mode 100644 drivers/net/sxe2/sxe2_dump.h

diff --git a/drivers/net/sxe2/meson.build b/drivers/net/sxe2/meson.build
index a172bb2867..01366c5378 100644
--- a/drivers/net/sxe2/meson.build
+++ b/drivers/net/sxe2/meson.build
@@ -78,4 +78,5 @@ sources += files(
         'sxe2_flow_parse_pattern.c',
         'sxe2_flow_parse_engine.c',
         'sxe2_txrx_check_mbuf.c',
+        'sxe2_dump.c',
 )
diff --git a/drivers/net/sxe2/sxe2_dump.c b/drivers/net/sxe2/sxe2_dump.c
new file mode 100644
index 0000000000..d43473e083
--- /dev/null
+++ b/drivers/net/sxe2/sxe2_dump.c
@@ -0,0 +1,287 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C), 2025, Wuxi Stars Micro System Technologies Co., Ltd.
+ */
+
+#include <rte_malloc.h>
+#include <arpa/inet.h>
+
+#include "sxe2_common_log.h"
+#include "sxe2_ethdev.h"
+#include "sxe2_dump.h"
+#include "sxe2_stats.h"
+
+static void
+sxe2_dump_dev_feature_capability(FILE *file, struct rte_eth_dev *dev)
+{
+	uint32_t i;
+	struct sxe2_adapter *adapter = SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+	const struct {
+		uint32_t cap_bit;
+		const char *name;
+	} caps_name[] = {
+		{SXE2_DEV_CAPS_OFFLOAD_L2, "L2"},
+		{SXE2_DEV_CAPS_OFFLOAD_VLAN, "VLAN"},
+		{SXE2_DEV_CAPS_OFFLOAD_IPSEC, "IPSEC"},
+		{SXE2_DEV_CAPS_OFFLOAD_RSS, "RSS"},
+		{SXE2_DEV_CAPS_OFFLOAD_FNAV, "FNAV"},
+		{SXE2_DEV_CAPS_OFFLOAD_TM, "TM"},
+		{SXE2_DEV_CAPS_OFFLOAD_PTP, "PTP"},
+	};
+	if (adapter->is_dev_repr)
+		goto l_end;
+
+	fprintf(file, "  - Dev Capability:\n");
+	for (i = 0; i < RTE_DIM(caps_name); i++) {
+		fprintf(file, "\t  -- support %s: %s\n", caps_name[i].name,
+			(adapter->cap_flags & caps_name[i].cap_bit) ? "Yes" :
+									 "No");
+	}
+l_end:
+	return;
+}
+
+static void
+sxe2_dump_device_basic_info(FILE *file, struct rte_eth_dev *dev)
+{
+	struct sxe2_adapter *adapter = SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+
+	fprintf(file,
+		"  - Device Base Info:\n"
+		"\t  -- name: %s\n"
+		"\t  -- pf_idx: %u port_idx: %u\n"
+		"\t  -- tx_mode_flags: 0x%x rx_mode_flags: 0x%x\n"
+		"\t  -- flow_isolate_cfg: 0x%x flow_isolated: 0x%x\n"
+		"\t  -- dev_type: 0x%x is_switchdev: 0x%x\n"
+		"\t  -- is_dev_repr: 0x%x dev_port_id: 0x%x\n"
+		"\t  -- dev_flags: 0x%x\n"
+		"\t  -- intr_conf lsc: %u rxq: %u rmv: %u\n",
+		dev->data->name,
+		adapter->pf_idx, adapter->port_idx,
+		adapter->tx_mode_flags, adapter->rx_mode_flags,
+		adapter->flow_isolate_cfg, adapter->flow_isolated,
+		adapter->dev_type, adapter->switchdev_info.is_switchdev,
+		adapter->is_dev_repr, adapter->dev_port_id,
+		dev->data->dev_flags,
+		dev->data->dev_conf.intr_conf.lsc,
+		dev->data->dev_conf.intr_conf.rxq,
+		dev->data->dev_conf.intr_conf.rmv);
+}
+
+static void
+sxe2_dump_dev_args_info(FILE *file, struct rte_eth_dev *dev)
+{
+	struct sxe2_adapter *adapter = SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+
+	if (adapter->is_dev_repr)
+		goto l_end;
+
+	fprintf(file,
+		"  - Device Args Info:\n"
+		"\t  -- no_sched_mode: %s\n"
+		"\t  -- flow-duplicate-pattern: %u\n"
+		"\t  -- fnav-stat-type: %u\n"
+		"\t  -- sched_layer_mode: %u\n"
+		"\t  -- rx_low_latency: %s\n"
+		"\t  -- function-flow-direct: %s\n",
+		adapter->devargs.no_sched_mode ? "On" : "Off",
+		adapter->devargs.flow_dup_pattern_mode,
+		adapter->devargs.fnav_stat_type,
+		adapter->devargs.sched_layer_mode,
+		adapter->devargs.rx_low_latency ? "On" : "Off",
+		adapter->devargs.func_flow_direct_en ? "On" : "Off");
+l_end:
+	return;
+}
+
+static void sxe2_dump_filter_info(FILE *file, struct rte_eth_dev *dev)
+{
+	struct sxe2_adapter *adapter = SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+	struct sxe2_mac_filter *mac_entry;
+	struct sxe2_mac_filter *next_mac_entry;
+	struct sxe2_vlan_filter *vlan_entry;
+	struct sxe2_vlan_filter *next_vlan_entry;
+
+	if (adapter->is_dev_repr)
+		goto l_end;
+
+	fprintf(file,
+		"  - Device Filter Info:\n"
+		"\t  -- cur_promisc:0x%x hw_promisc:0x%x\n"
+		"\t  -- unicast_num: %u multicast_num: %u\n"
+		"\t  -- vlan_num: %u filter_on: %u hw_filter_on: %u\n"
+		"\t  -- vlan max_cnt: %u cnt: %u\n"
+		"\t  -- tpid: 0x%x vid: 0x%x\n"
+		"\t  -- vlan_outer_insert: 0x%x vlan_outer_strip: 0x%x\n"
+		"\t  -- vlan_inner_insert: 0x%x vlan_inner_strip: 0x%x\n",
+		adapter->filter_ctxt.cur_promisc_flags,
+		adapter->filter_ctxt.hw_promisc_flags,
+		adapter->filter_ctxt.uc_num,
+		adapter->filter_ctxt.mc_num,
+		adapter->filter_ctxt.vlan_num,
+		adapter->filter_ctxt.vlan_info.filter_on,
+		adapter->filter_ctxt.vlan_info.hw_filter_on,
+		adapter->filter_ctxt.vlan_info.max_cnt,
+		adapter->filter_ctxt.vlan_info.cnt,
+		adapter->filter_ctxt.vlan_info.tpid,
+		adapter->filter_ctxt.vlan_info.vid,
+		adapter->filter_ctxt.vlan_info.outer_insert,
+		adapter->filter_ctxt.vlan_info.outer_strip,
+		adapter->filter_ctxt.vlan_info.inner_insert,
+		adapter->filter_ctxt.vlan_info.inner_strip);
+
+	if (adapter->filter_ctxt.uc_num > 0) {
+		fprintf(file,
+			"\t  -- Unicast entry:\n");
+		RTE_TAILQ_FOREACH_SAFE(mac_entry, &adapter->filter_ctxt.uc_list, next,
+				       next_mac_entry) {
+			fprintf(file,
+				"\t  -- addr: %02x:%02x:%02x:%02x:%02x:%02x hw status:%u "
+				"default:%u\n",
+				mac_entry->mac_addr.addr_bytes[0],
+				mac_entry->mac_addr.addr_bytes[1],
+				mac_entry->mac_addr.addr_bytes[2],
+				mac_entry->mac_addr.addr_bytes[3],
+				mac_entry->mac_addr.addr_bytes[4],
+				mac_entry->mac_addr.addr_bytes[5],
+				mac_entry->hw_config,
+				mac_entry->default_config);
+		}
+	}
+
+	if (adapter->filter_ctxt.mc_num > 0) {
+		fprintf(file,
+			"\t  -- Multicast entry:\n");
+		RTE_TAILQ_FOREACH_SAFE(mac_entry, &adapter->filter_ctxt.mc_list,
+				       next, next_mac_entry) {
+			fprintf(file,
+				"\t  -- addr: %02x:%02x:%02x:%02x:%02x:%02x "
+				"hw status:%u default:%u\n",
+				mac_entry->mac_addr.addr_bytes[0],
+				mac_entry->mac_addr.addr_bytes[1],
+				mac_entry->mac_addr.addr_bytes[2],
+				mac_entry->mac_addr.addr_bytes[3],
+				mac_entry->mac_addr.addr_bytes[4],
+				mac_entry->mac_addr.addr_bytes[5],
+				mac_entry->hw_config,
+				mac_entry->default_config);
+		}
+	}
+
+	if (adapter->filter_ctxt.vlan_num > 0) {
+		fprintf(file,
+			"\t  -- Vlan entry:\n");
+		RTE_TAILQ_FOREACH_SAFE(vlan_entry, &adapter->filter_ctxt.vlan_list,
+			next, next_vlan_entry) {
+			fprintf(file,
+				"\t  -- vlan tpid:0x%04x vid:0x%04x prio:%d "
+				"hw status:%u default:%u\n",
+				vlan_entry->vlan_info.tpid,
+				vlan_entry->vlan_info.vid,
+				vlan_entry->vlan_info.prio,
+				vlan_entry->hw_config,
+				vlan_entry->default_config);
+		}
+	}
+l_end:
+	return;
+}
+
+static const char *sxe2_vsi_id_str(uint16_t vsi_id, char *buf, size_t len)
+{
+	if (vsi_id == SXE2_INVALID_VSI_ID)
+		return "NA";
+
+	snprintf(buf, len, "%u", vsi_id);
+	return buf;
+}
+
+static void
+sxe2_dump_switchdev_info(FILE *file, struct rte_eth_dev *dev)
+{
+	struct sxe2_adapter *adapter = SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+	uint32_t idx;
+	char k_vsi_buf[16];
+	char u_vsi_buf[16];
+
+	if (adapter->is_dev_repr && adapter->repr_priv_data) {
+		fprintf(file,
+			"  - Reprenstor Info:\n"
+			"\t  -- repr_id: %u\n"
+			"\t  -- repr_q_id: %u\n"
+			"\t  -- repr_pf_id: %u\n"
+			"\t  -- repr_vf_id: %u\n"
+			"\t  -- repr_vf_vsi_id: %u\n"
+			"\t  -- repr_vf_k_vsi_id: %s\n"
+			"\t  -- repr_vf_u_vsi_id: %s\n",
+			adapter->repr_priv_data->repr_id,
+			adapter->repr_priv_data->repr_q_id,
+			adapter->repr_priv_data->repr_pf_id,
+			adapter->repr_priv_data->repr_vf_id,
+			adapter->repr_priv_data->repr_vf_vsi_id,
+			sxe2_vsi_id_str(adapter->repr_priv_data->repr_vf_k_vsi_id,
+					k_vsi_buf, sizeof(k_vsi_buf)),
+			sxe2_vsi_id_str(adapter->repr_priv_data->repr_vf_u_vsi_id,
+					u_vsi_buf, sizeof(u_vsi_buf)));
+		goto l_end;
+	}
+	if (adapter->switchdev_info.is_switchdev) {
+		fprintf(file,
+			"  - Switchdev Info:\n"
+			"\t  -- primary:0x%x\n"
+			"\t  -- representor: 0x%x\n"
+			"\t  -- port_name_type: 0x%x\n"
+			"\t  -- nb_vf: %u nb_repr_vf: %u\n",
+			adapter->switchdev_info.primary,
+			adapter->switchdev_info.representor,
+			adapter->switchdev_info.port_name_type,
+			adapter->repr_ctxt.nb_vf,
+			adapter->repr_ctxt.nb_repr_vf);
+		if (adapter->repr_ctxt.nb_vf > 0) {
+			fprintf(file,
+				"\t  -- vf entry:\n");
+			for (idx = 0; idx < adapter->repr_ctxt.nb_vf; idx++) {
+				fprintf(file,
+					"\t  -- func_id:%u vsi_type:%u kernel_vsi_id:%u dpdk_vsi_id:%u\n",
+					adapter->repr_ctxt.repr_vf_id[idx].func_id,
+					adapter->repr_ctxt.repr_vf_id[idx].vsi_type,
+					adapter->repr_ctxt.repr_vf_id[idx].kernel_vsi_id,
+					adapter->repr_ctxt.repr_vf_id[idx].dpdk_vsi_id);
+			}
+		}
+	}
+
+l_end:
+	return;
+}
+
+int32_t sxe2_eth_dev_priv_dump(struct rte_eth_dev *dev, FILE *file)
+{
+	char *buf = NULL;
+	size_t size = 0;
+	FILE *str;
+	int32_t ret = -1;
+
+	str = open_memstream(&buf, &size);
+	if (!str) {
+		PMD_LOG_ERR(DRV, "fopen fail.");
+		goto l_end;
+	}
+
+	sxe2_dump_dev_feature_capability(str, dev);
+	sxe2_dump_device_basic_info(str, dev);
+	sxe2_dump_dev_args_info(str, dev);
+	sxe2_dump_filter_info(str, dev);
+	sxe2_dump_switchdev_info(str, dev);
+
+	(void)fflush(str);
+
+	(void)fwrite(buf, 1, size, file);
+	(void)fflush(file);
+
+	ret = 0;
+
+	(void)fclose(str);
+	free(buf);
+l_end:
+	return ret;
+}
diff --git a/drivers/net/sxe2/sxe2_dump.h b/drivers/net/sxe2/sxe2_dump.h
new file mode 100644
index 0000000000..05d6db9b3d
--- /dev/null
+++ b/drivers/net/sxe2/sxe2_dump.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C), 2025, Wuxi Stars Micro System Technologies Co., Ltd.
+ */
+
+#ifndef __SXE2_DUMP_H__
+#define __SXE2_DUMP_H__
+
+#include <ethdev_driver.h>
+
+int32_t sxe2_eth_dev_priv_dump(struct rte_eth_dev *dev, FILE *file);
+
+#endif /* __SXE2_DUMP_H__ */
diff --git a/drivers/net/sxe2/sxe2_ethdev.c b/drivers/net/sxe2/sxe2_ethdev.c
index 85b3ad7d40..2f946d45d7 100644
--- a/drivers/net/sxe2/sxe2_ethdev.c
+++ b/drivers/net/sxe2/sxe2_ethdev.c
@@ -37,6 +37,7 @@
 #include "sxe2_host_regs.h"
 #include "sxe2_switchdev.h"
 #include "sxe2_ioctl_chnl_func.h"
+#include "sxe2_dump.h"
 #include "sxe2_ethdev_repr.h"
 #include "sxe2vf_regs.h"
 #include "sxe2_switchdev.h"
@@ -202,6 +203,8 @@ static const struct eth_dev_ops sxe2_eth_dev_ops = {
 
 	.get_module_info            = sxe2_get_module_info,
 	.get_module_eeprom          = sxe2_get_module_eeprom,
+
+	.eth_dev_priv_dump          = sxe2_eth_dev_priv_dump,
 };
 
 static int32_t sxe2_dev_configure(struct rte_eth_dev *dev)
diff --git a/drivers/net/sxe2/sxe2_ethdev_repr.c b/drivers/net/sxe2/sxe2_ethdev_repr.c
index 15b839bb74..f32318b731 100644
--- a/drivers/net/sxe2/sxe2_ethdev_repr.c
+++ b/drivers/net/sxe2/sxe2_ethdev_repr.c
@@ -11,6 +11,7 @@
 #include "sxe2_txrx.h"
 #include "sxe2_switchdev.h"
 #include "sxe2_mp.h"
+#include "sxe2_dump.h"
 #include "sxe2_stats.h"
 #include "sxe2_flow.h"
 
@@ -236,6 +237,8 @@ static const struct eth_dev_ops sxe2_switchdev_repr_dev_ops = {
 	.allmulticast_enable        = sxe2_repr_allmulti_enable,
 	.allmulticast_disable       = sxe2_repr_allmulti_disable,
 
+	.eth_dev_priv_dump          = sxe2_eth_dev_priv_dump,
+
 	.stats_get                  = sxe2_stats_info_get,
 	.stats_reset                = sxe2_stats_info_reset,
 	.xstats_get                 = sxe2_xstats_info_get,
-- 
2.52.0


^ permalink raw reply related

* [PATCH v8 21/23] net/sxe2: add private devargs parsing
From: liujie5 @ 2026-06-25 13:30 UTC (permalink / raw)
  To: stephen; +Cc: dev, Jie Liu
In-Reply-To: <20260625055021.63243-1-liujie5@linkdatatechnology.com>

From: Jie Liu <liujie5@linkdatatechnology.com>

Introduce runtime private device arguments (devargs) support for the
sxe2 PMD. This allows users to customize driver behavior at startup
without recompiling the source code.

The parameters are parsed using the standard 'rte_kvargs' library during
the PCI/vdev probing phase. Documentation for these parameters is also
updated.

Signed-off-by: Jie Liu <liujie5@linkdatatechnology.com>
---
 drivers/net/sxe2/sxe2_cmd_chnl.c |  21 +++
 drivers/net/sxe2/sxe2_cmd_chnl.h |   3 +
 drivers/net/sxe2/sxe2_drv_cmd.h  |  17 +++
 drivers/net/sxe2/sxe2_ethdev.c   | 241 +++++++++++++++++++++++++++++++
 drivers/net/sxe2/sxe2_ethdev.h   |   6 +
 drivers/net/sxe2/sxe2_flow.c     |   9 +-
 drivers/net/sxe2/sxe2_irq.c      |  30 ++++
 drivers/net/sxe2/sxe2_rx.c       |  12 ++
 8 files changed, 336 insertions(+), 3 deletions(-)

diff --git a/drivers/net/sxe2/sxe2_cmd_chnl.c b/drivers/net/sxe2/sxe2_cmd_chnl.c
index 43e8c59487..b09989fe50 100644
--- a/drivers/net/sxe2/sxe2_cmd_chnl.c
+++ b/drivers/net/sxe2/sxe2_cmd_chnl.c
@@ -99,6 +99,27 @@ int32_t sxe2_drv_dev_info_get(struct sxe2_adapter *adapter,
 	return ret;
 }
 
+int32_t sxe2_drv_fc_state_get(struct sxe2_adapter *adapter,
+			      struct sxe2_drv_vsi_fc_get_resp *dev_fc_state_resp)
+{
+	int32_t ret = 0;
+	struct sxe2_common_device *cdev = adapter->cdev;
+	struct sxe2_drv_cmd_params param = {0};
+	struct sxe2_drv_vsi_fc_get_req req = {0};
+
+	req.vsi_id = adapter->vsi_ctxt.main_vsi->vsi_id;
+	sxe2_drv_cmd_params_fill(adapter, &param, SXE2_DRV_CMD_VSI_FC_GET,
+				&req, sizeof(req),
+				dev_fc_state_resp,
+				sizeof(*dev_fc_state_resp));
+	ret = sxe2_drv_cmd_exec(cdev, &param);
+	if (ret) {
+		PMD_DEV_LOG_ERR(adapter, DRV, "get fc state failed, ret=%d", ret);
+		ret = -EIO;
+	}
+	return ret;
+}
+
 int32_t sxe2_drv_dev_fw_info_get(struct sxe2_adapter *adapter,
 				struct sxe2_drv_dev_fw_info_resp *dev_fw_info_resp)
 {
diff --git a/drivers/net/sxe2/sxe2_cmd_chnl.h b/drivers/net/sxe2/sxe2_cmd_chnl.h
index 988d4b458b..d63caad526 100644
--- a/drivers/net/sxe2/sxe2_cmd_chnl.h
+++ b/drivers/net/sxe2/sxe2_cmd_chnl.h
@@ -99,6 +99,9 @@ int32_t sxe2_drv_vsi_stats_reset(struct sxe2_adapter *adapter);
 int32_t sxe2_drv_queue_info_get_update(struct sxe2_adapter *adapter,
 				       struct eth_queue_stats *qstats);
 
+int32_t sxe2_drv_fc_state_get(struct sxe2_adapter *adapter,
+			      struct sxe2_drv_vsi_fc_get_resp *dev_fc_state_resp);
+
 int32_t sxe2_drv_rxq_mapping_set(struct rte_eth_dev *eth_dev, uint16_t queue_id, uint8_t pool_idx);
 
 int32_t sxe2_drv_txq_mapping_set(struct rte_eth_dev *eth_dev, uint16_t queue_id, uint8_t pool_idx);
diff --git a/drivers/net/sxe2/sxe2_drv_cmd.h b/drivers/net/sxe2/sxe2_drv_cmd.h
index 3fabf351af..03ef3b315d 100644
--- a/drivers/net/sxe2/sxe2_drv_cmd.h
+++ b/drivers/net/sxe2/sxe2_drv_cmd.h
@@ -651,6 +651,23 @@ struct __rte_aligned(4) __rte_packed_begin sxe2_drv_sfp_resp {
 	uint8_t data[];
 } __rte_packed_end;
 
+enum sxe2_fc_type {
+	SXE2_FC_T_DIS = 0,
+	SXE2_FC_T_LFC,
+	SXE2_FC_T_PFC,
+	SXE2_FC_T_UNKNOWN = 255,
+};
+
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_vsi_fc_get_req {
+	uint16_t vsi_id;
+	uint8_t rsv[2];
+} __rte_packed_end;
+
+struct __rte_aligned(4) __rte_packed_begin sxe2_drv_vsi_fc_get_resp {
+	uint8_t fc_enable;
+	uint8_t rsv[3];
+} __rte_packed_end;
+
 enum sxe2_drv_cmd_module {
 	SXE2_DRV_CMD_MODULE_HANDSHAKE = 0,
 	SXE2_DRV_CMD_MODULE_DEV = 1,
diff --git a/drivers/net/sxe2/sxe2_ethdev.c b/drivers/net/sxe2/sxe2_ethdev.c
index e881cc2af6..85b3ad7d40 100644
--- a/drivers/net/sxe2/sxe2_ethdev.c
+++ b/drivers/net/sxe2/sxe2_ethdev.c
@@ -67,6 +67,15 @@ static const struct rte_pci_id pci_id_sxe2_tbl[] = {
 	{ .vendor_id = 0, },
 };
 
+#define SXE2_TXSCH_NODE_ADJ_LVL_MAX 3
+
+#define SXE2_DEVARG_FLOW_DUP_PATTERN_MODE "flow-duplicate-pattern"
+#define SXE2_DEVARG_FUNC_FLOW_DIRCT "function-flow-direct"
+#define SXE2_DEVARG_FNAV_STAT_TYPE "fnav-stat-type"
+#define SXE2_DEVARG_NO_SCHED_MODE "no-sched-mode"
+#define SXE2_DEVARG_SCHED_LAYER_MODE "sched-layer-mode"
+#define SXE2_DEVARG_RX_LOW_LATENCY "rx-low-latency"
+
 static struct sxe2_pci_map_addr_info sxe2_net_map_addr_info_pf[SXE2_PCI_MAP_RES_MAX_COUNT] = {
 	[SXE2_PCI_MAP_RES_INVALID] = {.addr_base = 0,
 				      .bar_idx = 0,
@@ -960,6 +969,124 @@ sxe2_buffer_split_supported_hdr_ptypes_get(struct rte_eth_dev *dev __rte_unused,
 	return ptypes;
 }
 
+static int32_t sxe2_parse_fnav_stat_type(const char *key, const char *value, void *args)
+{
+	int32_t ret = -EINVAL;
+	uint8_t *num = (uint8_t *)args;
+	unsigned long fnav_stat_type;
+	char *endptr = NULL;
+
+	if (value == NULL || args == NULL) {
+		ret = 0;
+		goto l_end;
+	}
+	errno = 0;
+	fnav_stat_type = strtoul(value, &endptr, 10);
+	if (errno != 0 || endptr == value || *endptr != '\0') {
+		PMD_LOG_WARN(INIT, "%s: \"%s\" is not a valid int value.",
+			key, value);
+		goto l_end;
+	}
+	if (fnav_stat_type > SXE2_FNAV_STAT_ENA_ALL ||
+		fnav_stat_type == SXE2_FNAV_STAT_ENA_NONE) {
+		PMD_LOG_ERR(INIT, "%s: \"%s\" out of range [1-3].",
+			key, value);
+		goto l_end;
+	}
+	*num = (uint8_t)fnav_stat_type;
+	ret = 0;
+l_end:
+	return ret;
+}
+
+static int32_t sxe2_parse_sched_layer_mode(const char *key, const char *value, void *args)
+{
+	int32_t ret = -EINVAL;
+	uint8_t *num = (uint8_t *)args;
+	unsigned long sched_layer_mode;
+	char *endptr = NULL;
+
+	if (value == NULL || args == NULL) {
+		ret = 0;
+		goto l_end;
+	}
+	errno = 0;
+	sched_layer_mode = strtoul(value, &endptr, 10);
+	if (errno != 0 || endptr == value || *endptr != '\0') {
+		PMD_LOG_WARN(INIT, "%s: \"%s\" is not a valid int value.",
+			key, value);
+		goto l_end;
+	}
+	if (sched_layer_mode > SXE2_TXSCH_NODE_ADJ_LVL_MAX) {
+		PMD_LOG_ERR(INIT, "%s: \"%s\" > 3.",
+			key, value);
+		goto l_end;
+	}
+	*num = (uint8_t)sched_layer_mode;
+	ret = 0;
+l_end:
+	return ret;
+}
+
+static int32_t sxe2_parse_flow_dup_pattern_mode(const char *key, const char *value, void *args)
+{
+	uint8_t *num = (uint8_t *)args;
+	char *end;
+	unsigned long val;
+	int32_t ret = -EINVAL;
+
+	if (value == NULL || args == NULL) {
+		ret = 0;
+		goto l_end;
+	}
+	errno = 0;
+	val = strtoul(value, &end, 10);
+	if (errno != 0 || end == value || *end != '\0') {
+		PMD_LOG_ERR(INIT, "Invalid 8-bit integer value for key %s: %s", key, value);
+		goto l_end;
+	}
+
+	if (val >= SXE2_FLOW_SW_PATTERN_MAX) {
+		PMD_LOG_ERR(INIT, "%s: \"%s\" out of range [0-%u].",
+			key, value, SXE2_FLOW_SW_PATTERN_MAX - 1);
+		goto l_end;
+	}
+
+	*num = (uint8_t)val;
+	ret = 0;
+l_end:
+	return ret;
+}
+
+static int32_t sxe2_parse_bool(const char *key, const char *value, void *args)
+{
+	int32_t ret = -EINVAL;
+	uint8_t *num = (uint8_t *)args;
+	unsigned long bool_val;
+	char *endptr = NULL;
+
+	if (value == NULL || args == NULL) {
+		ret = 0;
+		goto l_end;
+	}
+	errno = 0;
+	bool_val = strtoul(value, &endptr, 10);
+	if (errno != 0 || endptr == value || *endptr != '\0') {
+		PMD_LOG_WARN(INIT, "%s: \"%s\" is not a valid int value.",
+			key, value);
+		goto l_end;
+	}
+	if (bool_val != 0 && bool_val != 1) {
+		PMD_LOG_ERR(INIT, "%s: \"%s\" out of range [0|1].",
+			key, value);
+		goto l_end;
+	}
+	*num = (uint8_t)bool_val;
+	ret = 0;
+l_end:
+	return ret;
+}
+
 struct sxe2_pci_map_bar_info *sxe2_dev_get_bar_info(struct sxe2_adapter *adapter,
 						    enum sxe2_pci_map_resource res_type)
 {
@@ -1027,6 +1154,67 @@ void *sxe2_pci_map_addr_get(struct sxe2_adapter *adapter,
 	return addr;
 }
 
+static int32_t sxe2_args_parse(struct rte_eth_dev *dev, struct sxe2_dev_kvargs_info *kvargs)
+{
+	struct sxe2_adapter *adapter = SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+	int32_t ret = 0;
+	PMD_INIT_FUNC_TRACE();
+
+	adapter->devargs.flow_dup_pattern_mode = SXE2_FLOW_SW_PATTERN_LAST;
+
+	if (kvargs == NULL)
+		goto l_end;
+	ret = sxe2_kvargs_process(kvargs, SXE2_DEVARG_FNAV_STAT_TYPE,
+				 &sxe2_parse_fnav_stat_type,
+				 &adapter->devargs.fnav_stat_type);
+	if (ret) {
+		PMD_DEV_LOG_ERR(adapter, INIT, "Failed to parse fnav stat type, ret:%d", ret);
+		goto l_end;
+	}
+
+	ret = sxe2_kvargs_process(kvargs, SXE2_DEVARG_NO_SCHED_MODE,
+				 &sxe2_parse_bool,
+				 &adapter->devargs.no_sched_mode);
+	if (ret) {
+		PMD_DEV_LOG_ERR(adapter, INIT, "Failed to parse no sched mode, ret:%d", ret);
+		goto l_end;
+	}
+	ret = sxe2_kvargs_process(kvargs, SXE2_DEVARG_SCHED_LAYER_MODE,
+				 &sxe2_parse_sched_layer_mode,
+				 &adapter->devargs.sched_layer_mode);
+	if (ret) {
+		PMD_DEV_LOG_ERR(adapter, INIT, "Failed to parse sched layer mode, ret:%d", ret);
+		goto l_end;
+	}
+
+	ret = sxe2_kvargs_process(kvargs, SXE2_DEVARG_FLOW_DUP_PATTERN_MODE,
+				 &sxe2_parse_flow_dup_pattern_mode,
+				 &adapter->devargs.flow_dup_pattern_mode);
+	if (ret) {
+		PMD_DEV_LOG_ERR(adapter, INIT, "Failed to parse flow dup pattern mode, ret:%d",
+				ret);
+		goto l_end;
+	}
+
+	ret = sxe2_kvargs_process(kvargs, SXE2_DEVARG_FUNC_FLOW_DIRCT,
+				 &sxe2_parse_bool,
+				 &adapter->devargs.func_flow_direct_en);
+	if (ret) {
+		PMD_DEV_LOG_ERR(adapter, INIT, "Failed to parse function flow rule enable,"
+				"ret:%d", ret);
+		goto l_end;
+	}
+	ret = sxe2_kvargs_process(kvargs, SXE2_DEVARG_RX_LOW_LATENCY,
+				 &sxe2_parse_bool,
+				 &adapter->devargs.rx_low_latency);
+	if (ret) {
+		PMD_DEV_LOG_ERR(adapter, INIT, "Failed to parse rx low latency, ret:%d", ret);
+		goto l_end;
+	}
+l_end:
+	return ret;
+}
+
 static int32_t sxe2_eth_init(struct rte_eth_dev *dev)
 {
 	int32_t ret = 0;
@@ -1579,6 +1767,37 @@ void sxe2_dev_pci_map_uinit(struct rte_eth_dev *dev)
 	adapter->dev_info.dev_data = NULL;
 }
 
+static int32_t sxe2_fc_state_init(struct rte_eth_dev *dev)
+{
+	struct sxe2_adapter *adapter =
+		SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+	struct sxe2_drv_vsi_fc_get_resp fc_resp = {0};
+	int32_t ret;
+
+	if (!(adapter->cap_flags & SXE2_DEV_CAPS_OFFLOAD_FC_STATE)) {
+		adapter->fc_state_ctx.cfg_state = 0;
+		adapter->fc_state_ctx.curr_state = 0;
+		ret = 0;
+		goto l_end;
+	}
+	ret = sxe2_drv_fc_state_get(adapter, &fc_resp);
+	if (ret) {
+		PMD_LOG_ERR(INIT, "Failed to get fc state, ret=[%d]", ret);
+		goto l_end;
+	}
+	adapter->fc_state_ctx.cfg_state = fc_resp.fc_enable;
+	adapter->fc_state_ctx.curr_state = 0;
+l_end:
+	return ret;
+}
+static void sxe2_fc_state_uinit(struct rte_eth_dev *dev)
+{
+	struct sxe2_adapter *adapter =
+		SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+	adapter->fc_state_ctx.cfg_state = 0;
+	adapter->fc_state_ctx.curr_state = 0;
+}
+
 uint32_t sxe2_sched_mode_get(struct sxe2_adapter *adapter)
 {
 	uint32_t ret_mode = SXE2_SCHED_MODE_INVALID;
@@ -1661,6 +1880,12 @@ static int32_t sxe2_dev_init(struct rte_eth_dev *dev,
 		goto l_end;
 	}
 
+	ret = sxe2_args_parse(dev, kvargs);
+	if (ret) {
+		PMD_LOG_ERR(INIT, "Failed to parse devargs, ret=%d", ret);
+		goto l_end;
+	}
+
 	ret = sxe2_hw_init(dev);
 	if (ret) {
 		PMD_LOG_ERR(INIT, "Failed to initialize hw, ret=[%d]", ret);
@@ -1727,6 +1952,12 @@ static int32_t sxe2_dev_init(struct rte_eth_dev *dev,
 		goto init_flow_err;
 	}
 
+	ret = sxe2_fc_state_init(dev);
+	if (ret) {
+		PMD_LOG_ERR(INIT, "Failed to init fc state, ret=%d", ret);
+		goto init_fc_state_err;
+	}
+
 	ret = sxe2_sched_init(dev);
 	if (ret) {
 		PMD_LOG_ERR(INIT, "Failed to init sched, ret=%d", ret);
@@ -1750,6 +1981,8 @@ static int32_t sxe2_dev_init(struct rte_eth_dev *dev,
 init_xstats_err:
 	(void)sxe2_sched_uinit(dev);
 init_sched_err:
+	sxe2_fc_state_uinit(dev);
+init_fc_state_err:
 	(void)sxe2_flow_uninit(dev);
 init_flow_err:
 init_rss_err:
@@ -1795,6 +2028,7 @@ static int32_t sxe2_dev_close(struct rte_eth_dev *dev)
 	sxe2_eth_uinit(dev);
 	sxe2_dev_pci_map_uinit(dev);
 	sxe2_free_repr_info(dev);
+	sxe2_fc_state_uinit(dev);
 
 l_end:
 	return 0;
@@ -2100,6 +2334,13 @@ RTE_INIT(rte_sxe2_pmd_init)
 RTE_PMD_EXPORT_NAME(net_sxe2);
 RTE_PMD_REGISTER_PCI_TABLE(net_sxe2, pci_id_sxe2_tbl);
 RTE_PMD_REGISTER_KMOD_DEP(net_sxe2, "* sxe2");
+RTE_PMD_REGISTER_PARAM_STRING(net_sxe2,
+	"flow-duplicate-pattern=<0|1|2> "
+	"function-flow-direct=<0|1> "
+	"fnav-stat-type=<1|2|3> "
+	"no-sched-mode=<0|1> "
+	"sched-layer-mode=<0-3> "
+	"rx-low-latency=<0|1>");
 
 RTE_LOG_REGISTER_SUFFIX(sxe2_log_init, init, NOTICE);
 RTE_LOG_REGISTER_SUFFIX(sxe2_log_driver, driver, NOTICE);
diff --git a/drivers/net/sxe2/sxe2_ethdev.h b/drivers/net/sxe2/sxe2_ethdev.h
index a68b95c0d0..c54e8a435e 100644
--- a/drivers/net/sxe2/sxe2_ethdev.h
+++ b/drivers/net/sxe2/sxe2_ethdev.h
@@ -310,6 +310,11 @@ struct sxe2_filter_context {
 	bool cur_l2_config;
 };
 
+struct sxe2_fc_state_ctxt {
+	uint8_t curr_state;
+	uint8_t cfg_state;
+};
+
 struct sxe2_adapter {
 	struct sxe2_common_device      *cdev;
 	struct sxe2_dev_info            dev_info;
@@ -331,6 +336,7 @@ struct sxe2_adapter {
 	struct sxe2_security_ctx      security_ctx;
 	struct sxe2_repr_context      repr_ctxt;
 	struct sxe2_switchdev_info    switchdev_info;
+	struct sxe2_fc_state_ctxt     fc_state_ctx;
 	bool                          rule_started;
 	bool                          flow_isolated;
 	bool                          flow_isolate_cfg;
diff --git a/drivers/net/sxe2/sxe2_flow.c b/drivers/net/sxe2/sxe2_flow.c
index 63cfc36968..1aa5813ee4 100644
--- a/drivers/net/sxe2/sxe2_flow.c
+++ b/drivers/net/sxe2/sxe2_flow.c
@@ -762,6 +762,7 @@ static int32_t sxe2_flow_validate_with_flow(struct rte_eth_dev *dev,
 					const struct rte_flow_action actions[],
 					struct rte_flow_error *error)
 {
+	struct sxe2_adapter *adapter = SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
 	int32_t ret = 0;
 	struct sxe2_flow *flow = NULL;
 
@@ -804,9 +805,11 @@ static int32_t sxe2_flow_validate_with_flow(struct rte_eth_dev *dev,
 
 	ret = sxe2_flow_check_flow_list_duplicate(dev, flow_list);
 	if (ret != 0) {
-		rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_ITEM,
-				NULL, "Duplicate flow.");
-		PMD_LOG_ERR(DRV, "Duplicate flow.");
+		rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+				   adapter->devargs.flow_dup_pattern_mode ?
+				   "Duplicate flow pattern." :
+				   "Duplicate flow pattern is not allowed.");
+		PMD_LOG_ERR(DRV, "Duplicate flow pattern.");
 		goto l_end;
 	}
 l_end:
diff --git a/drivers/net/sxe2/sxe2_irq.c b/drivers/net/sxe2/sxe2_irq.c
index d8e0b19463..3306504761 100644
--- a/drivers/net/sxe2/sxe2_irq.c
+++ b/drivers/net/sxe2/sxe2_irq.c
@@ -10,6 +10,7 @@
 #include <rte_alarm.h>
 #include <fcntl.h>
 #include <rte_stdatomic.h>
+#include <rte_common.h>
 
 #include "sxe2_ethdev.h"
 #include "sxe2_irq.h"
@@ -47,6 +48,31 @@ static struct sxe2_event_handler event_handler = {
 
 static RTE_ATOMIC(uint32_t)event_thread_run;
 
+static int32_t sxe2_fc_state_callback(struct rte_eth_dev *dev)
+{
+	struct sxe2_adapter *adapter = SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+	struct sxe2_drv_vsi_fc_get_resp fc_resp = {0};
+	int32_t ret;
+
+	if (!(adapter->cap_flags & SXE2_DEV_CAPS_OFFLOAD_FC_STATE)) {
+		ret = 0;
+		goto l_end;
+	}
+	ret = sxe2_drv_fc_state_get(adapter, &fc_resp);
+	if (ret) {
+		PMD_LOG_ERR(INIT, "Failed to get fc state, ret=[%d]", ret);
+		goto l_end;
+	}
+	adapter->fc_state_ctx.cfg_state = fc_resp.fc_enable;
+	if (dev->data->dev_started) {
+		PMD_LOG_NOTICE(DRV, "Interrupt event: FC status changed."
+			       "cfg_state:%u curr_state:%u",
+				adapter->fc_state_ctx.cfg_state,
+				adapter->fc_state_ctx.curr_state);
+	}
+l_end:
+	return ret;
+}
 
 static void sxe2_event_irq_common_handler(struct sxe2_adapter *adapter, uint64_t oicr)
 {
@@ -68,6 +94,10 @@ static void sxe2_event_irq_common_handler(struct sxe2_adapter *adapter, uint64_t
 		PMD_DEV_LOG_INFO(adapter, DRV, "event notify legacy");
 		(void)sxe2_switchdev_notify_callback(adapter, false);
 	}
+	if (oicr & RTE_BIT32(SXE2_COM_FC_ST_CHANGE)) {
+		PMD_DEV_LOG_INFO(adapter, DRV, "fc event notify legacy");
+		(void)sxe2_fc_state_callback(dev);
+	}
 }
 
 static uint32_t sxe2_event_intr_handle(void *param __rte_unused)
diff --git a/drivers/net/sxe2/sxe2_rx.c b/drivers/net/sxe2/sxe2_rx.c
index 820d4f0620..d700c60083 100644
--- a/drivers/net/sxe2/sxe2_rx.c
+++ b/drivers/net/sxe2/sxe2_rx.c
@@ -467,12 +467,24 @@ int32_t __rte_cold sxe2_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queu
 int32_t __rte_cold sxe2_rxqs_all_start(struct rte_eth_dev *dev)
 {
 	struct rte_eth_dev_data *data = dev->data;
+	struct sxe2_adapter *adapter = SXE2_DEV_PRIVATE_TO_ADAPTER(dev);
+	struct sxe2_drv_vsi_fc_get_resp fc_resp = {0};
 	struct sxe2_rx_queue *rxq;
 	uint16_t nb_rxq;
 	uint16_t nb_started_rxq;
 	int32_t ret;
 	PMD_INIT_FUNC_TRACE();
 
+	if (adapter->cap_flags & SXE2_DEV_CAPS_OFFLOAD_FC_STATE) {
+		ret = sxe2_drv_fc_state_get(adapter, &fc_resp);
+		if (ret) {
+			PMD_LOG_ERR(RX, "Failed to get fc state, ret=[%d]", ret);
+			goto l_end;
+		}
+		adapter->fc_state_ctx.cfg_state = fc_resp.fc_enable;
+		adapter->fc_state_ctx.curr_state = adapter->fc_state_ctx.cfg_state;
+	}
+
 	for (nb_rxq = 0; nb_rxq < data->nb_rx_queues; nb_rxq++) {
 		rxq = dev->data->rx_queues[nb_rxq];
 		if (!rxq || rxq->rx_deferred_start)
-- 
2.52.0


^ permalink raw reply related

* [PATCH v8 20/23] common/sxe2: add callback for memory event handling
From: liujie5 @ 2026-06-25 13:30 UTC (permalink / raw)
  To: stephen; +Cc: dev, Jie Liu
In-Reply-To: <20260625055021.63243-1-liujie5@linkdatatechnology.com>

From: Jie Liu <liujie5@linkdatatechnology.com>

During memory hotplug events, the SXE2 driver needs to track memory
segment layout changes to maintain its internal DMA mappings. However,
the regular memseg walk functions (e.g. rte_memseg_walk()) take the
memory hotplug lock themselves, so calling them from within a memory
event callback can deadlock.

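Register a memory event callback that DMA-maps newly allocated memory
and unmaps freed memory for every SXE2 common device, and map the
memsegs that already exist at device setup time using
rte_memseg_walk_thread_unsafe() under the memory config read lock.
The walk callback follows the standard rte_memseg_walk_t prototype and
DMA-maps each eligible memseg (one with a valid IOVA).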

Signed-off-by: Jie Liu <liujie5@linkdatatechnology.com>
---
 drivers/common/sxe2/sxe2_common.c     | 110 ++++++++++++++++++++++++++
 drivers/common/sxe2/sxe2_common.h     |   2 +
 drivers/common/sxe2/sxe2_ioctl_chnl.c |   2 +-
 3 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/drivers/common/sxe2/sxe2_common.c b/drivers/common/sxe2/sxe2_common.c
index c000a55cd0..5c5db85f29 100644
--- a/drivers/common/sxe2/sxe2_common.c
+++ b/drivers/common/sxe2/sxe2_common.c
@@ -196,6 +196,102 @@ static int32_t sxe2_parse_representor(const char *key, const char *value, void *
 
 	PMD_LOG_INFO(COM, "representor arg %s: \"%s\".", key, value);
 
+l_end:
+	return ret;
+}
+static int32_t sxe2_dma_mem_map(struct sxe2_common_device *cdev,
+				const void *addr, size_t len, bool do_map)
+{
+	struct rte_memseg_list *msl;
+	struct rte_memseg *ms;
+	size_t cur_len = 0;
+	int32_t ret = 0;
+
+	msl = rte_mem_virt2memseg_list(addr);
+	if (msl == NULL) {
+		ret = -EINVAL;
+		PMD_LOG_ERR(COM, "Invalid virt addr=%p.", addr);
+		goto l_end;
+	}
+
+	if ((uintptr_t)addr != RTE_ALIGN((uintptr_t)addr, msl->page_sz) ||
+		(len != RTE_ALIGN(len, msl->page_sz))) {
+		ret = -EINVAL;
+		PMD_LOG_ERR(COM, "Addr=%p and len=%zu not align page size=%" PRIu64 ".",
+			    addr, len, msl->page_sz);
+		goto l_end;
+	}
+
+	/* memsegs are contiguous in memory */
+	ms = rte_mem_virt2memseg(addr, msl);
+	while (cur_len < len) {
+		/* some memory segments may have invalid IOVA */
+		if (ms->iova == RTE_BAD_IOVA) {
+			PMD_LOG_WARN(COM, "Memory segment at %p has bad IOVA, skipping.",
+					ms->addr);
+			goto next;
+		}
+		if (do_map)
+			sxe2_drv_dev_dma_map(cdev, ms->addr_64,
+					ms->iova, ms->len);
+		else
+			sxe2_drv_dev_dma_unmap(cdev, ms->iova);
+
+next:
+		cur_len += ms->len;
+		++ms;
+	}
+
+l_end:
+	return ret;
+}
+
+RTE_EXPORT_INTERNAL_SYMBOL(sxe2_common_mem_event_cb)
+void
+sxe2_common_mem_event_cb(enum rte_mem_event type,
+		const void *addr, size_t size, void *arg __rte_unused)
+{
+	struct sxe2_common_device *cdev = NULL;
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		goto l_end;
+
+	pthread_mutex_lock(&sxe2_common_devices_list_lock);
+	switch (type) {
+	case RTE_MEM_EVENT_FREE:
+		TAILQ_FOREACH(cdev, &sxe2_common_devices_list, next)
+			(void)sxe2_dma_mem_map(cdev, addr, size, 0);
+		break;
+	case RTE_MEM_EVENT_ALLOC:
+		TAILQ_FOREACH(cdev, &sxe2_common_devices_list, next)
+			(void)sxe2_dma_mem_map(cdev, addr, size, 1);
+		break;
+	default:
+		break;
+	}
+	pthread_mutex_unlock(&sxe2_common_devices_list_lock);
+l_end:
+	return;
+}
+
+static int32_t sxe2_memseg_walk_cb(const struct rte_memseg_list *msl,
+				   const struct rte_memseg *ms, void *arg)
+{
+	struct sxe2_common_device *cdev = arg;
+	int32_t ret = 0;
+
+	if (msl->external && !msl->heap)
+		goto l_end;
+
+	if (ms->iova == RTE_BAD_IOVA)
+		goto l_end;
+
+	ret = sxe2_drv_dev_dma_map(cdev, ms->addr_64, ms->iova, ms->len);
+	if (ret != 0) {
+		PMD_LOG_ERR(COM, "Fail to memseg dma map.");
+		goto l_end;
+	}
+
 l_end:
 	return ret;
 }
@@ -220,6 +316,18 @@ static int32_t sxe2_common_device_setup(struct sxe2_common_device *cdev)
 		goto l_close_dev;
 	}
 
+	rte_mcfg_mem_read_lock();
+	ret = rte_memseg_walk_thread_unsafe(sxe2_memseg_walk_cb, cdev);
+	if (ret) {
+		PMD_LOG_ERR(COM, "Fail to walk memseg, ret=%d", ret);
+		rte_mcfg_mem_read_unlock();
+		goto l_close_dev;
+	}
+	rte_mcfg_mem_read_unlock();
+
+	(void)rte_mem_event_callback_register("SXE2_MEM_EVENT_CB",
+			sxe2_common_mem_event_cb, NULL);
+
 	goto l_end;
 
 l_close_dev:
@@ -251,6 +359,7 @@ static struct sxe2_common_device *sxe2_common_device_alloc(
 	}
 	cdev->dev = rte_dev;
 	cdev->class_type = class_type;
+	cdev->config.cmd_fd = SXE2_CMD_FD_INVALID;
 	cdev->config.kernel_reset = false;
 	pthread_mutex_init(&cdev->config.lock, NULL);
 
@@ -631,6 +740,7 @@ static int32_t sxe2_common_pci_id_table_update(const struct rte_pci_id *id_table
 
 	updated_table = calloc(num_ids, sizeof(*updated_table));
 	if (!updated_table) {
+		ret = -ENOMEM;
 		PMD_LOG_ERR(COM, "Failed to allocate memory for PCI ID table");
 		goto l_end;
 	}
diff --git a/drivers/common/sxe2/sxe2_common.h b/drivers/common/sxe2/sxe2_common.h
index b02b6317da..efc8d3585a 100644
--- a/drivers/common/sxe2/sxe2_common.h
+++ b/drivers/common/sxe2/sxe2_common.h
@@ -14,6 +14,8 @@
 
 #define SXE2_COMMON_PCI_DRIVER_NAME "sxe2_pci"
 
+#define SXE2_CMD_FD_INVALID (-1)
+
 #define SXE2_CDEV_TO_CMD_FD(cdev) \
 	((cdev)->config.cmd_fd)
 
diff --git a/drivers/common/sxe2/sxe2_ioctl_chnl.c b/drivers/common/sxe2/sxe2_ioctl_chnl.c
index 173d8d57ae..a233a78136 100644
--- a/drivers/common/sxe2/sxe2_ioctl_chnl.c
+++ b/drivers/common/sxe2/sxe2_ioctl_chnl.c
@@ -110,7 +110,7 @@ sxe2_drv_dev_close(struct sxe2_common_device *cdev)
 	if (fd >= 0)
 		close(fd);
 	PMD_LOG_INFO(COM, "closed device fd=%d", fd);
-	SXE2_CDEV_TO_CMD_FD(cdev) = -1;
+	SXE2_CDEV_TO_CMD_FD(cdev) = SXE2_CMD_FD_INVALID;
 }
 
 RTE_EXPORT_INTERNAL_SYMBOL(sxe2_drv_dev_handshake)
-- 
2.52.0


^ permalink raw reply related

* [PATCH v8 19/23] net/sxe2: add mbuf validation in Tx debug mode
From: liujie5 @ 2026-06-25 13:29 UTC (permalink / raw)
  To: stephen; +Cc: dev, Jie Liu
In-Reply-To: <20260625055021.63243-1-liujie5@linkdatatechnology.com>

From: Jie Liu <liujie5@linkdatatechnology.com>

Introduce the `sxe2_txrx_check_mbuf` helper function to validate outgoing
mbufs when `RTE_ETHDEV_DEBUG_TX` is enabled. This helps developers catch
malformed mbufs (e.g., invalid segment lengths, bad offload flags, or
unaligned buffers) before passing them to the hardware rings, avoiding
potential hardware hangs or silent packet drops.

The validation is fully wrapped inside `RTE_ETHDEV_DEBUG_TX` conditional
compilation blocks to ensure zero performance overhead in standard
production builds.

Signed-off-by: Jie Liu <liujie5@linkdatatechnology.com>
---
 drivers/net/sxe2/meson.build            |   1 +
 drivers/net/sxe2/sxe2_txrx.c            |   8 +-
 drivers/net/sxe2/sxe2_txrx_check_mbuf.c | 595 ++++++++++++++++++++++++
 drivers/net/sxe2/sxe2_txrx_check_mbuf.h |  38 ++
 4 files changed, 640 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/sxe2/sxe2_txrx_check_mbuf.c
 create mode 100644 drivers/net/sxe2/sxe2_txrx_check_mbuf.h

diff --git a/drivers/net/sxe2/meson.build b/drivers/net/sxe2/meson.build
index 65286299aa..a172bb2867 100644
--- a/drivers/net/sxe2/meson.build
+++ b/drivers/net/sxe2/meson.build
@@ -77,4 +77,5 @@ sources += files(
         'sxe2_flow_parse_action.c',
         'sxe2_flow_parse_pattern.c',
         'sxe2_flow_parse_engine.c',
+        'sxe2_txrx_check_mbuf.c',
 )
diff --git a/drivers/net/sxe2/sxe2_txrx.c b/drivers/net/sxe2/sxe2_txrx.c
index 82b2e4fb7c..2081b96499 100644
--- a/drivers/net/sxe2/sxe2_txrx.c
+++ b/drivers/net/sxe2/sxe2_txrx.c
@@ -13,6 +13,7 @@
 #include "sxe2_txrx_common.h"
 #include "sxe2_txrx_vec.h"
 #include "sxe2_txrx_poll.h"
+#include "sxe2_txrx_check_mbuf.h"
 #include "sxe2_ethdev.h"
 #include "sxe2_common_log.h"
 #include "sxe2_osal.h"
@@ -120,13 +121,11 @@ uint16_t sxe2_tx_pkts_prepare(void *tx_queue,
 			rte_errno = -EINVAL;
 			goto l_end;
 		}
-#ifdef RTE_ETHDEV_DEBUG_TX
 		ret = rte_validate_tx_offload(mbuf);
 		if (ret != 0) {
 			rte_errno = -ret;
 			goto l_end;
 		}
-#endif
 		ret = rte_net_intel_cksum_prepare(mbuf);
 		if (ret != 0) {
 			rte_errno = -ret;
@@ -137,6 +136,11 @@ uint16_t sxe2_tx_pkts_prepare(void *tx_queue,
 			rte_errno = -ret;
 			goto l_end;
 		}
+		ret = sxe2_txrx_check_mbuf(mbuf);
+		if (ret != 0) {
+			rte_errno = -ret;
+			goto l_end;
+		}
 	}
 l_end:
 	return i;
diff --git a/drivers/net/sxe2/sxe2_txrx_check_mbuf.c b/drivers/net/sxe2/sxe2_txrx_check_mbuf.c
new file mode 100644
index 0000000000..7d316ae652
--- /dev/null
+++ b/drivers/net/sxe2/sxe2_txrx_check_mbuf.c
@@ -0,0 +1,595 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C), 2025, Wuxi Stars Micro System Technologies Co., Ltd.
+ */
+
+#include <rte_common.h>
+#include <rte_net.h>
+#include <rte_vect.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <ethdev_driver.h>
+#include <rte_geneve.h>
+
+#include "sxe2_txrx_check_mbuf.h"
+#include "sxe2_common_log.h"
+
+#define TX_IPPROTO_IPIP 4
+#define TX_IPPROTO_GRE  47
+#define GRE_CHECKSUM_PRESENT 0x8000
+#define GRE_KEY_PRESENT 0x2000
+#define GRE_SEQUENCE_PRESENT 0x1000
+#define GRE_EXT_LEN 4
+#define GRE_SUPPORTED_FIELDS (GRE_CHECKSUM_PRESENT | GRE_KEY_PRESENT | GRE_SEQUENCE_PRESENT)
+
+
+static uint16_t vxlan_gpe_udp_port = RTE_VXLAN_GPE_DEFAULT_PORT;
+static uint16_t geneve_udp_port = RTE_GENEVE_DEFAULT_PORT;
+
+static inline int32_t check_mbuf_len(struct offload_info *info, struct rte_mbuf *m)
+{
+	int32_t ret = 0;
+	if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		if (info->outer_l2_len != m->outer_l2_len) {
+			PMD_LOG_ERR(TX, "outer_l2_len error in mbuf. Original "
+				    "length:%u calculated length:%u", m->outer_l2_len,
+				    info->outer_l2_len);
+			ret = -1;
+			goto end;
+		}
+		if (info->outer_l3_len != m->outer_l3_len) {
+			PMD_LOG_ERR(TX, "outer_l3_len error in mbuf. Original "
+				    "length:%u calculated length:%u", m->outer_l3_len,
+				    info->outer_l3_len);
+			ret = -1;
+			goto end;
+		}
+	}
+
+	if (info->l2_len != m->l2_len) {
+		PMD_LOG_ERR(TX, "l2_len error in mbuf. Original "
+			"length:%u calculated length:%u", m->l2_len, info->l2_len);
+		ret = -1;
+		goto end;
+	}
+	if (info->l3_len != m->l3_len) {
+		PMD_LOG_ERR(TX, "l3_len error in mbuf. Original "
+			"length:%u calculated length:%u", m->l3_len, info->l3_len);
+		ret = -1;
+		goto end;
+	}
+	if (info->l4_len != m->l4_len) {
+		PMD_LOG_ERR(TX, "l4_len error in mbuf. Original "
+			"length:%u calculated length:%u", m->l4_len, info->l4_len);
+		ret = -1;
+		goto end;
+	}
+	ret = 0;
+
+end:
+	return ret;
+}
+
+static inline int32_t check_ether_type(struct offload_info *info, struct rte_mbuf *m)
+{
+	int32_t ret = 0;
+
+	if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		if (info->outer_ethertype == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
+			if (!(m->ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) {
+				PMD_LOG_ERR(TX, "Outer ethernet type is ipv4, "
+					"tx offload missing `RTE_MBUF_F_TX_OUTER_IPV4` flag");
+				ret = -1;
+				goto end;
+			}
+			if (m->ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) {
+				PMD_LOG_ERR(TX, "Outer ethernet type is ipv4, tx "
+					"offload contains wrong `RTE_MBUF_F_TX_OUTER_IPV6` flag");
+				ret = -1;
+				goto end;
+			}
+		} else if (info->outer_ethertype == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+			if (!(m->ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) {
+				PMD_LOG_ERR(TX, "Outer ethernet type is ipv6, "
+					"tx offload missing `RTE_MBUF_F_TX_OUTER_IPV6` flag");
+				ret = -1;
+				goto end;
+			}
+			if (m->ol_flags & RTE_MBUF_F_TX_OUTER_IPV4) {
+				PMD_LOG_ERR(TX, "Outer ethernet type is ipv6, tx "
+					"offload contains wrong `RTE_MBUF_F_TX_OUTER_IPV4` flag");
+				ret = -1;
+				goto end;
+			}
+		}
+	}
+
+	if (info->ethertype == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
+		if (!(m->ol_flags & RTE_MBUF_F_TX_IPV4)) {
+			PMD_LOG_ERR(TX, "Ethernet type is ipv4, tx offload "
+				"missing `RTE_MBUF_F_TX_IPV4` flag.");
+			ret = -1;
+			goto end;
+		}
+		if (m->ol_flags & RTE_MBUF_F_TX_IPV6) {
+			PMD_LOG_ERR(TX, "Ethernet type is ipv4, tx "
+				"offload contains wrong `RTE_MBUF_F_TX_IPV6` flag");
+			ret = -1;
+			goto end;
+		}
+	} else if (info->ethertype == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+		if (!(m->ol_flags & RTE_MBUF_F_TX_IPV6)) {
+			PMD_LOG_ERR(TX, "Ethernet type is ipv6, tx offload "
+				"missing `RTE_MBUF_F_TX_IPV6` flag.");
+			ret = -1;
+			goto end;
+		}
+		if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
+			PMD_LOG_ERR(TX, "Ethernet type is ipv6, tx offload "
+				"contains wrong `RTE_MBUF_F_TX_IPV4` flag");
+			ret = -1;
+			goto end;
+		}
+	}
+	ret = 0;
+
+end:
+	return ret;
+}
+
+static inline void parse_ipv4(struct rte_ipv4_hdr *ipv4_hdr, struct offload_info *info)
+{
+	struct rte_tcp_hdr *tcp_hdr;
+
+	info->l3_len   = rte_ipv4_hdr_len(ipv4_hdr);
+	info->l4_proto = ipv4_hdr->next_proto_id;
+
+	if (info->l4_proto == IPPROTO_TCP) {
+		tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + info->l3_len);
+		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+	} else if (info->l4_proto == IPPROTO_UDP) {
+		info->l4_len = sizeof(struct rte_udp_hdr);
+	} else {
+		info->l4_len = 0;
+	}
+}
+
+static inline void parse_ipv6(struct rte_ipv6_hdr *ipv6_hdr, struct offload_info *info)
+{
+	struct rte_tcp_hdr *tcp_hdr;
+
+	info->l3_len   = sizeof(struct rte_ipv6_hdr);
+	info->l4_proto = ipv6_hdr->proto;
+
+	if (info->l4_proto == IPPROTO_TCP) {
+		tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv6_hdr + info->l3_len);
+		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+	} else if (info->l4_proto == IPPROTO_UDP) {
+		info->l4_len = sizeof(struct rte_udp_hdr);
+	} else {
+		info->l4_len = 0;
+	}
+}
+
+static inline void parse_ethernet(struct rte_ether_hdr *eth_hdr, struct offload_info *info)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	struct rte_vlan_hdr *vlan_hdr;
+
+	info->l2_len = sizeof(struct rte_ether_hdr);
+	info->ethertype = eth_hdr->ether_type;
+
+	while (info->ethertype == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) ||
+		   info->ethertype == rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ)) {
+		vlan_hdr = (struct rte_vlan_hdr *)
+			((char *)eth_hdr + info->l2_len);
+		info->l2_len   += sizeof(struct rte_vlan_hdr);
+		info->ethertype = vlan_hdr->eth_proto;
+	}
+
+	switch (info->ethertype) {
+	case RTE_STATIC_BSWAP16(RTE_ETHER_TYPE_IPV4):
+		ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + info->l2_len);
+		parse_ipv4(ipv4_hdr, info);
+		break;
+	case RTE_STATIC_BSWAP16(RTE_ETHER_TYPE_IPV6):
+		ipv6_hdr = (struct rte_ipv6_hdr *)((char *)eth_hdr + info->l2_len);
+		parse_ipv6(ipv6_hdr, info);
+		break;
+	default:
+		info->l4_len = 0;
+		info->l3_len = 0;
+		info->l4_proto = 0;
+		break;
+	}
+}
+
+static inline void update_tunnel_outer(struct offload_info *info)
+{
+	info->is_tunnel       = 1;
+	info->outer_ethertype = info->ethertype;
+	info->outer_l2_len    = info->l2_len;
+	info->outer_l3_len    = info->l3_len;
+	info->outer_l4_proto  = info->l4_proto;
+}
+
+static inline void parse_gtp(struct rte_udp_hdr *udp_hdr, struct offload_info *info)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	struct rte_gtp_hdr *gtp_hdr;
+	uint8_t gtp_len = sizeof(*gtp_hdr);
+	uint8_t ip_ver;
+
+	if (udp_hdr->dst_port != rte_cpu_to_be_16(RTE_GTPC_UDP_PORT) &&
+		udp_hdr->src_port != rte_cpu_to_be_16(RTE_GTPC_UDP_PORT) &&
+		udp_hdr->dst_port != rte_cpu_to_be_16(RTE_GTPU_UDP_PORT))
+		goto end;
+
+	update_tunnel_outer(info);
+	info->l2_len = 0;
+
+	gtp_hdr = (struct rte_gtp_hdr *)((char *)udp_hdr + sizeof(*udp_hdr));
+
+	if (gtp_hdr->msg_type == 0xff) {
+		ip_ver = *(uint8_t *)((char *)udp_hdr + sizeof(*udp_hdr) + sizeof(*gtp_hdr));
+		ip_ver = (ip_ver) & 0xf0;
+
+		if (ip_ver == RTE_GTP_TYPE_IPV4) {
+			ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gtp_hdr + gtp_len);
+			info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+			parse_ipv4(ipv4_hdr, info);
+		} else if (ip_ver == RTE_GTP_TYPE_IPV6) {
+			ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gtp_hdr + gtp_len);
+			info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+			parse_ipv6(ipv6_hdr, info);
+		}
+	} else {
+		info->ethertype = 0;
+		info->l4_len    = 0;
+		info->l3_len    = 0;
+		info->l4_proto  = 0;
+	}
+
+	info->l2_len += RTE_ETHER_GTP_HLEN;
+
+end:
+	return;
+}
+
+static inline void parse_vxlan(struct rte_udp_hdr *udp_hdr, struct offload_info *info)
+{
+	struct rte_ether_hdr *eth_hdr;
+
+	if (udp_hdr->dst_port != rte_cpu_to_be_16(RTE_VXLAN_DEFAULT_PORT))
+		goto end;
+
+	update_tunnel_outer(info);
+
+	eth_hdr = (struct rte_ether_hdr *)((char *)udp_hdr +
+		sizeof(struct rte_udp_hdr) + sizeof(struct rte_vxlan_hdr));
+
+	parse_ethernet(eth_hdr, info);
+	info->l2_len += RTE_ETHER_VXLAN_HLEN;
+
+end:
+	return;
+}
+
+static inline void parse_vxlan_gpe(struct rte_udp_hdr *udp_hdr, struct offload_info *info)
+{
+	struct rte_ether_hdr *eth_hdr;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	struct rte_vxlan_gpe_hdr *vxlan_gpe_hdr;
+	uint8_t vxlan_gpe_len = sizeof(*vxlan_gpe_hdr);
+
+	if (udp_hdr->dst_port != rte_cpu_to_be_16(vxlan_gpe_udp_port))
+		goto end;
+
+	vxlan_gpe_hdr = (struct rte_vxlan_gpe_hdr *)((char *)udp_hdr + sizeof(struct rte_udp_hdr));
+
+	if (!vxlan_gpe_hdr->proto || vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_IPV4) {
+		update_tunnel_outer(info);
+
+		ipv4_hdr = (struct rte_ipv4_hdr *)((char *)vxlan_gpe_hdr + vxlan_gpe_len);
+
+		parse_ipv4(ipv4_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+		info->l2_len = 0;
+
+	} else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_IPV6) {
+		update_tunnel_outer(info);
+
+		ipv6_hdr = (struct rte_ipv6_hdr *)((char *)vxlan_gpe_hdr + vxlan_gpe_len);
+
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+		parse_ipv6(ipv6_hdr, info);
+		info->l2_len = 0;
+
+	} else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_ETH) {
+		update_tunnel_outer(info);
+
+		eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_gpe_hdr + vxlan_gpe_len);
+
+		parse_ethernet(eth_hdr, info);
+	} else {
+		goto end;
+	}
+
+	info->l2_len += RTE_ETHER_VXLAN_GPE_HLEN;
+
+end:
+	return;
+}
+
+static inline void parse_geneve(struct rte_udp_hdr *udp_hdr, struct offload_info *info)
+{
+	struct rte_ether_hdr *eth_hdr;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	struct rte_geneve_hdr *geneve_hdr;
+	uint16_t geneve_len;
+
+	if (udp_hdr->dst_port != rte_cpu_to_be_16(geneve_udp_port))
+		goto end;
+
+	geneve_hdr = (struct rte_geneve_hdr *)((char *)udp_hdr + sizeof(struct rte_udp_hdr));
+	geneve_len = sizeof(struct rte_geneve_hdr) + geneve_hdr->opt_len * 4;
+	if (!geneve_hdr->proto || geneve_hdr->proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
+		update_tunnel_outer(info);
+		ipv4_hdr = (struct rte_ipv4_hdr *)((char *)geneve_hdr + geneve_len);
+		parse_ipv4(ipv4_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+		info->l2_len = 0;
+	} else if (geneve_hdr->proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+		update_tunnel_outer(info);
+		ipv6_hdr = (struct rte_ipv6_hdr *)((char *)geneve_hdr + geneve_len);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+		parse_ipv6(ipv6_hdr, info);
+		info->l2_len = 0;
+
+	} else if (geneve_hdr->proto == rte_cpu_to_be_16(RTE_GENEVE_TYPE_ETH)) {
+		update_tunnel_outer(info);
+		eth_hdr = (struct rte_ether_hdr *)((char *)geneve_hdr + geneve_len);
+		parse_ethernet(eth_hdr, info);
+	} else {
+		goto end;
+	}
+
+	info->l2_len += (sizeof(struct rte_udp_hdr) + sizeof(struct rte_geneve_hdr) +
+		((struct rte_geneve_hdr *)geneve_hdr)->opt_len * 4);
+
+end:
+	return;
+}
+
+static inline void parse_gre(struct simple_gre_hdr *gre_hdr, struct offload_info *info)
+{
+	struct rte_ether_hdr *eth_hdr;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	uint8_t gre_len = 0;
+
+	gre_len += sizeof(struct simple_gre_hdr);
+
+	if (gre_hdr->flags & rte_cpu_to_be_16(GRE_KEY_PRESENT))
+		gre_len += GRE_EXT_LEN;
+	if (gre_hdr->flags & rte_cpu_to_be_16(GRE_SEQUENCE_PRESENT))
+		gre_len += GRE_EXT_LEN;
+	if (gre_hdr->flags & rte_cpu_to_be_16(GRE_CHECKSUM_PRESENT))
+		gre_len += GRE_EXT_LEN;
+
+	if (gre_hdr->proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
+		update_tunnel_outer(info);
+
+		ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gre_hdr + gre_len);
+
+		parse_ipv4(ipv4_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+		info->l2_len = 0;
+
+	} else if (gre_hdr->proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+		update_tunnel_outer(info);
+
+		ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gre_hdr + gre_len);
+
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+		parse_ipv6(ipv6_hdr, info);
+		info->l2_len = 0;
+
+	} else if (gre_hdr->proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_TEB)) {
+		update_tunnel_outer(info);
+
+		eth_hdr = (struct rte_ether_hdr *)((char *)gre_hdr + gre_len);
+
+		parse_ethernet(eth_hdr, info);
+	} else {
+		goto end;
+	}
+
+	info->l2_len += gre_len;
+
+end:
+	return;
+}
+
+static inline void parse_encap_ip(void *encap_ip, struct offload_info *info)
+{
+	struct rte_ipv4_hdr *ipv4_hdr = encap_ip;
+	struct rte_ipv6_hdr *ipv6_hdr = encap_ip;
+	uint8_t ip_version;
+
+	ip_version = ((ipv4_hdr->version_ihl & 0xf0) >> 4);
+
+	if (ip_version != 4 && ip_version != 6)
+		goto end;
+
+	info->is_tunnel = 1;
+	info->outer_ethertype = info->ethertype;
+	info->outer_l2_len = info->l2_len;
+	info->outer_l3_len = info->l3_len;
+
+	if (ip_version == 4) {
+		parse_ipv4(ipv4_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+	} else {
+		parse_ipv6(ipv6_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+	}
+	info->l2_len = 0;
+
+end:
+	return;
+}
+
+__rte_unused int32_t sxe2_txrx_check_mbuf(struct rte_mbuf *m)
+{
+	int32_t ret = 0;
+	struct rte_ether_hdr *eth_hdr;
+	void *l3_hdr = NULL;
+	struct offload_info info = {0};
+	uint64_t ol_flags = m->ol_flags;
+	uint64_t tunnel_type = ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+	parse_ethernet(eth_hdr, &info);
+	l3_hdr = (char *)eth_hdr + info.l2_len;
+	if (info.l4_proto == IPPROTO_UDP) {
+		struct rte_udp_hdr *udp_hdr;
+
+		udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info.l3_len);
+		if ((info.l2_len + info.l3_len + sizeof(struct rte_udp_hdr)) > m->data_len) {
+			PMD_LOG_ERR(TX, "UDP header exceeds mbuf data length");
+			ret = -1;
+			goto end;
+		}
+		parse_gtp(udp_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_LOG_ERR(TX, "gtp tunnel packet missing tx "
+					"offload missing `RTE_MBUF_F_TX_TUNNEL_GTP` flag");
+				ret = -1;
+				goto end;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_GTP) {
+				PMD_LOG_ERR(TX, "gtp tunnel packet, tx offload has wrong "
+					"`%s` flag correct is `RTE_MBUF_F_TX_TUNNEL_GTP` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				ret = -1;
+				goto end;
+			}
+			goto check_len;
+		}
+		parse_vxlan_gpe(udp_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_LOG_ERR(TX, "vxlan gpe tunnel packet missing tx "
+					"offload missing `RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE` flag");
+				ret = -1;
+				goto end;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE) {
+				PMD_LOG_ERR(TX, "vxlan gpe tunnel packet, tx offload has "
+					"wrong `%s` flag correct is `RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				ret = -1;
+				goto end;
+			}
+			goto check_len;
+		}
+		parse_vxlan(udp_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_LOG_ERR(TX, "vxlan tunnel packet missing tx "
+					"offload missing `RTE_MBUF_F_TX_TUNNEL_VXLAN` flag");
+				ret = -1;
+				goto end;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN) {
+				PMD_LOG_ERR(TX, "vxlan tunnel packet, tx offload has "
+					"wrong `%s` flag correct is `RTE_MBUF_F_TX_TUNNEL_VXLAN` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				ret = -1;
+				goto end;
+			}
+			goto check_len;
+		}
+		parse_geneve(udp_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_LOG_ERR(TX, "geneve tunnel packet missing tx "
+					"offload missing `RTE_MBUF_F_TX_TUNNEL_GENEVE` flag");
+				ret = -1;
+				goto end;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_GENEVE) {
+				PMD_LOG_ERR(TX, "geneve tunnel packet, tx offload has "
+					"wrong `%s` flag correct is `RTE_MBUF_F_TX_TUNNEL_GENEVE` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				ret = -1;
+				goto end;
+			}
+			goto check_len;
+		}
+
+		if (unlikely(RTE_ETH_IS_TUNNEL_PKT(m->packet_type) != 0)) {
+			PMD_LOG_ERR(TX, "Unknown tunnel packet UDP dst port:%u",
+				    udp_hdr->dst_port);
+			ret = -1;
+			goto end;
+		}
+	} else if (info.l4_proto == TX_IPPROTO_GRE) {
+		struct simple_gre_hdr *gre_hdr;
+
+		gre_hdr = (struct simple_gre_hdr *)((char *)l3_hdr + info.l3_len);
+		parse_gre(gre_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_LOG_ERR(TX, "gre tunnel packet missing tx "
+					"offload missing `RTE_MBUF_F_TX_TUNNEL_GRE` flag.");
+				ret = -1;
+				goto end;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_GRE) {
+				PMD_LOG_ERR(TX, "gre tunnel packet, tx offload has "
+					"wrong `%s` flag, correct is `RTE_MBUF_F_TX_TUNNEL_GRE` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				ret = -1;
+				goto end;
+			}
+			goto check_len;
+		}
+	} else if (info.l4_proto == TX_IPPROTO_IPIP) {
+		void *encap_ip_hdr;
+
+		encap_ip_hdr = (char *)l3_hdr + info.l3_len;
+		parse_encap_ip(encap_ip_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_LOG_ERR(TX, "Ipip tunnel packet missing tx "
+					"offload missing `RTE_MBUF_F_TX_TUNNEL_IPIP` flag");
+				ret = -1;
+				goto end;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_IPIP) {
+				PMD_LOG_ERR(TX, "Ipip tunnel packet, tx offload has "
+					"wrong `%s` flag, correct is `RTE_MBUF_F_TX_TUNNEL_IPIP` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				ret = -1;
+				goto end;
+			}
+			goto check_len;
+		}
+	}
+
+check_len:
+	if (check_mbuf_len(&info, m) != 0) {
+		ret = -1;
+		goto end;
+	}
+	ret = check_ether_type(&info, m);
+
+end:
+	return ret;
+}
diff --git a/drivers/net/sxe2/sxe2_txrx_check_mbuf.h b/drivers/net/sxe2/sxe2_txrx_check_mbuf.h
new file mode 100644
index 0000000000..98197f85d9
--- /dev/null
+++ b/drivers/net/sxe2/sxe2_txrx_check_mbuf.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C), 2025, Wuxi Stars Micro System Technologies Co., Ltd.
+ */
+
+#ifndef __SXE2_TXRX_CHECK_MBUF_H__
+#define __SXE2_TXRX_CHECK_MBUF_H__
+
+#include <rte_common.h>
+#include <rte_net.h>
+#include <rte_vect.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <ethdev_driver.h>
+
+struct offload_info {
+	uint16_t ethertype;
+	uint8_t  gso_enable;
+	uint16_t l2_len;
+	uint16_t l3_len;
+	uint16_t l4_len;
+	uint8_t  l4_proto;
+	uint8_t  is_tunnel;
+	uint16_t outer_ethertype;
+	uint16_t outer_l2_len;
+	uint16_t outer_l3_len;
+	uint8_t  outer_l4_proto;
+	uint16_t tso_segsz;
+	uint16_t tunnel_tso_segsz;
+	uint32_t pkt_len;
+};
+
+struct simple_gre_hdr {
+	uint16_t flags;
+	uint16_t proto;
+};
+
+__rte_unused int32_t sxe2_txrx_check_mbuf(struct rte_mbuf *m);
+#endif /* __SXE2_TXRX_CHECK_MBUF_H__ */
-- 
2.52.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox