* [PATCH 4/4] test/crypto: add unit test for Rx inject multi seg
From: Tejasree Kondoj @ 2026-06-16 11:21 UTC (permalink / raw)
To: Akhil Goyal, Fan Zhang; +Cc: Vidya Sagar Velumuri, Anoob Joseph, dev
In-Reply-To: <20260616112113.73680-1-ktejasree@marvell.com>
From: Vidya Sagar Velumuri <vvelumuri@marvell.com>
Add a unit test to verify multi-segment support in Rx inject.
Signed-off-by: Vidya Sagar Velumuri <vvelumuri@marvell.com>
---
app/test/test_cryptodev.c | 27 ++++++++++++++++++++++++++-
1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index bd726ddcf9..a11bc00963 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -1564,7 +1564,8 @@ ut_setup_security_rx_inject(void)
struct rte_eth_conf port_conf = {
.rxmode = {
.offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM |
- RTE_ETH_RX_OFFLOAD_SECURITY,
+ RTE_ETH_RX_OFFLOAD_SECURITY |
+ RTE_ETH_RX_OFFLOAD_SCATTER,
},
.txmode = {
.offloads = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
@@ -10781,6 +10782,25 @@ test_ipsec_proto_known_vec_inb_rx_inject(const void *test_data)
return test_ipsec_proto_process(&td_inb, NULL, 1, false, &flags);
}
+/*
+ * Inbound known-vector IPsec case run with Rx inject enabled and
+ * flags.nb_segs_in_mbuf set to 4.
+ *
+ * Vectors whose SA direction is egress are passed through
+ * test_ipsec_td_in_from_out() first; any other vector is copied unchanged.
+ * The result of test_ipsec_proto_process() is returned as-is.
+ */
+static int
+test_ipsec_proto_known_vec_inb_rx_inject_multi_seg(const void *test_data)
+{
+	const struct ipsec_test_data *vec = test_data;
+	struct ipsec_test_data inb_vec;
+	struct ipsec_test_flags flags;
+
+	/* Start from all-zero flags; only the two fields below are set. */
+	memset(&flags, 0, sizeof(flags));
+	flags.nb_segs_in_mbuf = 4;
+	flags.rx_inject = true;
+
+	if (vec->ipsec_xform.direction != RTE_SECURITY_IPSEC_SA_DIR_EGRESS)
+		memcpy(&inb_vec, vec, sizeof(inb_vec));
+	else
+		test_ipsec_td_in_from_out(vec, &inb_vec);
+
+	return test_ipsec_proto_process(&inb_vec, NULL, 1, false, &flags);
+}
+
static int
test_ipsec_proto_all(const struct ipsec_test_flags *flags)
{
@@ -18389,6 +18409,11 @@ static struct unit_test_suite ipsec_proto_testsuite = {
"Inbound known vector (ESP tunnel mode IPv4 AES-GCM 128) Rx inject",
ut_setup_security_rx_inject, ut_teardown_rx_inject,
test_ipsec_proto_known_vec_inb_rx_inject, &pkt_aes_128_gcm),
+ TEST_CASE_NAMED_WITH_DATA(
+ "Inbound known vector (ESP tunnel mode IPv4 AES-GCM 128) Rx inject multi seg",
+ ut_setup_security_rx_inject, ut_teardown_rx_inject,
+ test_ipsec_proto_known_vec_inb_rx_inject_multi_seg, &pkt_aes_128_gcm),
+
TEST_CASES_END() /**< NULL terminate unit test array */
}
};
--
2.34.1
^ permalink raw reply related
* [PATCH 3/4] test/crypto: add autotest support for cn20k
From: Tejasree Kondoj @ 2026-06-16 11:21 UTC (permalink / raw)
To: Akhil Goyal, Fan Zhang; +Cc: Vidya Sagar Velumuri, Anoob Joseph, dev
In-Reply-To: <20260616112113.73680-1-ktejasree@marvell.com>
From: Vidya Sagar Velumuri <vvelumuri@marvell.com>
Add crypto autotest support for cn20k
Signed-off-by: Vidya Sagar Velumuri <vvelumuri@marvell.com>
---
app/test/test_cryptodev.c | 15 +++++++++++++++
app/test/test_cryptodev.h | 1 +
2 files changed, 16 insertions(+)
diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index a60983c6b7..bd726ddcf9 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -20880,6 +20880,18 @@ test_cryptodev_cn10k_raw_api(void)
return run_cryptodev_raw_testsuite(RTE_STR(CRYPTODEV_NAME_CN10K_PMD));
}
+/* Autotest entry point: run the cryptodev test suite for the cn20k PMD name. */
+static int
+test_cryptodev_cn20k(void)
+{
+	int rc;
+
+	rc = run_cryptodev_testsuite(RTE_STR(CRYPTODEV_NAME_CN20K_PMD));
+	return rc;
+}
+
+/* Autotest entry point: run the raw-API cryptodev test suite for the cn20k PMD name. */
+static int
+test_cryptodev_cn20k_raw_api(void)
+{
+	int rc;
+
+	rc = run_cryptodev_raw_testsuite(RTE_STR(CRYPTODEV_NAME_CN20K_PMD));
+	return rc;
+}
+
static int
test_cryptodev_dpaa2_sec_raw_api(void)
{
@@ -20935,4 +20947,7 @@ REGISTER_DRIVER_TEST(cryptodev_nitrox_autotest, test_cryptodev_nitrox);
REGISTER_DRIVER_TEST(cryptodev_bcmfs_autotest, test_cryptodev_bcmfs);
REGISTER_DRIVER_TEST(cryptodev_cn9k_autotest, test_cryptodev_cn9k);
REGISTER_DRIVER_TEST(cryptodev_cn10k_autotest, test_cryptodev_cn10k);
+REGISTER_DRIVER_TEST(cryptodev_cn20k_autotest, test_cryptodev_cn20k);
+REGISTER_DRIVER_TEST(cryptodev_cn20k_raw_api_autotest,
+ test_cryptodev_cn20k_raw_api);
REGISTER_DRIVER_TEST(cryptodev_zsda_autotest, test_cryptodev_zsda);
diff --git a/app/test/test_cryptodev.h b/app/test/test_cryptodev.h
index 23d12ec961..f4d8f9be0a 100644
--- a/app/test/test_cryptodev.h
+++ b/app/test/test_cryptodev.h
@@ -76,6 +76,7 @@
#define CRYPTODEV_NAME_BCMFS_PMD crypto_bcmfs
#define CRYPTODEV_NAME_CN9K_PMD crypto_cn9k
#define CRYPTODEV_NAME_CN10K_PMD crypto_cn10k
+#define CRYPTODEV_NAME_CN20K_PMD crypto_cn20k
#define CRYPTODEV_NAME_MLX5_PMD crypto_mlx5
#define CRYPTODEV_NAME_UADK_PMD crypto_uadk
#define CRYPTODEV_NAME_ZSDA_SYM_PMD crypto_zsda
--
2.34.1
^ permalink raw reply related
* [PATCH 2/4] test/crypto: add asym autotest support for cn20k
From: Tejasree Kondoj @ 2026-06-16 11:21 UTC (permalink / raw)
To: Akhil Goyal, Fan Zhang; +Cc: Vidya Sagar Velumuri, Anoob Joseph, dev
In-Reply-To: <20260616112113.73680-1-ktejasree@marvell.com>
From: Vidya Sagar Velumuri <vvelumuri@marvell.com>
Add crypto asym autotest support for cn20k
Signed-off-by: Vidya Sagar Velumuri <vvelumuri@marvell.com>
---
app/test/test_cryptodev_asym.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/app/test/test_cryptodev_asym.c b/app/test/test_cryptodev_asym.c
index e7cc5a79e2..48637ef247 100644
--- a/app/test/test_cryptodev_asym.c
+++ b/app/test/test_cryptodev_asym.c
@@ -5583,6 +5583,12 @@ test_cryptodev_cn10k_asym(void)
return run_cryptodev_asym_testsuite(RTE_STR(CRYPTODEV_NAME_CN10K_PMD));
}
+/* Autotest entry point: run the asymmetric cryptodev test suite for the cn20k PMD name. */
+static int
+test_cryptodev_cn20k_asym(void)
+{
+	int rc;
+
+	rc = run_cryptodev_asym_testsuite(RTE_STR(CRYPTODEV_NAME_CN20K_PMD));
+	return rc;
+}
+
static int
test_cryptodev_virtio_asym(void)
{
@@ -5600,5 +5606,6 @@ REGISTER_DRIVER_TEST(cryptodev_qat_asym_autotest, test_cryptodev_qat_asym);
REGISTER_DRIVER_TEST(cryptodev_octeontx_asym_autotest, test_cryptodev_octeontx_asym);
REGISTER_DRIVER_TEST(cryptodev_cn9k_asym_autotest, test_cryptodev_cn9k_asym);
REGISTER_DRIVER_TEST(cryptodev_cn10k_asym_autotest, test_cryptodev_cn10k_asym);
+REGISTER_DRIVER_TEST(cryptodev_cn20k_asym_autotest, test_cryptodev_cn20k_asym);
REGISTER_DRIVER_TEST(cryptodev_virtio_asym_autotest, test_cryptodev_virtio_asym);
REGISTER_DRIVER_TEST(cryptodev_virtio_user_asym_autotest, test_cryptodev_virtio_user_asym);
--
2.34.1
^ permalink raw reply related
* [PATCH 1/4] test/crypto: add asymmetric sessionless test case
From: Tejasree Kondoj @ 2026-06-16 11:21 UTC (permalink / raw)
To: Akhil Goyal, Fan Zhang; +Cc: Vidya Sagar Velumuri, Anoob Joseph, dev
In-Reply-To: <20260616112113.73680-1-ktejasree@marvell.com>
From: Vidya Sagar Velumuri <vvelumuri@marvell.com>
Add test for sessionless asymmetric operation
Signed-off-by: Vidya Sagar Velumuri <vvelumuri@marvell.com>
---
app/test/test_cryptodev_asym.c | 106 +++++++++++++++++++++++++++++++--
1 file changed, 100 insertions(+), 6 deletions(-)
diff --git a/app/test/test_cryptodev_asym.c b/app/test/test_cryptodev_asym.c
index bf1a1fc417..e7cc5a79e2 100644
--- a/app/test/test_cryptodev_asym.c
+++ b/app/test/test_cryptodev_asym.c
@@ -558,6 +558,13 @@ testsuite_setup(void)
"Failed to configure cryptodev %u with %u qps",
dev_id, ts_params->conf.nb_queue_pairs);
+ ts_params->session_mpool = rte_cryptodev_asym_session_pool_create(
+ "test_asym_sess_mp", TEST_NUM_SESSIONS, 0, 0,
+ SOCKET_ID_ANY);
+
+ TEST_ASSERT_NOT_NULL(ts_params->session_mpool,
+ "session mempool allocation failed");
+
/* configure qp */
ts_params->qp_conf.nb_descriptors = DEFAULT_NUM_OPS_INFLIGHT;
ts_params->qp_conf.mp_session = ts_params->session_mpool;
@@ -569,12 +576,6 @@ testsuite_setup(void)
qp_id, dev_id);
}
- ts_params->session_mpool = rte_cryptodev_asym_session_pool_create(
- "test_asym_sess_mp", TEST_NUM_SESSIONS, 0, 0,
- SOCKET_ID_ANY);
-
- TEST_ASSERT_NOT_NULL(ts_params->session_mpool,
- "session mempool allocation failed");
/* >8 End of device, op pool and session configuration for asymmetric crypto section. */
return TEST_SUCCESS;
}
@@ -1181,6 +1182,98 @@ test_mod_inv(void)
return status;
}
+/*
+ * Modular exponentiation without a session: the xform is attached directly
+ * to the op and the op is marked RTE_CRYPTO_OP_SESSIONLESS.
+ *
+ * Returns TEST_SKIPPED when the device does not advertise
+ * RTE_CRYPTODEV_FF_ASYM_SESSIONLESS, has no capability for the modexp xform,
+ * or rejects the modulus length. Returns TEST_FAILED when the xform lookup,
+ * op allocation, enqueue, dequeue or result verification fails, and
+ * TEST_SUCCESS otherwise.
+ */
+static int
+test_mod_exp_sessionless(void)
+{
+	struct crypto_testsuite_params_asym *ts_params = &testsuite_params;
+	const struct rte_cryptodev_asymmetric_xform_capability *capability;
+	struct rte_mempool *op_mpool = ts_params->op_mpool;
+	struct rte_crypto_op *op = NULL, *result_op = NULL;
+	struct rte_cryptodev_asym_capability_idx cap_idx;
+	uint8_t dev_id = ts_params->valid_devs[0];
+	struct rte_crypto_asym_op *asym_op = NULL;
+	uint8_t result[sizeof(mod_p)] = { 0 };
+	uint8_t input[TEST_DATA_SIZE] = {0};
+	struct rte_cryptodev_info info;
+	int status = TEST_SUCCESS;
+	int ret = 0;
+
+	/* Sessionless asym ops are optional; skip devices that lack them. */
+	rte_cryptodev_info_get(dev_id, &info);
+	if (!(info.feature_flags & RTE_CRYPTODEV_FF_ASYM_SESSIONLESS))
+		return TEST_SKIPPED;
+
+	if (rte_cryptodev_asym_get_xform_enum(&modex_xform.xform_type, "modexp") < 0) {
+		RTE_LOG(ERR, USER1, "Invalid ASYM algorithm specified\n");
+		return TEST_FAILED;
+	}
+
+	/* check for modlen capability */
+	cap_idx.type = modex_xform.xform_type;
+	capability = rte_cryptodev_asym_capability_get(dev_id, &cap_idx);
+
+	if (capability == NULL) {
+		RTE_LOG(INFO, USER1, "Device doesn't support MOD EXP. Test Skipped\n");
+		return TEST_SKIPPED;
+	}
+
+	if (rte_cryptodev_asym_xform_capability_check_modlen(capability,
+			modex_xform.modex.modulus.length)) {
+		RTE_LOG(ERR, USER1, "Unsupported MODULUS length specified\n");
+		return TEST_SKIPPED;
+	}
+
+	/* Create op and process packets. */
+	op = rte_crypto_op_alloc(op_mpool, RTE_CRYPTO_OP_TYPE_ASYMMETRIC);
+	if (!op) {
+		RTE_LOG(ERR, USER1, "line %u FAILED: %s\n", __LINE__,
+			"Failed to allocate asymmetric crypto operation struct");
+		return TEST_FAILED;
+	}
+
+	/* No session is created: the xform travels with the op itself. */
+	asym_op = op->asym;
+	memcpy(input, base, sizeof(base));
+	asym_op->modex.base.data = input;
+	asym_op->modex.base.length = sizeof(base);
+	asym_op->modex.result.data = result;
+	asym_op->modex.result.length = sizeof(result);
+	asym_op->xform = &modex_xform;
+	op->sess_type = RTE_CRYPTO_OP_SESSIONLESS;
+
+	RTE_LOG(DEBUG, USER1, "Process ASYM operation\n");
+	/* Process crypto operation */
+	if (rte_cryptodev_enqueue_burst(dev_id, 0, &op, 1) != 1) {
+		RTE_LOG(ERR, USER1,
+			"line %u FAILED: %s\n",
+			__LINE__, "Error sending packet for operation");
+		status = TEST_FAILED;
+		goto error_exit;
+	}
+
+	/* Busy-poll queue pair 0 until the op comes back. */
+	while (rte_cryptodev_dequeue_burst(dev_id, 0, &result_op, 1) == 0)
+		rte_pause();
+
+	if (result_op == NULL) {
+		RTE_LOG(ERR, USER1,
+			"line %u FAILED: %s\n",
+			__LINE__, "Failed to process asym crypto op");
+		status = TEST_FAILED;
+		goto error_exit;
+	}
+
+	ret = verify_modexp(mod_exp, result_op);
+	if (ret) {
+		RTE_LOG(ERR, USER1, "operation verification failed\n");
+		status = TEST_FAILED;
+	}
+
+error_exit:
+	rte_crypto_op_free(op);
+
+	TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+	return status;
+}
+
static int
test_mod_exp(void)
{
@@ -5309,6 +5402,7 @@ static struct unit_test_suite cryptodev_asym_mod_ex_testsuite = {
"Modular Exponentiation (mod=128, base=20, exp=3, res=128)",
ut_setup_asym, ut_teardown_asym,
modular_exponentiation, &modex_test_case_m128_b20_e3),
+ TEST_CASE_ST(ut_setup_asym, ut_teardown_asym, test_mod_exp_sessionless),
TEST_CASES_END()
}
};
--
2.34.1
^ permalink raw reply related
* [PATCH 0/4] test/crypto: update CN20K and sessionless coverage
From: Tejasree Kondoj @ 2026-06-16 11:21 UTC (permalink / raw)
To: Akhil Goyal, Fan Zhang; +Cc: Vidya Sagar Velumuri, Anoob Joseph, dev
Add CN20K sym/asym autotests, asymmetric sessionless test
and Rx-inject multi-segment unit test.
Vidya Sagar Velumuri (4):
test/crypto: add asymmetric sessionless test case
test/crypto: add asym autotest support for cn20k
test/crypto: add autotest support for cn20k
test/crypto: add unit test for Rx inject multi seg
app/test/test.h | 4 ++
app/test/test_cryptodev.c | 42 +++++++++++-
app/test/test_cryptodev.h | 1 +
app/test/test_cryptodev_asym.c | 113 +++++++++++++++++++++++++++++++--
4 files changed, 160 insertions(+), 31 deletions(-)
--
2.34.1
^ permalink raw reply
* [DPDK/core Bug 1925] parameters of macros in lib/eal/include/rte_test.h are not parenthesized
From: bugzilla @ 2026-06-16 10:50 UTC (permalink / raw)
To: dev
In-Reply-To: <bug-1925-3@http.bugs.dpdk.org/>
http://bugs.dpdk.org/show_bug.cgi?id=1925
Thomas Monjalon (thomas@monjalon.net) changed:
What |Removed |Added
----------------------------------------------------------------------------
Resolution|--- |FIXED
Status|IN_PROGRESS |RESOLVED
--- Comment #4 from Thomas Monjalon (thomas@monjalon.net) ---
Resolved in https://dpdk.org/id/55ab726133
--
You are receiving this mail because:
You are the assignee for the bug.
^ permalink raw reply
* [DPDK/core Bug 1027] mempool cache size parameter is misleading
From: bugzilla @ 2026-06-16 10:50 UTC (permalink / raw)
To: dev
In-Reply-To: <bug-1027-3@http.bugs.dpdk.org/>
http://bugs.dpdk.org/show_bug.cgi?id=1027
Thomas Monjalon (thomas@monjalon.net) changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |RESOLVED
Resolution|--- |FIXED
--- Comment #2 from Thomas Monjalon (thomas@monjalon.net) ---
Resolved in https://dpdk.org/id/f5e1310f16
--
You are receiving this mail because:
You are the assignee for the bug.
^ permalink raw reply
* [PATCH 6/6] net/dpaa2: implement RSS RETA query and update
From: Maxime Leroy @ 2026-06-16 10:47 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616104717.723087-1-maxime@leroys.fr>
DPAA2 dispatches RX frames to FQs using 'queue_id = hash % dist_size',
where dist_size is set per-TC via the dpni_set_rx_hash_dist MC command.
There is no software-visible indirection table, so the standard DPDK
RETA API has never been exposed by this PMD.
Implement reta_update / reta_query as an emulation on top of
dpni_set_rx_hash_dist. The emulation accepts only the uniform pattern
'reta[i] = i % N' for some N in the HW-allowed set (1, 2, 3, 4, 6, 7,
8, 12, 14, 16, 24, ...). Non-uniform or weighted patterns are rejected
with -ENOTSUP, as the HW has no arbitrary indirection table.
Changing N sets the size of the contiguous queue subset that RSS
spreads traffic over; the queues above N are left out of the hash
distribution. This covers the patterns that matter here, e.g. growing
or shrinking the active subset to scale CPU cores with load, or
reserving the upper queues for specific traffic that rte_flow steers
there for dedicated polling or QoS handling on its own core.
Refactor the existing dpaa2_setup_flow_dist() to delegate to a new
helper dpaa2_setup_flow_dist_size() that takes the dist_size explicitly
and caches it in priv->dist_size_cur[tc] so reta_query() can report it.
reta_query() returns reta[i] = i % N: this is representative, not
bit-exact, as the HW maps the hash to a queue through its distribution
size encoding rather than a plain modulo. reta_update() takes the RSS
hash set from dev_conf (rx_adv_conf.rss_conf.rss_hf); a prior
rss_hash_update() with a different hf is not re-read.
The advertised reta_size is 64 (one rte_eth_rss_reta_entry64 group), the
smallest legal value and enough for all HW-permitted N values up to 64.
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
doc/guides/nics/features/dpaa2.ini | 1 +
doc/guides/rel_notes/release_26_07.rst | 1 +
drivers/net/dpaa2/base/dpaa2_hw_dpni.c | 34 +++--
drivers/net/dpaa2/dpaa2_ethdev.c | 201 +++++++++++++++++++++++++
drivers/net/dpaa2/dpaa2_ethdev.h | 9 ++
5 files changed, 237 insertions(+), 9 deletions(-)
diff --git a/doc/guides/nics/features/dpaa2.ini b/doc/guides/nics/features/dpaa2.ini
index 5f9c587847..5def653d1d 100644
--- a/doc/guides/nics/features/dpaa2.ini
+++ b/doc/guides/nics/features/dpaa2.ini
@@ -15,6 +15,7 @@ Promiscuous mode = Y
Allmulticast mode = Y
Unicast MAC filter = Y
RSS hash = Y
+RSS reta update = Y
VLAN filter = Y
Flow control = Y
Traffic manager = Y
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 39f3988198..b22ba4d6f0 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -143,6 +143,7 @@ New Features
* **Updated NXP dpaa2 driver.**
* Added inner RSS level support for tunnelled traffic.
+ * Added RSS RETA query and update support.
* **Updated PCAP ethernet driver.**
diff --git a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
index 8a05253bbd..07f4a3d414 100644
--- a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
+++ b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
@@ -103,15 +103,10 @@ dpaa2_setup_flow_dist(struct rte_eth_dev *eth_dev,
uint64_t req_dist_set, int tc_index)
{
struct dpaa2_dev_priv *priv = eth_dev->data->dev_private;
- struct fsl_mc_io *dpni = eth_dev->process_private;
- struct dpni_rx_dist_cfg tc_cfg;
- struct dpkg_profile_cfg kg_cfg;
- void *p_params;
- int ret, tc_dist_queues;
+ int tc_dist_queues;
- /*TC distribution size is set with dist_queues or
- * nb_rx_queues % dist_queues in order of TC priority index.
- * Calculating dist size for this tc_index:-
+ /* TC distribution size is set with dist_queues or
+ * (nb_rx_queues - tc_index*dist_queues) in order of TC priority index.
*/
tc_dist_queues = eth_dev->data->nb_rx_queues -
tc_index * priv->dist_queues;
@@ -123,6 +118,24 @@ dpaa2_setup_flow_dist(struct rte_eth_dev *eth_dev,
if (tc_dist_queues > priv->dist_queues)
tc_dist_queues = priv->dist_queues;
+ return dpaa2_setup_flow_dist_size(eth_dev, req_dist_set,
+ tc_index, tc_dist_queues);
+}
+
+int
+dpaa2_setup_flow_dist_size(struct rte_eth_dev *eth_dev,
+ uint64_t req_dist_set, int tc_index, uint16_t dist_size)
+{
+ struct dpaa2_dev_priv *priv = eth_dev->data->dev_private;
+ struct fsl_mc_io *dpni = eth_dev->process_private;
+ struct dpni_rx_dist_cfg tc_cfg;
+ struct dpkg_profile_cfg kg_cfg;
+ void *p_params;
+ int ret;
+
+ if (dist_size == 0)
+ return 0;
+
p_params = rte_malloc(NULL,
DIST_PARAM_IOVA_SIZE, RTE_CACHE_LINE_SIZE);
if (!p_params) {
@@ -150,7 +163,7 @@ dpaa2_setup_flow_dist(struct rte_eth_dev *eth_dev,
return -ENOBUFS;
}
- tc_cfg.dist_size = tc_dist_queues;
+ tc_cfg.dist_size = dist_size;
tc_cfg.enable = true;
tc_cfg.tc = tc_index;
@@ -168,6 +181,9 @@ dpaa2_setup_flow_dist(struct rte_eth_dev *eth_dev,
return ret;
}
+ if (tc_index < MAX_TCS)
+ priv->dist_size_cur[tc_index] = dist_size;
+
return 0;
}
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index c19736fb80..56682717cf 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -80,6 +80,33 @@ bool dpaa2_print_parser_result;
#define MAX_NB_RX_DESC_IN_PEB 11264
static int total_nb_rx_desc;
+/* Size of the RETA (Redirection Table) we expose to the standard DPDK API.
+ * Must be a multiple of RTE_ETH_RETA_GROUP_SIZE (64). DPAA2 has no actual
+ * indirection table in HW; this is the granularity at which uniform RSS
+ * patterns are inspected by dpaa2_dev_rss_reta_update().
+ */
+#define DPAA2_RETA_SIZE 64
+
+/* Values of dist_size accepted by the DPNI 'dpni_set_rx_hash_dist' MC command.
+ * Source: fsl_dpni.h, "struct dpni_rx_dist_cfg::dist_size" documentation.
+ * Used by dpaa2_dev_rss_reta_update() to validate user-requested patterns.
+ */
+static const uint16_t dpaa2_dist_size_allowed[] = {
+ 1, 2, 3, 4, 6, 7, 8, 12, 14, 16, 24, 28, 32, 48, 56, 64,
+ 96, 112, 128, 192, 224, 256, 384, 448, 512, 768, 896, 1024,
+};
+
+/* Report whether n is one of the entries of dpaa2_dist_size_allowed[]. */
+static bool
+dpaa2_dist_size_is_supported(uint16_t n)
+{
+	const uint16_t *cur = dpaa2_dist_size_allowed;
+	const uint16_t *end = cur + RTE_DIM(dpaa2_dist_size_allowed);
+
+	while (cur != end) {
+		if (*cur++ == n)
+			return true;
+	}
+
+	return false;
+}
+
int dpaa2_valid_dev;
struct rte_mempool *dpaa2_tx_sg_pool;
@@ -426,6 +453,14 @@ dpaa2_dev_info_get(struct rte_eth_dev *dev,
dev_info->max_vmdq_pools = RTE_ETH_16_POOLS;
dev_info->flow_type_rss_offloads = DPAA2_RSS_OFFLOAD_ALL |
RTE_ETH_RSS_LEVEL_OUTERMOST | RTE_ETH_RSS_LEVEL_INNERMOST;
+ /* DPAA2 has no software-visible indirection table: incoming packets are
+ * dispatched to FQs via 'queue_id = hash % dist_size'. We expose the
+ * standard RETA API as an emulation that only accepts uniform patterns
+ * 'reta[i] = i % N' and translates them into a dpni_set_rx_hash_dist
+ * command with dist_size=N. See dpaa2_dev_rss_reta_update().
+ */
+ dev_info->reta_size = DPAA2_RETA_SIZE;
+ dev_info->hash_key_size = 0;
dev_info->default_rxportconf.burst_size = dpaa2_dqrr_size;
/* same is rx size for best perf */
@@ -2509,6 +2544,170 @@ dpaa2_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
return 0;
}
+/* Emulation of the standard DPDK RETA API on top of DPAA2's
+ * dpni_set_rx_hash_dist MC command.
+ *
+ * DPAA2 hardware dispatches incoming frames using 'queue_id = hash % dist_size'
+ * (no software-visible indirection table). To expose the standard
+ * rte_eth_dev_rss_reta_update() interface, we accept ONLY uniform patterns of
+ * the form 'reta[i] = i % N' where N is in the HW-allowed dist_size list. Any
+ * other pattern (weighted RSS, non-contiguous queue IDs, gaps) is rejected
+ * with -ENOTSUP. This is enough to support dynamic RSS scale-up/down across
+ * a contiguous queue subset, which is the main use case for adaptive
+ * dataplane CPU usage.
+ *
+ * Applies the new dist_size on every configured RX TC, mirroring the
+ * behavior of dpaa2_dev_rss_hash_update().
+ *
+ * Returns 0 on success; -EINVAL for a wrong reta_size or a queue ID that is
+ * not below nb_rx_queues; -ENOTSUP for a partial mask, a non-uniform pattern
+ * or a dist_size rejected by dpaa2_dist_size_is_supported(); otherwise the
+ * error returned by dpaa2_setup_flow_dist_size().
+ */
+static int
+dpaa2_dev_rss_reta_update(struct rte_eth_dev *dev,
+			  struct rte_eth_rss_reta_entry64 *reta_conf,
+			  uint16_t reta_size)
+{
+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
+	struct rte_eth_conf *eth_conf = &dev->data->dev_conf;
+	uint16_t i, max_q = 0, n;
+	int tc_index, ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (reta_size != DPAA2_RETA_SIZE) {
+		DPAA2_PMD_ERR("Invalid reta_size %u (expected %u)",
+			      reta_size, DPAA2_RETA_SIZE);
+		return -EINVAL;
+	}
+
+	/* dpaa2 cannot merge a partial RETA into the live table, so only a
+	 * full update (every entry of every group) is accepted. Past this
+	 * point every slot is known to be selected, so the passes below do
+	 * not need to consult the mask again.
+	 */
+	for (i = 0; i < reta_size / RTE_ETH_RETA_GROUP_SIZE; i++) {
+		if (reta_conf[i].mask != UINT64_MAX) {
+			DPAA2_PMD_ERR("partial RETA update not supported; set all %u entries",
+				      DPAA2_RETA_SIZE);
+			return -ENOTSUP;
+		}
+	}
+
+	/* First pass: validate queue IDs and find the highest one. */
+	for (i = 0; i < reta_size; i++) {
+		uint16_t grp = i / RTE_ETH_RETA_GROUP_SIZE;
+		uint16_t pos = i % RTE_ETH_RETA_GROUP_SIZE;
+		uint16_t q = reta_conf[grp].reta[pos];
+
+		if (q >= dev->data->nb_rx_queues) {
+			/* Print the queue count itself: 'count - 1' would be
+			 * -1 passed to %u when no RX queue is configured.
+			 */
+			DPAA2_PMD_ERR("reta[%u] = %u out of range (nb_rx_queues %u)",
+				      i, q, dev->data->nb_rx_queues);
+			return -EINVAL;
+		}
+		if (q > max_q)
+			max_q = q;
+	}
+
+	/* max_q is below nb_rx_queues (a uint16_t), so this cannot wrap. */
+	n = max_q + 1;
+
+	/* Second pass: enforce the uniform pattern reta[i] = i % n on every
+	 * slot. dpaa2 HW cannot honor any other layout.
+	 */
+	for (i = 0; i < reta_size; i++) {
+		uint16_t grp = i / RTE_ETH_RETA_GROUP_SIZE;
+		uint16_t pos = i % RTE_ETH_RETA_GROUP_SIZE;
+		uint16_t expected = i % n;
+		uint16_t q = reta_conf[grp].reta[pos];
+
+		if (q != expected) {
+			DPAA2_PMD_ERR("Non-uniform RETA pattern at slot %u "
+				      "(got queue %u, expected %u). dpaa2 HW "
+				      "only supports queue_id = hash mod N with "
+				      "contiguous queues 0..N-1.",
+				      i, q, expected);
+			return -ENOTSUP;
+		}
+	}
+
+	if (!dpaa2_dist_size_is_supported(n)) {
+		DPAA2_PMD_ERR("dist_size %u not supported by HW. Allowed: "
+			      "1,2,3,4,6,7,8,12,14,16,24,28,32,48,56,64,...",
+			      n);
+		return -ENOTSUP;
+	}
+
+	/* Apply on every configured RX TC, matching rss_hash_update behavior.
+	 * NOTE(review): the same n is used for every TC and is not compared
+	 * against priv->dist_queues here - confirm this is intended when more
+	 * than one RX TC is configured.
+	 */
+	for (tc_index = 0; tc_index < priv->num_rx_tc; tc_index++) {
+		ret = dpaa2_setup_flow_dist_size(dev,
+				eth_conf->rx_adv_conf.rss_conf.rss_hf,
+				tc_index, n);
+		if (ret) {
+			DPAA2_PMD_ERR("Failed to apply dist_size=%u on tc%d (err=%d)",
+				      n, tc_index, ret);
+			return ret;
+		}
+	}
+
+	DPAA2_PMD_DEBUG("RETA updated: dist_size now %u on %u TC(s)",
+			n, priv->num_rx_tc);
+	return 0;
+}
+
+/* Fill the caller's RETA from the distribution size cached for TC 0.
+ *
+ * Every slot selected by the group mask receives 'slot index % dist_size';
+ * unselected slots are left untouched. When priv->dist_size_cur[0] is still
+ * zero, the smaller of priv->dist_queues and nb_rx_queues is used instead.
+ *
+ * Returns 0 on success, -EINVAL when reta_size is not DPAA2_RETA_SIZE or the
+ * resulting distribution size is zero.
+ */
+static int
+dpaa2_dev_rss_reta_query(struct rte_eth_dev *dev,
+			 struct rte_eth_rss_reta_entry64 *reta_conf,
+			 uint16_t reta_size)
+{
+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
+	uint16_t dist_size, idx;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (reta_size != DPAA2_RETA_SIZE) {
+		DPAA2_PMD_ERR("Invalid reta_size %u (expected %u)",
+			      reta_size, DPAA2_RETA_SIZE);
+		return -EINVAL;
+	}
+
+	/* TC 0 is taken as representative of the whole port. */
+	dist_size = priv->dist_size_cur[0];
+	if (dist_size == 0) {
+		/* Nothing cached yet: fall back to the clamped default. */
+		dist_size = priv->dist_queues;
+		dist_size = dist_size > dev->data->nb_rx_queues ?
+			    dev->data->nb_rx_queues : dist_size;
+	}
+	if (!dist_size)
+		return -EINVAL;
+
+	for (idx = 0; idx < reta_size; idx++) {
+		struct rte_eth_rss_reta_entry64 *group =
+			&reta_conf[idx / RTE_ETH_RETA_GROUP_SIZE];
+		uint16_t bit = idx % RTE_ETH_RETA_GROUP_SIZE;
+
+		if ((group->mask >> bit) & 1)
+			group->reta[bit] = idx % dist_size;
+	}
+
+	return 0;
+}
+
RTE_EXPORT_INTERNAL_SYMBOL(dpaa2_eth_eventq_attach)
int dpaa2_eth_eventq_attach(const struct rte_eth_dev *dev,
int eth_rx_queue_id,
@@ -2737,6 +2936,8 @@ static struct eth_dev_ops dpaa2_ethdev_ops = {
.mac_addr_set = dpaa2_dev_set_mac_addr,
.rss_hash_update = dpaa2_dev_rss_hash_update,
.rss_hash_conf_get = dpaa2_dev_rss_hash_conf_get,
+ .reta_update = dpaa2_dev_rss_reta_update,
+ .reta_query = dpaa2_dev_rss_reta_query,
.flow_ops_get = dpaa2_dev_flow_ops_get,
.rxq_info_get = dpaa2_rxq_info_get,
.txq_info_get = dpaa2_txq_info_get,
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.h b/drivers/net/dpaa2/dpaa2_ethdev.h
index 4da47a543a..3f224c654e 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.h
+++ b/drivers/net/dpaa2/dpaa2_ethdev.h
@@ -412,6 +412,12 @@ struct dpaa2_dev_priv {
uint8_t max_cgs;
uint8_t cgid_in_use[MAX_RX_QUEUES];
+ /* Current hash distribution size per RX TC, written by
+ * dpaa2_setup_flow_dist_size() and read by reta_query / reta_update.
+ * Zero means "use default" (= nb_rx_queues clamped to dist_queues).
+ */
+ uint16_t dist_size_cur[MAX_TCS];
+
uint16_t dpni_ver_major;
uint16_t dpni_ver_minor;
uint32_t speed_capa;
@@ -468,6 +474,9 @@ int dpaa2_distset_to_dpkg_profile_cfg(uint64_t req_dist_set,
int dpaa2_setup_flow_dist(struct rte_eth_dev *eth_dev,
uint64_t req_dist_set, int tc_index);
+int dpaa2_setup_flow_dist_size(struct rte_eth_dev *eth_dev,
+ uint64_t req_dist_set, int tc_index, uint16_t dist_size);
+
int dpaa2_remove_flow_dist(struct rte_eth_dev *eth_dev,
uint8_t tc_index);
--
2.43.0
^ permalink raw reply related
* [PATCH 5/6] net/dpaa2: support inner RSS level for tunnelled traffic
From: Maxime Leroy @ 2026-06-16 10:47 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616104717.723087-1-maxime@leroys.fr>
Honour RTE_ETH_RSS_LEVEL_INNERMOST in the RSS configuration. When it is
set, program the IP extracts with hdr_index = HDR_INDEX_LAST so the RSS
key uses the innermost IP header instead of the outer one.
This is useful for tunnelled traffic whose outer headers are fixed,
leaving no entropy for an outer IP hash. Hashing on the inner IP
addresses spreads such flows.
The hdr_index field is only defined for IP/IPv4/IPv6, VLAN and MPLS
extracts, so this patch only controls the IP extracts. L4 extracts do not
have a header-instance selector and keep hdr_index 0; they continue to
use the parser-selected L4 offset.
The RSS level is carried in the high bits of rss_hf; extract it and strip
the level bits before walking the protocol bits so they are not mistaken
for an unsupported hash type. The level is also advertised in
flow_type_rss_offloads, otherwise ethdev rejects an rss_hf carrying it.
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
doc/guides/rel_notes/release_26_07.rst | 4 ++++
drivers/net/dpaa2/base/dpaa2_hw_dpni.c | 23 +++++++++++++++++++++++
drivers/net/dpaa2/dpaa2_ethdev.c | 3 ++-
3 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 5d7aa8d1bf..39f3988198 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -140,6 +140,10 @@ New Features
* Added support for selective Rx in scalar SPRQ Rx path.
+* **Updated NXP dpaa2 driver.**
+
+ * Added inner RSS level support for tunnelled traffic.
+
* **Updated PCAP ethernet driver.**
* Added support for VLAN insertion and stripping.
diff --git a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
index 2724672a5e..8a05253bbd 100644
--- a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
+++ b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
@@ -220,6 +220,13 @@ dpaa2_remove_flow_dist(struct rte_eth_dev *eth_dev,
return ret;
}
+/* dpkg from_hdr.hdr_index value selecting the innermost IP instance (see
+ * fsl_dpkg.h, where hdr_index is only defined for NET_PROT_IP/IPv4/IPv6/
+ * VLAN/MPLS). Used to hash on the inner IP of tunnelled traffic when
+ * RTE_ETH_RSS_LEVEL_INNERMOST is requested.
+ */
+#define DPAA2_DIST_HDR_INDEX_LAST 0xff
+
int
dpaa2_distset_to_dpkg_profile_cfg(
uint64_t req_dist_set,
@@ -234,8 +241,18 @@ dpaa2_distset_to_dpkg_profile_cfg(
int esp_configured = 0;
int ah_configured = 0;
int pppoe_configured = 0;
+ uint8_t hdr_index = 0;
memset(kg_cfg, 0, sizeof(struct dpkg_profile_cfg));
+
+ /* RTE_ETH_RSS_LEVEL_INNERMOST asks for the inner header to be hashed.
+ * Map it to the innermost IP instance in the key extracts; the level
+ * bits are not protocol bits, so strip them before the loop.
+ */
+ if ((req_dist_set & RTE_ETH_RSS_LEVEL_MASK) == RTE_ETH_RSS_LEVEL_INNERMOST)
+ hdr_index = DPAA2_DIST_HDR_INDEX_LAST;
+ req_dist_set &= ~RTE_ETH_RSS_LEVEL_MASK;
+
while (req_dist_set) {
if (req_dist_set % 2 != 0) {
dist_field = 1ULL << loop;
@@ -373,6 +390,8 @@ dpaa2_distset_to_dpkg_profile_cfg(
kg_cfg->extracts[i].extract.from_hdr.prot =
NET_PROT_IP;
+ kg_cfg->extracts[i].extract.from_hdr.hdr_index =
+ hdr_index;
kg_cfg->extracts[i].extract.from_hdr.field =
NH_FLD_IP_SRC;
kg_cfg->extracts[i].type =
@@ -383,6 +402,8 @@ dpaa2_distset_to_dpkg_profile_cfg(
kg_cfg->extracts[i].extract.from_hdr.prot =
NET_PROT_IP;
+ kg_cfg->extracts[i].extract.from_hdr.hdr_index =
+ hdr_index;
kg_cfg->extracts[i].extract.from_hdr.field =
NH_FLD_IP_DST;
kg_cfg->extracts[i].type =
@@ -393,6 +414,8 @@ dpaa2_distset_to_dpkg_profile_cfg(
kg_cfg->extracts[i].extract.from_hdr.prot =
NET_PROT_IP;
+ kg_cfg->extracts[i].extract.from_hdr.hdr_index =
+ hdr_index;
kg_cfg->extracts[i].extract.from_hdr.field =
NH_FLD_IP_PROTO;
kg_cfg->extracts[i].type =
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index 803a8321e0..c19736fb80 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -424,7 +424,8 @@ dpaa2_dev_info_get(struct rte_eth_dev *dev,
dev_info->max_hash_mac_addrs = 0;
dev_info->max_vfs = 0;
dev_info->max_vmdq_pools = RTE_ETH_16_POOLS;
- dev_info->flow_type_rss_offloads = DPAA2_RSS_OFFLOAD_ALL;
+ dev_info->flow_type_rss_offloads = DPAA2_RSS_OFFLOAD_ALL |
+ RTE_ETH_RSS_LEVEL_OUTERMOST | RTE_ETH_RSS_LEVEL_INNERMOST;
dev_info->default_rxportconf.burst_size = dpaa2_dqrr_size;
/* same is rx size for best perf */
--
2.43.0
^ permalink raw reply related
* [PATCH 4/6] net/dpaa2: set PPPoE configured flag in RSS key build
From: Maxime Leroy @ 2026-06-16 10:47 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616104717.723087-1-maxime@leroys.fr>
The RTE_ETH_RSS_PPPOE case tested pppoe_configured as a guard against
adding the extract twice, but never set it, leaving the guard dead. Set
it like the other protocol cases.
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
drivers/net/dpaa2/base/dpaa2_hw_dpni.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
index f1d670f213..2724672a5e 100644
--- a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
+++ b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
@@ -260,6 +260,8 @@ dpaa2_distset_to_dpkg_profile_cfg(
case RTE_ETH_RSS_PPPOE:
if (pppoe_configured)
break;
+ pppoe_configured = 1;
+
kg_cfg->extracts[i].extract.from_hdr.prot =
NET_PROT_PPPOE;
kg_cfg->extracts[i].extract.from_hdr.field =
--
2.43.0
^ permalink raw reply related
* [PATCH 3/6] net/dpaa2: drop stray extract count bump in RSS key build
From: Maxime Leroy @ 2026-06-16 10:47 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616104717.723087-1-maxime@leroys.fr>
The IPv4/IPv6 L3 case bumped kg_cfg->num_extracts once in the middle of
the loop, while every other case relies on the final
'kg_cfg->num_extracts = i' that overwrites it. The increment was dead and
misleading; remove it.
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
drivers/net/dpaa2/base/dpaa2_hw_dpni.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
index 4df66d8f33..f1d670f213 100644
--- a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
+++ b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
@@ -397,7 +397,6 @@ dpaa2_distset_to_dpkg_profile_cfg(
DPKG_EXTRACT_FROM_HDR;
kg_cfg->extracts[i].extract.from_hdr.type =
DPKG_FULL_FIELD;
- kg_cfg->num_extracts++;
i++;
break;
--
2.43.0
^ permalink raw reply related
* [PATCH 2/6] net/dpaa2: use L4 port extraction for SCTP RSS
From: Maxime Leroy @ 2026-06-16 10:47 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, stable, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616104717.723087-1-maxime@leroys.fr>
DPAA2 hardware exposes L4 source and destination port fields at the parser
L4 offset. These fields are valid when TCP, UDP, SCTP or DCCP is present.
The driver already uses the TCP port fields for the TCP/UDP RSS case.
Handle SCTP in the same L4 RSS case, so SCTP packets use the same L4
source and destination port extraction.
Fixes: 89c2ea8f5408 ("net/dpaa2: add RSS flow distribution")
Cc: stable@dpdk.org
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
drivers/net/dpaa2/base/dpaa2_hw_dpni.c | 32 +++-----------------------
1 file changed, 3 insertions(+), 29 deletions(-)
diff --git a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
index e7578b7576..4df66d8f33 100644
--- a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
+++ b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
@@ -228,7 +228,7 @@ dpaa2_distset_to_dpkg_profile_cfg(
uint32_t loop = 0, i = 0;
uint64_t dist_field = 0;
int l2_configured = 0, l3_configured = 0;
- int l4_configured = 0, sctp_configured = 0;
+ int l4_configured = 0;
int mpls_configured = 0;
int vlan_configured = 0;
int esp_configured = 0;
@@ -407,6 +407,8 @@ dpaa2_distset_to_dpkg_profile_cfg(
case RTE_ETH_RSS_NONFRAG_IPV6_UDP:
case RTE_ETH_RSS_IPV6_TCP_EX:
case RTE_ETH_RSS_IPV6_UDP_EX:
+ case RTE_ETH_RSS_NONFRAG_IPV4_SCTP:
+ case RTE_ETH_RSS_NONFRAG_IPV6_SCTP:
if (l4_configured)
break;
@@ -433,34 +435,6 @@ dpaa2_distset_to_dpkg_profile_cfg(
i++;
break;
- case RTE_ETH_RSS_NONFRAG_IPV4_SCTP:
- case RTE_ETH_RSS_NONFRAG_IPV6_SCTP:
-
- if (sctp_configured)
- break;
- sctp_configured = 1;
-
- kg_cfg->extracts[i].extract.from_hdr.prot =
- NET_PROT_SCTP;
- kg_cfg->extracts[i].extract.from_hdr.field =
- NH_FLD_SCTP_PORT_SRC;
- kg_cfg->extracts[i].type =
- DPKG_EXTRACT_FROM_HDR;
- kg_cfg->extracts[i].extract.from_hdr.type =
- DPKG_FULL_FIELD;
- i++;
-
- kg_cfg->extracts[i].extract.from_hdr.prot =
- NET_PROT_SCTP;
- kg_cfg->extracts[i].extract.from_hdr.field =
- NH_FLD_SCTP_PORT_DST;
- kg_cfg->extracts[i].type =
- DPKG_EXTRACT_FROM_HDR;
- kg_cfg->extracts[i].extract.from_hdr.type =
- DPKG_FULL_FIELD;
- i++;
- break;
-
default:
DPAA2_PMD_WARN(
"unsupported flow dist option 0x%" PRIx64,
--
2.43.0
^ permalink raw reply related
* [PATCH 1/6] net/dpaa2: add L4 destination port to the RSS hash key
From: Maxime Leroy @ 2026-06-16 10:47 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, stable, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616104717.723087-1-maxime@leroys.fr>
The TCP/UDP case of the RSS key builder added two extracts but both used
NH_FLD_TCP_PORT_SRC, so the L4 destination port was never part of the
hash. Use NH_FLD_TCP_PORT_DST for the second extract so both source and
destination ports contribute.
NET_PROT_TCP is kept: it maps to the hardware's generic L4 port
extraction (parser L4 offset, valid for TCP/UDP/SCTP), so this also
covers UDP traffic.
Fixes: 89c2ea8f5408 ("net/dpaa2: add RSS flow distribution")
Cc: stable@dpdk.org
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
drivers/net/dpaa2/base/dpaa2_hw_dpni.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
index 13825046d8..e7578b7576 100644
--- a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
+++ b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
@@ -425,7 +425,7 @@ dpaa2_distset_to_dpkg_profile_cfg(
kg_cfg->extracts[i].extract.from_hdr.prot =
NET_PROT_TCP;
kg_cfg->extracts[i].extract.from_hdr.field =
- NH_FLD_TCP_PORT_SRC;
+ NH_FLD_TCP_PORT_DST;
kg_cfg->extracts[i].type =
DPKG_EXTRACT_FROM_HDR;
kg_cfg->extracts[i].extract.from_hdr.type =
--
2.43.0
^ permalink raw reply related
* [PATCH 0/6] net/dpaa2: RSS fixes and improvements
From: Maxime Leroy @ 2026-06-16 10:47 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy
A set of RSS fixes and improvements for the dpaa2 PMD.
Patches 1 and 2 fix the RSS hash key: the L4 destination port was never
added (both extracts used the source port), and SCTP now uses the same L4
port extraction as TCP/UDP. Both are tagged for stable.
Patches 3 and 4 are small cleanups in the key builder (a dead num_extracts
increment, and the unset PPPoE guard flag).
Patch 5 honours RTE_ETH_RSS_LEVEL_INNERMOST so tunnelled traffic hashes on
the inner IP header. Patch 6 implements reta_query / reta_update as an
emulation over the HW distribution-size mechanism, since dpaa2 has no
software-visible indirection table.
Tested on LX2160A (lx2160acex7).
Maxime Leroy (6):
net/dpaa2: add L4 destination port to the RSS hash key
net/dpaa2: use L4 port extraction for SCTP RSS
net/dpaa2: drop stray extract count bump in RSS key build
net/dpaa2: set PPPoE configured flag in RSS key build
net/dpaa2: support inner RSS level for tunnelled traffic
net/dpaa2: implement RSS RETA query and update
doc/guides/nics/features/dpaa2.ini | 1 +
doc/guides/rel_notes/release_26_07.rst | 5 +
drivers/net/dpaa2/base/dpaa2_hw_dpni.c | 94 +++++++-----
drivers/net/dpaa2/dpaa2_ethdev.c | 204 ++++++++++++++++++++++++-
drivers/net/dpaa2/dpaa2_ethdev.h | 9 ++
5 files changed, 272 insertions(+), 41 deletions(-)
--
2.43.0
^ permalink raw reply
* [PATCH v1 1/2] app/testpmd: mask VLAN TCI when specified
From: Anatoly Burakov @ 2026-06-16 10:27 UTC (permalink / raw)
To: dev, Ori Kam, Aman Singh
Currently, when the testpmd command `...vlan tci is 0x1234` is used, the VLAN
TCI field is set in the spec, but is not covered by the mask. Add a full mask
for VLAN TCI whenever it is specified.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
app/test-pmd/cmdline_flow.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 67f200f2e3..0b7d268535 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -4610,7 +4610,8 @@ static const struct token token_list[] = {
.help = "tag control information",
.next = NEXT(item_vlan, NEXT_ENTRY(COMMON_UNSIGNED),
item_param),
- .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_vlan, hdr.vlan_tci)),
+ .args = ARGS(ARGS_ENTRY_MASK_HTON(struct rte_flow_item_vlan,
+ hdr.vlan_tci, "\xff\xff")),
},
[ITEM_VLAN_PCP] = {
.name = "pcp",
--
2.47.3
^ permalink raw reply related
* [PATCH v1 2/2] app/testpmd: mask VLAN inner type when specified
From: Anatoly Burakov @ 2026-06-16 10:27 UTC (permalink / raw)
To: dev, Ori Kam, Aman Singh
In-Reply-To: <00f78873fdb3d48542a7226e68a94e74cab4d8c0.1781605650.git.anatoly.burakov@intel.com>
Currently, when the testpmd command `...vlan inner_type is 0x1234` is used,
the VLAN inner type field is set in the spec, but is not covered by the mask.
Add a full mask for the VLAN inner type whenever it is specified.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
app/test-pmd/cmdline_flow.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 0b7d268535..e41ab0ef9b 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -4642,8 +4642,8 @@ static const struct token token_list[] = {
.help = "inner EtherType",
.next = NEXT(item_vlan, NEXT_ENTRY(COMMON_UNSIGNED),
item_param),
- .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_vlan,
- hdr.eth_proto)),
+ .args = ARGS(ARGS_ENTRY_MASK_HTON(struct rte_flow_item_vlan,
+ hdr.eth_proto, "\xff\xff")),
},
[ITEM_VLAN_HAS_MORE_VLAN] = {
.name = "has_more_vlan",
--
2.47.3
^ permalink raw reply related
* [PATCH v2 6/6] net/dpaa2: drop the fake software VLAN strip offload
From: Maxime Leroy @ 2026-06-16 10:27 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616102727.708948-1-maxime@leroys.fr>
RTE_ETH_RX_OFFLOAD_VLAN_STRIP is advertised, but no hardware VLAN strip
backs it: when enabled, the Rx burst calls rte_vlan_strip() on every
frame, a software op masquerading as a hardware offload.
It saves a forwarding application nothing: the datapath reads the L2
header anyway to classify or strip. The offload does not remove that
read, it relocates it into the driver Rx burst, where it is far more
expensive.
The cost is a matter of timing. rte_vlan_strip() reaches the L2 header
through rte_pktmbuf_mtod(), which dereferences mbuf->buf_addr. On a
freshly recycled buffer that mbuf cacheline is cold. eth_fd_to_mbuf()
has just written other fields of it (data_off, ol_flags), but buf_addr
is a persistent field it does not rewrite. A write does not stall: it
posts to the store buffer while the line fills in the background, and
the rewritten fields are forwarded straight from there. buf_addr has
nothing to forward, so it must be read from the line, whose fill is
still in flight, and the read stalls. The ethertype read that follows,
on the cold payload line, stalls again. Read later by the application,
when the fill has completed, the same read hits. The offload just
performs it at the worst possible moment.
Measured on a single-core port-to-port forwarding test over two 10G
ports (one core at 2 GHz, 64-byte untagged frames):
- throughput 4.22 -> 5.00 Mpps (+18 percent)
- IPC 0.93 -> 1.25: the cost was memory stall, not compute
- L3/DRAM-bound L2 refills 319M -> 200M over 10s (-37 percent)
perf confirms it: with the offload, the buf_addr load (the cold mbuf
field) and the payload load account for about 84 percent of the Rx
burst's L2 refills; removing it, those vanish and only the inherent DQRR
dequeue misses remain.
Stop advertising VLAN_STRIP and remove the rte_vlan_strip() calls from
every Rx path. This is a behavioural change: the tag is left in the
frame, so an application must strip it itself, on the L2 header it
already reads.
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
doc/guides/rel_notes/release_26_07.rst | 3 +++
drivers/net/dpaa2/dpaa2_ethdev.c | 1 -
drivers/net/dpaa2/dpaa2_rxtx.c | 23 +++--------------------
3 files changed, 6 insertions(+), 21 deletions(-)
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 3bc49c3910..1da1d7b729 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -143,6 +143,9 @@ New Features
* **Updated NXP dpaa2 driver.**
* Added Rx queue interrupt support.
+ * Removed the software VLAN strip offload: ``RTE_ETH_RX_OFFLOAD_VLAN_STRIP``
+ is no longer advertised, as no hardware strip backs it. An application
+ that needs the tag removed must now strip it itself.
* **Updated PCAP ethernet driver.**
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index ac7303c116..e0451d4ac6 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -48,7 +48,6 @@ static uint64_t dev_rx_offloads_sup =
RTE_ETH_RX_OFFLOAD_SCTP_CKSUM |
RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |
RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM |
- RTE_ETH_RX_OFFLOAD_VLAN_STRIP |
RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
RTE_ETH_RX_OFFLOAD_TIMESTAMP;
diff --git a/drivers/net/dpaa2/dpaa2_rxtx.c b/drivers/net/dpaa2/dpaa2_rxtx.c
index 189accc1de..d16e4f8f35 100644
--- a/drivers/net/dpaa2/dpaa2_rxtx.c
+++ b/drivers/net/dpaa2/dpaa2_rxtx.c
@@ -890,10 +890,6 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
}
#endif
- if (eth_data->dev_conf.rxmode.offloads &
- RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
- rte_vlan_strip(bufs[num_rx]);
-
dq_storage++;
num_rx++;
} while (pending);
@@ -922,22 +918,14 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
return num_rx;
}
-/* Convert a DQRR'd FD (single or scatter-gather) to an mbuf and apply software
- * VLAN strip, like the poll path.
- */
+/* Convert a DQRR'd FD (single or scatter-gather) to an mbuf. */
static inline struct rte_mbuf *
dpaa2_dqrr_fd_to_mbuf(const struct qbman_fd *fd,
struct rte_eth_dev_data *eth_data)
{
- struct rte_mbuf *m;
-
if (unlikely(DPAA2_FD_GET_FORMAT(fd) == qbman_fd_sg))
- m = eth_sg_fd_to_mbuf(fd, eth_data->port_id);
- else
- m = eth_fd_to_mbuf(fd, eth_data->port_id);
- if (eth_data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
- rte_vlan_strip(m);
- return m;
+ return eth_sg_fd_to_mbuf(fd, eth_data->port_id);
+ return eth_fd_to_mbuf(fd, eth_data->port_id);
}
/* prefetch a DQRR'd FD's HW annotation (parse area) ahead of conversion */
@@ -1222,11 +1210,6 @@ dpaa2_dev_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
}
#endif
- if (eth_data->dev_conf.rxmode.offloads &
- RTE_ETH_RX_OFFLOAD_VLAN_STRIP) {
- rte_vlan_strip(bufs[num_rx]);
- }
-
dq_storage++;
num_rx++;
num_pulled++;
--
2.43.0
^ permalink raw reply related
* [PATCH v2 5/6] net/dpaa2: fix Rx queue count for primary process
From: Maxime Leroy @ 2026-06-16 10:27 UTC (permalink / raw)
To: dev
Cc: Maxime Leroy, stable, Hemant Agrawal, Sachin Saxena,
Andrew Rybchenko, Ferruh Yigit, David Marchand
In-Reply-To: <20260616102727.708948-1-maxime@leroys.fr>
The rx_queue_count callback was only assigned on the secondary process
path of dpaa2_dev_init(), leaving eth_dev->rx_queue_count NULL for the
primary process. The fast-path rte_eth_rx_queue_count() performs an
unguarded indirect call in non-debug builds, so invoking it on a
primary-process dpaa2 port dereferences a NULL function pointer and
crashes.
Assign the callback once before the process-type split so both the
primary and secondary paths set it.
Fixes: cbfc6111b557 ("ethdev: move inline device operations")
Cc: stable@dpdk.org
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
drivers/net/dpaa2/dpaa2_ethdev.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index 76e2df6167..ac7303c116 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -3410,6 +3410,7 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev)
}
eth_dev->dev_ops = &dpaa2_ethdev_ops;
+ eth_dev->rx_queue_count = dpaa2_dev_rx_queue_count;
if (dpaa2_get_devargs(dev->devargs, DRIVER_LOOPBACK_MODE)) {
eth_dev->rx_pkt_burst = dpaa2_dev_loopback_rx;
--
2.43.0
^ permalink raw reply related
* [PATCH v2 4/6] bus/fslmc/dpio: tune DQRI interrupt coalescing holdoff
From: Maxime Leroy @ 2026-06-16 10:27 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616102727.708948-1-maxime@leroys.fr>
The portal DQRI interrupt used a fixed threshold of 3 and a raw 0xFF
timeout. Parameterize dpaa2_dpio_intr_init() with (threshold, timeout) so
each mode supplies its own: the event driver keeps the legacy 3 / 0xFF
and its DPAA2_PORTAL_INTR_THRESHOLD / DPAA2_PORTAL_INTR_TIMEOUT env-var
overrides, while rx-queue interrupts default the threshold to the HW DQRR
ring depth (ring-1, =7 on QBMan >= 4.1) and use a coalescing holdoff in
microseconds, converted to ITP units from the MC-reported QBMan clock
(itp = holdoff_us * clk_MHz / 256, capped at the 12-bit field). The setup
is portal-wide and idempotent, so the first mode to arm a given portal
wins; a portal is normally driven by a single mode.
The net/dpaa2 PMD exposes both rx-queue-interrupt knobs as per-port
devargs: drv_rx_intr_holdoff_us (default 100us) and drv_rx_intr_threshold
(default 0 = ring-1, clamped to [1, ring-1]). Also expose
dpaa2_dpio_intr_deinit() (no longer event-only), and on the intr_init
error paths close the epoll fd and disable the interrupt.
Add qbman_swp_dqrr_size() to expose the ring depth.
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
doc/guides/nics/dpaa2.rst | 10 +++
drivers/bus/fslmc/portal/dpaa2_hw_dpio.c | 72 +++++++++++++------
drivers/bus/fslmc/portal/dpaa2_hw_dpio.h | 12 +++-
.../fslmc/qbman/include/fsl_qbman_portal.h | 10 +++
drivers/bus/fslmc/qbman/qbman_portal.c | 6 ++
drivers/net/dpaa2/dpaa2_ethdev.c | 60 +++++++++++++++-
drivers/net/dpaa2/dpaa2_ethdev.h | 7 ++
7 files changed, 152 insertions(+), 25 deletions(-)
diff --git a/doc/guides/nics/dpaa2.rst b/doc/guides/nics/dpaa2.rst
index 2d70bd0ab9..47a52c9287 100644
--- a/doc/guides/nics/dpaa2.rst
+++ b/doc/guides/nics/dpaa2.rst
@@ -492,6 +492,16 @@ for details.
packets, so that user can check what is wrong with those packets.
e.g. ``fslmc:dpni.1,drv_error_queue=1``
+* Use dev arg option ``drv_rx_intr_holdoff_us=<uint32>`` to set the Rx queue
+ interrupt coalescing holdoff in microseconds (default 100). Only applies in
+ Rx queue interrupt mode.
+ e.g. ``fslmc:dpni.1,drv_rx_intr_holdoff_us=50``
+
+* Use dev arg option ``drv_rx_intr_threshold=<uint32>`` to set the Rx queue
+ interrupt coalescing frame threshold; 0 (default) means the DQRR ring depth
+ minus one.
+ e.g. ``fslmc:dpni.1,drv_rx_intr_threshold=4``
+
Enabling logs
-------------
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
index e6b4e74b3b..c5525a94fa 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -206,12 +206,35 @@ dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, int cpu_id)
}
#endif /* RTE_EVENT_DPAA2 */
+/* holdoff (us) -> QBMan ITP units (256 cycles each), capped at the 12-bit field */
+RTE_EXPORT_INTERNAL_SYMBOL(dpaa2_dpio_holdoff_to_itp)
+int dpaa2_dpio_holdoff_to_itp(struct dpaa2_dpio_dev *dpio_dev, uint32_t holdoff_us)
+{
+ uint32_t qman_mhz = 0;
+ struct dpio_attr attr;
+ uint64_t itp;
+
+ if (dpio_get_attributes(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token, &attr) == 0)
+ qman_mhz = attr.clk / 1000000;
+ itp = qman_mhz ? ((uint64_t)holdoff_us * qman_mhz) / 256 : 0xFF;
+ if (itp > 0xfff) /* 12-bit ITP field */
+ itp = 0xfff;
+
+ return (int)itp;
+}
+
+/* threshold: DQRR fill raising DQRI (< ring depth); timeout: holdoff in ITP units.
+ * Per-mode values from the caller (eventdev vs rx-queue intr); no env override.
+ * The DQRI config is portal-wide and this is idempotent: the first caller to
+ * arm a portal wins, a later caller's values are ignored (a portal normally
+ * serves a single mode).
+ */
RTE_EXPORT_INTERNAL_SYMBOL(dpaa2_dpio_intr_init)
-int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epoll)
+int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, int threshold,
+ int timeout, bool build_epoll)
{
- struct epoll_event epoll_ev;
int eventfd, dpio_epoll_fd, ret;
- int threshold = 0x3, timeout = 0xFF;
+ struct epoll_event epoll_ev;
if (dpio_dev->intr_enabled)
return 0;
@@ -222,12 +245,6 @@ int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epoll)
return -1;
}
- if (getenv("DPAA2_PORTAL_INTR_THRESHOLD"))
- threshold = atoi(getenv("DPAA2_PORTAL_INTR_THRESHOLD"));
-
- if (getenv("DPAA2_PORTAL_INTR_TIMEOUT"))
- sscanf(getenv("DPAA2_PORTAL_INTR_TIMEOUT"), "%x", &timeout);
-
qbman_swp_interrupt_set_trigger(dpio_dev->sw_portal,
QBMAN_SWP_INTERRUPT_DQRI);
qbman_swp_interrupt_clear_status(dpio_dev->sw_portal, 0xffffffff);
@@ -238,9 +255,9 @@ int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epoll)
dpio_dev->epoll_fd = -1;
/* The event PMD dequeues by sleeping on a private epoll instance owned
- * by the portal, so build it here. A caller that waits on another
- * epoll (the net rx-queue-interrupt path uses the application's) skips
- * this.
+ * by the portal, so build it here. The net rx-queue-interrupt path
+ * exposes the raw eventfd through the generic ethdev API and waits on
+ * the application's own epoll instead, so it skips this.
*/
if (build_epoll) {
dpio_epoll_fd = epoll_create(1);
@@ -269,11 +286,14 @@ int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epoll)
return 0;
}
-#ifdef RTE_EVENT_DPAA2
-static void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev)
+RTE_EXPORT_INTERNAL_SYMBOL(dpaa2_dpio_intr_deinit)
+void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev)
{
int ret;
+ if (!dpio_dev->intr_enabled)
+ return;
+
ret = rte_dpaa2_intr_disable(dpio_dev->intr_handle, 0);
if (ret)
DPAA2_BUS_ERR("DPIO interrupt disable failed");
@@ -284,7 +304,6 @@ static void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev)
}
dpio_dev->intr_enabled = 0;
}
-#endif
static int
dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
@@ -306,9 +325,18 @@ dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
}
#ifdef RTE_EVENT_DPAA2
- if (dpaa2_dpio_intr_init(dpio_dev, true)) {
- DPAA2_BUS_ERR("Interrupt registration failed for dpio");
- return -1;
+ {
+ int threshold = 3, timeout = 0xFF;
+
+ if (getenv("DPAA2_PORTAL_INTR_THRESHOLD"))
+ threshold = atoi(getenv("DPAA2_PORTAL_INTR_THRESHOLD"));
+ if (getenv("DPAA2_PORTAL_INTR_TIMEOUT"))
+ sscanf(getenv("DPAA2_PORTAL_INTR_TIMEOUT"), "%x", &timeout);
+
+ if (dpaa2_dpio_intr_init(dpio_dev, threshold, timeout, true)) {
+ DPAA2_BUS_ERR("Interrupt registration failed for dpio");
+ return -1;
+ }
}
dpaa2_affine_dpio_intr_to_respective_core(dpio_dev->hw_id, cpu_id);
#endif
@@ -319,9 +347,11 @@ dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
static void dpaa2_put_qbman_swp(struct dpaa2_dpio_dev *dpio_dev)
{
if (dpio_dev) {
-#ifdef RTE_EVENT_DPAA2
+ /* rx-queue interrupts (net PMD) can arm a portal without the
+ * event driver; tear it down unconditionally. Safe when never
+ * armed: intr_deinit returns early if intr is not enabled.
+ */
dpaa2_dpio_intr_deinit(dpio_dev);
-#endif
rte_atomic16_clear(&dpio_dev->ref_count);
}
}
@@ -512,6 +542,8 @@ dpaa2_create_dpio_device(int vdev_fd,
goto err;
}
+ DPAA2_BUS_DEBUG("QBMAN clk = %u Hz (%u MHz)", attr.clk, attr.clk / 1000000);
+
/* find the SoC type for the first time */
if (!dpaa2_svr_family) {
struct mc_soc_version mc_plat_info = {0};
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
index 10dd968e5f..090fa14410 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
@@ -50,9 +50,17 @@ int dpaa2_affine_qbman_swp(void);
__rte_internal
int dpaa2_affine_qbman_ethrx_swp(void);
-/* set up a DPIO portal's DQRI interrupt (rx-queue interrupt mode) */
+/* set up / tear down a DPIO portal's DQRI interrupt (rx-queue interrupt mode) */
__rte_internal
-int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epoll);
+int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, int threshold,
+ int timeout, bool build_epoll);
+
+__rte_internal
+void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev);
+
+/* convert a coalescing holdoff (microseconds) to QBMan ITP units */
+__rte_internal
+int dpaa2_dpio_holdoff_to_itp(struct dpaa2_dpio_dev *dpio_dev, uint32_t holdoff_us);
/* allocate memory for FQ - dq storage */
__rte_internal
diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
index bb8bd86103..e9eda31927 100644
--- a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
@@ -157,6 +157,16 @@ uint32_t qbman_swp_intr_timeout_read_status(struct qbman_swp *p);
*/
void qbman_swp_intr_timeout_write(struct qbman_swp *p, uint32_t mask);
+/**
+ * qbman_swp_dqrr_size() - Get the HW DQRR ring depth of a software portal.
+ * @p: the given software portal object.
+ *
+ * Returns the number of DQRR entries (4 on QBMan < 4.1, 8 on >= 4.1). Useful
+ * as the upper bound for the DQRR interrupt coalescing threshold.
+ */
+__rte_internal
+uint8_t qbman_swp_dqrr_size(struct qbman_swp *p);
+
/**
* qbman_swp_interrupt_get_trigger() - Get the data in software portal
* interrupt enable register.
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c b/drivers/bus/fslmc/qbman/qbman_portal.c
index 947415363a..81c2d87e0a 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.c
+++ b/drivers/bus/fslmc/qbman/qbman_portal.c
@@ -433,6 +433,12 @@ void qbman_swp_intr_timeout_write(struct qbman_swp *p, uint32_t mask)
qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_ITPR, mask);
}
+RTE_EXPORT_INTERNAL_SYMBOL(qbman_swp_dqrr_size)
+uint8_t qbman_swp_dqrr_size(struct qbman_swp *p)
+{
+ return p->dqrr.dqrr_size;
+}
+
uint32_t qbman_swp_interrupt_get_trigger(struct qbman_swp *p)
{
return qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_IER);
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index 61e7c820de..76e2df6167 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -36,6 +36,9 @@
#define DRIVER_ERROR_QUEUE "drv_err_queue"
#define DRIVER_NO_TAILDROP "drv_no_taildrop"
#define DRIVER_NO_DATA_STASHING "drv_no_data_stashing"
+#define DRIVER_RX_INTR_HOLDOFF_US "drv_rx_intr_holdoff_us"
+#define DPAA2_RX_INTR_HOLDOFF_US_DEF 100
+#define DRIVER_RX_INTR_THRESHOLD "drv_rx_intr_threshold"
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_REPEAT_TIME 90 /* 9s (90 * 100ms) in total */
@@ -2873,7 +2876,7 @@ dpaa2_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
struct dpaa2_dev_priv *priv = dev->data->dev_private;
struct dpaa2_queue *dpaa2_q = priv->rx_vq[queue_id];
struct dpaa2_dpio_dev *dpio, *old;
- int ret;
+ int ret, threshold, timeout, dqrr_max;
if (!dpaa2_q->napi_dpcon)
return -ENOTSUP; /* no channel -> caller keeps polling */
@@ -2882,10 +2885,22 @@ dpaa2_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
return -EIO;
dpio = DPAA2_PER_LCORE_ETHRX_DPIO;
+ /* threshold from drv_rx_intr_threshold (0 = ring-1), holdoff from
+ * drv_rx_intr_holdoff_us. idempotent: no-op if the dpio is already
+ * armed (e.g. event driver)
+ */
+ dqrr_max = qbman_swp_dqrr_size(dpio->sw_portal) - 1;
+ threshold = priv->rx_intr_threshold ? (int)priv->rx_intr_threshold : dqrr_max;
+ if (threshold < 1 || threshold > dqrr_max) {
+ DPAA2_PMD_WARN("drv_rx_intr_threshold %d out of [1, %d], clamping",
+ threshold, dqrr_max);
+ threshold = threshold < 1 ? 1 : dqrr_max;
+ }
+ timeout = dpaa2_dpio_holdoff_to_itp(dpio, priv->rx_intr_holdoff_us);
/* build_epoll=false: the generic ethdev rx-intr API waits on the
* application epoll, not the portal's private one (event PMD only).
*/
- ret = dpaa2_dpio_intr_init(dpio, false); /* VFIO eventfd, no MC */
+ ret = dpaa2_dpio_intr_init(dpio, threshold, timeout, false);
if (ret)
return ret;
@@ -3139,6 +3154,35 @@ dpaa2_get_devargs(struct rte_devargs *devargs, const char *key)
return 1;
}
+static int
+u32_devarg_handler(__rte_unused const char *key, const char *value, void *opaque)
+{
+ char *end;
+ unsigned long v = strtoul(value, &end, 0);
+
+ if (*value == '\0' || *end != '\0' || v > UINT32_MAX)
+ return -1;
+ *(uint32_t *)opaque = (uint32_t)v;
+
+ return 0;
+}
+
+/* Read a u32-valued devarg into *out, leaving *out untouched if absent. */
+static void
+dpaa2_get_devargs_u32(struct rte_devargs *devargs, const char *key, uint32_t *out)
+{
+ struct rte_kvargs *kvlist;
+
+ if (!devargs)
+ return;
+ kvlist = rte_kvargs_parse(devargs->args, NULL);
+ if (!kvlist)
+ return;
+ if (rte_kvargs_count(kvlist, key))
+ rte_kvargs_process(kvlist, key, u32_devarg_handler, out);
+ rte_kvargs_free(kvlist);
+}
+
static int
dpaa2_dev_init(struct rte_eth_dev *eth_dev)
{
@@ -3166,6 +3210,14 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev)
DPAA2_PMD_INFO("No RX prefetch mode");
}
+ priv->rx_intr_holdoff_us = DPAA2_RX_INTR_HOLDOFF_US_DEF;
+ dpaa2_get_devargs_u32(dev->devargs, DRIVER_RX_INTR_HOLDOFF_US,
+ &priv->rx_intr_holdoff_us);
+
+ priv->rx_intr_threshold = 0;
+ dpaa2_get_devargs_u32(dev->devargs, DRIVER_RX_INTR_THRESHOLD,
+ &priv->rx_intr_threshold);
+
if (dpaa2_get_devargs(dev->devargs, DRIVER_LOOPBACK_MODE)) {
priv->flags |= DPAA2_RX_LOOPBACK_MODE;
DPAA2_PMD_INFO("Rx loopback mode");
@@ -3681,5 +3733,7 @@ RTE_PMD_REGISTER_PARAM_STRING(NET_DPAA2_PMD_DRIVER_NAME,
DRIVER_RX_PARSE_ERR_DROP "=<int>"
DRIVER_ERROR_QUEUE "=<int>"
DRIVER_NO_TAILDROP "=<int>"
- DRIVER_NO_DATA_STASHING "=<int>");
+ DRIVER_NO_DATA_STASHING "=<int> "
+ DRIVER_RX_INTR_HOLDOFF_US "=<uint32> "
+ DRIVER_RX_INTR_THRESHOLD "=<uint32>");
RTE_LOG_REGISTER_DEFAULT(dpaa2_logtype_pmd, NOTICE);
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.h b/drivers/net/dpaa2/dpaa2_ethdev.h
index 3765f79e84..84785c0561 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.h
+++ b/drivers/net/dpaa2/dpaa2_ethdev.h
@@ -412,6 +412,13 @@ struct dpaa2_dev_priv {
uint8_t max_cgs;
uint8_t cgid_in_use[MAX_RX_QUEUES];
+ /* DQRI holdoff (us) for rx-queue interrupts (drv_rx_intr_holdoff_us) */
+ uint32_t rx_intr_holdoff_us;
+ /* DQRI threshold for rx-queue interrupts (drv_rx_intr_threshold);
+ * 0 = auto (DQRR ring depth - 1)
+ */
+ uint32_t rx_intr_threshold;
+
uint16_t dpni_ver_major;
uint16_t dpni_ver_minor;
uint32_t speed_capa;
--
2.43.0
^ permalink raw reply related
* [PATCH v2 3/6] net/dpaa2: support Rx queue interrupts
From: Maxime Leroy @ 2026-06-16 10:27 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616102727.708948-1-maxime@leroys.fr>
Implement .rx_queue_intr_enable / .rx_queue_intr_disable so a worker
can sleep on a queue's data-availability notification instead of
busy-polling, through the generic rte_eth_dev_rx_intr_* API.
A worker wakes on its software portal's DQRI, which fires when the
portal's DQRR holds frames, so the Rx FQ must be scheduled to a channel
that portal dequeues. The natural dpni_set_queue with a notification
destination holds the global MC lock long enough to wedge the firmware
and must target a disabled dpni. But the polling portal is only known
once a worker affines, after dev_start, so the destination cannot be
the worker's portal.
Bind each Rx FQ to its own DPCON channel instead. The default Rx burst
pulls frames from the FQ with a volatile dequeue and cannot be
interrupt-driven; to wake on the DQRI the FQ must be pushed to the
portal's DQRR. dev_start issues the DEST_DPCON set_queue statically on
the still-disabled dpni with no knowledge of the polling lcore; a worker
later subscribes its own ethrx portal to the channel and arms the DQRI
in rx_queue_intr_enable (a one-shot per-portal MC op plus QBMan, never
the wedging set_queue).
This pushed/DQRR consumption is how the event PMD works, but the DPCON
use differs. The event PMD uses one DPCON per worker, concentrates N
FQs onto it, and lets the QBMan scheduler load-balance events across
cores. Here affinity is static and there is no scheduling, so each FQ
gets its own DPCON (one per FQ, hence more channels, drawn from the
shared pool now managed by the fslmc bus), bound once at
dev_start before the lcore is known. Frames are delivered by
rte_eth_rx_burst (dpaa2_dev_rx_dqrr), not as events via
rte_event_dequeue.
rte_eth_dev_rx_intr_enable(q) subscribes the lcore portal to q's DPCON
and arms the DQRI. rte_eth_dev_rx_intr_ctl_q(q) adds q's eventfd (the
portal DQRI fd) to the thread epoll.
                    wire
                     |
                 [ DPMAC ]
                     |
                  [ DPNI ]              (1)
                     |
  TC0:    FQ0    FQ1    FQ2    FQ3      (2)
           |      |      |      |       (3)
        [DPCON][DPCON][DPCON][DPCON]
            \     |      |     /        (4)
          [ DPIO A ]    [ DPIO B ]      (5)
               |             |
             DQRR          DQRR         (6)
               |             |
             DQRI          DQRI         (7)
               |             |
            eventfd       eventfd       (8)
               |             |
      rte_epoll_wait  rte_epoll_wait    (9)
               |             |
              dpaa2_dev_rx_dqrr         (10)
(1) WRIOP picks a TC (QoS), then RSS-hashes within the TC to an FQ
(2) FQ0..FQ3 are the rte_eth Rx queues
(3) dpni_set_queue(DEST_DPCON): one DPCON per FQ
(4) the lcore portal subscribes to its DPCONs (push_set)
(5) one QBMan software portal per lcore
(6) QMan pushes the FDs into the portal DQRR
(7) DQRI is raised when the DQRR is non-empty
(8) a portal's queues share one fd (its DQRI eventfd)
(9) worker sleeps here when all its queues are idle
(10) dpaa2_dev_rx_dqrr drains the DQRR, demuxes FDs to FQs by fqd_ctx
The DQRI and eventfd are portal-wide: a queue's eventfd is its portal's
DQRI fd, and the inhibit bit is refcounted by armed queues so disabling
one queue never masks a sibling. The static per-queue bind also lets a
queue be re-homed to another lcore at runtime, the new worker
reclaiming the channel, with no set_queue and no port stop.
On single-core 64-byte forwarding this interrupt path runs at ~5.0 Mpps
versus ~5.86 Mpps polling: per-frame DQRR demux and consume cost about
15 percent over the polling batch dequeue.
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
doc/guides/nics/features/dpaa2.ini | 1 +
doc/guides/rel_notes/release_26_07.rst | 4 +
drivers/bus/fslmc/portal/dpaa2_hw_dpio.c | 11 +-
drivers/bus/fslmc/portal/dpaa2_hw_dpio.h | 4 +
drivers/bus/fslmc/portal/dpaa2_hw_pvt.h | 27 +-
.../fslmc/qbman/include/fsl_qbman_portal.h | 1 +
drivers/bus/fslmc/qbman/qbman_portal.c | 1 +
drivers/net/dpaa2/dpaa2_ethdev.c | 291 +++++++++++++++++-
drivers/net/dpaa2/dpaa2_ethdev.h | 3 +
drivers/net/dpaa2/dpaa2_rxtx.c | 122 ++++++++
10 files changed, 459 insertions(+), 6 deletions(-)
diff --git a/doc/guides/nics/features/dpaa2.ini b/doc/guides/nics/features/dpaa2.ini
index 5f9c587847..fff313603f 100644
--- a/doc/guides/nics/features/dpaa2.ini
+++ b/doc/guides/nics/features/dpaa2.ini
@@ -7,6 +7,7 @@
Speed capabilities = Y
Link status = Y
Link status event = Y
+Rx interrupt = Y
Burst mode info = Y
Queue start/stop = Y
Scattered Rx = Y
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 5d7aa8d1bf..3bc49c3910 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -140,6 +140,10 @@ New Features
* Added support for selective Rx in scalar SPRQ Rx path.
+* **Updated NXP dpaa2 driver.**
+
+ * Added Rx queue interrupt support.
+
* **Updated PCAP ethernet driver.**
* Added support for VLAN insertion and stripping.
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
index 3a5abb2e6d..e6b4e74b3b 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -204,13 +204,18 @@ dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, int cpu_id)
fclose(file);
}
+#endif /* RTE_EVENT_DPAA2 */
-static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epoll)
+RTE_EXPORT_INTERNAL_SYMBOL(dpaa2_dpio_intr_init)
+int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epoll)
{
struct epoll_event epoll_ev;
int eventfd, dpio_epoll_fd, ret;
int threshold = 0x3, timeout = 0xFF;
+ if (dpio_dev->intr_enabled)
+ return 0;
+
ret = rte_dpaa2_intr_enable(dpio_dev->intr_handle, 0);
if (ret) {
DPAA2_BUS_ERR("Interrupt registration failed");
@@ -259,9 +264,12 @@ static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epol
dpio_dev->epoll_fd = dpio_epoll_fd;
}
+ dpio_dev->intr_enabled = 1;
+
return 0;
}
+#ifdef RTE_EVENT_DPAA2
static void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev)
{
int ret;
@@ -274,6 +282,7 @@ static void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev)
close(dpio_dev->epoll_fd);
dpio_dev->epoll_fd = -1;
}
+ dpio_dev->intr_enabled = 0;
}
#endif
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
index 328e1e788a..10dd968e5f 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
@@ -50,6 +50,10 @@ int dpaa2_affine_qbman_swp(void);
__rte_internal
int dpaa2_affine_qbman_ethrx_swp(void);
+/* set up a DPIO portal's DQRI interrupt (rx-queue interrupt mode) */
+__rte_internal
+int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epoll);
+
/* allocate memory for FQ - dq storage */
__rte_internal
int
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index 79a2ec41e3..af75e96b27 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -133,6 +133,8 @@ struct dpaa2_dpio_dev {
struct rte_intr_handle *intr_handle; /* Interrupt related info */
int32_t epoll_fd; /**< File descriptor created for interrupt polling */
int32_t hw_id; /**< An unique ID of this DPIO device instance */
+ uint8_t intr_enabled; /**< DQRI portal interrupt already set up */
+ uint16_t ethrx_intr_refcnt; /**< rx queues currently armed on this portal */
struct dpaa2_portal_dqrr dpaa2_held_bufs;
};
@@ -164,6 +166,20 @@ typedef void (dpaa2_queue_cb_dqrr_t)(struct qbman_swp *swp,
typedef void (dpaa2_queue_cb_eqresp_free_t)(uint16_t eqresp_ci,
struct dpaa2_queue *dpaa2_q);
+#define DPAA2_NAPI_FD_STASH_SIZE 64 /*!< power of 2; >= 2x rx burst so the
+ * peer port's frames fit before HW
+ * backpressure (2 ports/worker)
+ */
+
+/* Lcore-local FIFO of raw FDs demuxed to this queue by another queue's burst
+ * on the same portal (see dpaa2_queue::napi_stash).
+ */
+struct dpaa2_napi_stash {
+ uint16_t head; /*!< pop index (drain) */
+ uint16_t tail; /*!< push index (park) */
+ struct qbman_fd fd[DPAA2_NAPI_FD_STASH_SIZE];
+};
+
struct __rte_cache_aligned dpaa2_queue {
struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
union {
@@ -176,7 +192,7 @@ struct __rte_cache_aligned dpaa2_queue {
uint8_t cgid; /*! < Congestion Group id for this queue */
uint64_t rx_pkts;
uint64_t tx_pkts;
- uint64_t err_pkts;
+ uint64_t err_pkts; /*!< also counts NAPI stash-full drops (imissed) */
union {
/**Ingress*/
struct queue_storage_info_t *q_storage[RTE_MAX_LCORE];
@@ -195,6 +211,15 @@ struct __rte_cache_aligned dpaa2_queue {
uint64_t offloads;
uint64_t lpbk_cntx;
uint8_t data_stashing_off;
+ /* NAPI rx-interrupt: per-queue DPCON bound to this FQ at dev_start
+ * (DEST_DPCON, static); the polling worker subscribes its ethrx portal
+ * to the channel and arms the DQRI, rx_dqrr drains+demuxes by fqd_ctx.
+ */
+ struct dpaa2_dpcon_dev *napi_dpcon; /*!< notif channel, NULL = napi off */
+ RTE_ATOMIC(struct dpaa2_dpio_dev *) napi_sub_dpio; /*!< subscribed portal or NULL */
+ uint8_t napi_channel_index; /*!< portal-local static-dequeue idx */
+ uint8_t napi_armed; /*!< this queue requests DQRI wakeups */
+ struct dpaa2_napi_stash napi_stash; /*!< NAPI/DQRR demux FDs (~2 KB) */
};
struct swp_active_dqs {
diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
index 5375ea386d..bb8bd86103 100644
--- a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
@@ -189,6 +189,7 @@ int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p);
* @p: the given software portal object.
* @mask: The value to set in SWP_IIR register.
*/
+__rte_internal
void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit);
/************/
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c b/drivers/bus/fslmc/qbman/qbman_portal.c
index 84853924e7..947415363a 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.c
+++ b/drivers/bus/fslmc/qbman/qbman_portal.c
@@ -448,6 +448,7 @@ int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p)
return qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_IIR);
}
+RTE_EXPORT_INTERNAL_SYMBOL(qbman_swp_interrupt_set_inhibit)
void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit)
{
qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_IIR,
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index 803a8321e0..61e7c820de 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -623,6 +623,8 @@ dpaa2_clear_queue_active_dps(struct dpaa2_queue *q, int num_lcores)
}
}
+static void dpaa2_dev_rx_queue_intr_unbind(struct dpaa2_queue *dpaa2_q);
+
static void
dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev)
{
@@ -640,6 +642,12 @@ dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev)
/* cleaning up queue storage */
for (i = 0; i < priv->nb_rx_queues; i++) {
dpaa2_q = priv->rx_vq[i];
+ if (dpaa2_q->napi_dpcon) { /* release the rx-intr channel */
+ dpaa2_dev_rx_queue_intr_unbind(dpaa2_q);
+ rte_dpaa2_free_dpcon_dev(dpaa2_q->napi_dpcon);
+ dpaa2_q->napi_dpcon = NULL;
+ dpaa2_q->napi_sub_dpio = NULL;
+ }
dpaa2_clear_queue_active_dps(dpaa2_q,
RTE_MAX_LCORE);
dpaa2_queue_storage_free(dpaa2_q,
@@ -845,6 +853,19 @@ dpaa2_eth_dev_configure(struct rte_eth_dev *dev)
}
}
+ if (dev->data->dev_conf.intr_conf.rxq) {
+ if (!dev->intr_handle)
+ dev->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
+ if (!dev->intr_handle ||
+ rte_intr_vec_list_alloc(dev->intr_handle, "rxq_intr",
+ dev->data->nb_rx_queues) ||
+ rte_intr_nb_efd_set(dev->intr_handle, dev->data->nb_rx_queues) ||
+ rte_intr_type_set(dev->intr_handle, RTE_INTR_HANDLE_EXT)) {
+ DPAA2_PMD_ERR("Failed to set up rx-queue interrupts");
+ return -rte_errno;
+ }
+ }
+
dpaa2_tm_init(dev);
return 0;
@@ -863,6 +884,7 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev,
{
struct dpaa2_dev_priv *priv = dev->data->dev_private;
struct fsl_mc_io *dpni = dev->process_private;
+ bool dpcon_allocated = false;
struct dpaa2_queue *dpaa2_q;
struct dpni_queue cfg;
uint8_t options = 0;
@@ -903,6 +925,21 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev,
dpaa2_q->bp_array = rte_dpaa2_bpid_info;
dpaa2_q->offloads = rx_conf->offloads;
+ /* NAPI: grab a DPCON channel so dev_start can bind this FQ statically.
+ * The DQRR burst replaces the poll path for every queue at once, so a
+ * missing channel is fatal rather than a silent per-queue fallback.
+ */
+ dpaa2_q->napi_sub_dpio = NULL;
+ if (dev->data->dev_conf.intr_conf.rxq && !dpaa2_q->napi_dpcon) {
+ dpaa2_q->napi_dpcon = rte_dpaa2_alloc_dpcon_dev();
+ if (!dpaa2_q->napi_dpcon) {
+ DPAA2_PMD_ERR("rxq %d: no DPCON for rx-queue interrupts",
+ rx_queue_id);
+ return -ENODEV;
+ }
+ dpcon_allocated = true;
+ }
+
/*Get the flow id from given VQ id*/
flow_id = dpaa2_q->flow_id;
memset(&cfg, 0, sizeof(struct dpni_queue));
@@ -910,6 +947,10 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev,
options = options | DPNI_QUEUE_OPT_USER_CTX;
cfg.user_context = (size_t)(dpaa2_q);
+ /* clear any stale DPCON dest left scheduled by a prior rx-intr run */
+ options |= DPNI_QUEUE_OPT_DEST;
+ cfg.destination.type = DPNI_DEST_NONE;
+
/* check if a private cgr available. */
for (i = 0; i < priv->max_cgs; i++) {
if (!priv->cgid_in_use[i]) {
@@ -950,7 +991,7 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev,
dpaa2_q->tc_index, flow_id, options, &cfg);
if (ret) {
DPAA2_PMD_ERR("Error in setting the rx flow: = %d", ret);
- return ret;
+ goto err_free_dpcon;
}
dpaa2_q->nb_desc = nb_rx_desc;
@@ -991,7 +1032,7 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev,
if (ret) {
DPAA2_PMD_ERR("Error in setting taildrop. err=(%d)",
ret);
- return ret;
+ goto err_free_dpcon;
}
} else { /* Disable tail Drop */
struct dpni_taildrop taildrop = {0};
@@ -1011,12 +1052,22 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev,
if (ret) {
DPAA2_PMD_ERR("Error in setting taildrop. err=(%d)",
ret);
- return ret;
+ goto err_free_dpcon;
}
}
dev->data->rx_queues[rx_queue_id] = dpaa2_q;
return 0;
+
+err_free_dpcon:
+ /* free only the DPCON this call allocated; a pre-existing one belongs to
+ * an earlier setup and is released at dev_close
+ */
+ if (dpcon_allocated) {
+ rte_dpaa2_free_dpcon_dev(dpaa2_q->napi_dpcon);
+ dpaa2_q->napi_dpcon = NULL;
+ }
+ return ret;
}
static int
@@ -1175,6 +1226,62 @@ dpaa2_dev_tx_queue_setup(struct rte_eth_dev *dev,
return 0;
}
+/* Fully release a queue's rx-interrupt state: detach the FQ from its DPCON,
+ * unbind the static dequeue channel from the portal and free any stashed FDs.
+ * Teardown only: the port is stopped and the portal quiesced; not a runtime
+ * rx_queue_intr_disable() replacement. Call before freeing the DPCON.
+ */
+static void
+dpaa2_dev_rx_queue_intr_unbind(struct dpaa2_queue *dpaa2_q)
+{
+ struct dpaa2_dev_priv *priv;
+ struct dpaa2_dpio_dev *dpio;
+ struct fsl_mc_io *dpni;
+ struct dpni_queue cfg;
+ int ret;
+
+ if (!dpaa2_q || !dpaa2_q->napi_dpcon)
+ return;
+
+ /* detach the FQ from its DPCON so it no longer points at a channel
+ * about to be returned to the pool (dpni is disabled at teardown)
+ */
+ priv = dpaa2_q->eth_data->dev_private;
+ dpni = priv->eth_dev->process_private;
+ memset(&cfg, 0, sizeof(cfg));
+ cfg.destination.type = DPNI_DEST_NONE;
+ ret = dpni_set_queue(dpni, CMD_PRI_LOW, priv->token, DPNI_QUEUE_RX,
+ dpaa2_q->tc_index, dpaa2_q->flow_id,
+ DPNI_QUEUE_OPT_DEST, &cfg);
+ if (ret)
+ DPAA2_PMD_ERR("napi: DEST_NONE rxq flow %u: %d",
+ dpaa2_q->flow_id, ret);
+
+ /* unbind the static dequeue channel from the portal it was armed on */
+ dpio = rte_atomic_load_explicit(&dpaa2_q->napi_sub_dpio,
+ rte_memory_order_acquire);
+ if (dpio) {
+ qbman_swp_push_set(dpio->sw_portal,
+ dpaa2_q->napi_channel_index, 0);
+ if (dpaa2_q->napi_armed) {
+ dpaa2_q->napi_armed = 0;
+ if (dpio->ethrx_intr_refcnt > 0 &&
+ --dpio->ethrx_intr_refcnt == 0)
+ qbman_swp_interrupt_set_inhibit(dpio->sw_portal, 1);
+ }
+ ret = dpio_remove_static_dequeue_channel(dpio->dpio, CMD_PRI_LOW,
+ dpio->token, dpaa2_q->napi_dpcon->dpcon_id);
+ if (ret)
+ DPAA2_PMD_ERR("napi: remove DPCON %d static dequeue channel: %d",
+ dpaa2_q->napi_dpcon->dpcon_id, ret);
+ rte_atomic_store_explicit(&dpaa2_q->napi_sub_dpio, NULL,
+ rte_memory_order_release);
+ }
+
+ /* free FDs parked for this queue but never drained by a burst */
+ dpaa2_dev_rx_queue_napi_stash_drain(dpaa2_q);
+}
+
static void
dpaa2_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
@@ -1204,6 +1311,12 @@ dpaa2_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t rx_queue_id)
priv->cgid_in_use[dpaa2_q->cgid] = 0;
dpaa2_q->cgid = DPAA2_INVALID_CGID;
}
+
+ if (dpaa2_q->napi_dpcon) {
+ dpaa2_dev_rx_queue_intr_unbind(dpaa2_q);
+ rte_dpaa2_free_dpcon_dev(dpaa2_q->napi_dpcon);
+ dpaa2_q->napi_dpcon = NULL;
+ }
}
static int
@@ -1354,6 +1467,36 @@ dpaa2_dev_start(struct rte_eth_dev *dev)
intr_handle = dpaa2_dev->intr_handle;
PMD_INIT_FUNC_TRACE();
+
+ /* NAPI: bind each rx FQ to its own DPCON channel while the dpni is still
+ * disabled (a DEST set_queue on an enabled dpni wedges the shared MC).
+ * Static, affinity-free; the polling worker subscribes its portal later.
+ */
+ if (dev->data->dev_conf.intr_conf.rxq) {
+ for (i = 0; i < data->nb_rx_queues; i++) {
+ dpaa2_q = data->rx_queues[i];
+ if (!dpaa2_q->napi_dpcon)
+ continue;
+ memset(&cfg, 0, sizeof(cfg));
+ cfg.destination.type = DPNI_DEST_DPCON;
+ cfg.destination.id = dpaa2_q->napi_dpcon->dpcon_id;
+ cfg.user_context = (size_t)dpaa2_q;
+ ret = dpni_set_queue(dpni, CMD_PRI_LOW, priv->token,
+ DPNI_QUEUE_RX, dpaa2_q->tc_index,
+ dpaa2_q->flow_id,
+ DPNI_QUEUE_OPT_DEST | DPNI_QUEUE_OPT_USER_CTX,
+ &cfg);
+ if (ret) {
+ DPAA2_PMD_ERR("napi: DPCON bind rxq %d: %d", i, ret);
+ return ret;
+ }
+ }
+ /* DQRR burst for all queues; a queue only yields frames once
+ * rx_queue_intr_enable() has subscribed its portal
+ */
+ dev->rx_pkt_burst = dpaa2_dev_rx_dqrr;
+ }
+
ret = dpni_enable(dpni, CMD_PRI_LOW, priv->token);
if (ret) {
DPAA2_PMD_ERR("Failure in enabling dpni %d device: err=%d",
@@ -1824,6 +1967,13 @@ dpaa2_dev_stats_get(struct rte_eth_dev *dev,
stats->oerrors = value.page_2.egress_discarded_frames;
stats->imissed = value.page_2.ingress_nobuffer_discards;
+ /* software Rx drops (full napi stash) are not in the HW counters */
+ for (i = 0; i < priv->nb_rx_queues; i++) {
+ dpaa2_rxq = priv->rx_vq[i];
+ if (dpaa2_rxq != NULL)
+ stats->imissed += dpaa2_rxq->err_pkts;
+ }
+
/* Fill in per queue stats */
if (qstats != NULL) {
for (i = 0; (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) &&
@@ -2137,8 +2287,10 @@ dpaa2_dev_stats_reset(struct rte_eth_dev *dev)
/* Reset the per queue stats in dpaa2_queue structure */
for (i = 0; i < priv->nb_rx_queues; i++) {
dpaa2_q = priv->rx_vq[i];
- if (dpaa2_q)
+ if (dpaa2_q) {
dpaa2_q->rx_pkts = 0;
+ dpaa2_q->err_pkts = 0;
+ }
}
for (i = 0; i < priv->nb_tx_queues; i++) {
@@ -2698,6 +2850,135 @@ rte_pmd_dpaa2_thread_init(void)
}
}
+/* Arm rx-queue interrupts on the worker lcore: subscribe its ethrx portal to
+ * the queue's DPCON channel (one-shot per-portal MC) and unmask the portal DQRI
+ * (pure QBMan).
+ *
+ * Affinity is static queue-to-lcore; a lcore may own several rx queues. The
+ * DQRI and the eventfd are portal-wide, so frames are demuxed by fqd_ctx in the
+ * burst and the portal's inhibit bit is reference-counted by the number of its
+ * queues currently armed (ethrx_intr_refcnt) -- disabling one queue must not
+ * mask wakeups still wanted by its siblings. napi_armed and ethrx_intr_refcnt
+ * are plain (not atomic): these ops run on the queue's owner lcore against its
+ * own portal (one portal per lcore), so per-portal isolation keeps them from
+ * racing, not control-plane serialization.
+ *
+ * A re-home reclaims the channel by poking the old portal, so the caller must
+ * have quiesced the previous owner and disabled the queue there; napi_armed is
+ * then 0 and only the new portal is counted.
+ */
+static int
+dpaa2_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ struct dpaa2_dev_priv *priv = dev->data->dev_private;
+ struct dpaa2_queue *dpaa2_q = priv->rx_vq[queue_id];
+ struct dpaa2_dpio_dev *dpio, *old;
+ int ret;
+
+ if (!dpaa2_q->napi_dpcon)
+ return -ENOTSUP; /* no channel -> caller keeps polling */
+
+ if (dpaa2_affine_qbman_ethrx_swp())
+ return -EIO;
+ dpio = DPAA2_PER_LCORE_ETHRX_DPIO;
+
+ /* build_epoll=false: the generic ethdev rx-intr API waits on the
+ * application epoll, not the portal's private one (event PMD only).
+ */
+ ret = dpaa2_dpio_intr_init(dpio, false); /* VFIO eventfd, no MC */
+ if (ret)
+ return ret;
+
+ old = rte_atomic_load_explicit(&dpaa2_q->napi_sub_dpio, rte_memory_order_acquire);
+ if (old && old != dpio && dpaa2_q->napi_armed) {
+ DPAA2_PMD_ERR("rxq %d still armed on another portal; disable it first",
+ queue_id);
+ return -EBUSY;
+ }
+ if (old != dpio) {
+ if (old) { /* reclaim from old portal (quiesced; QBMan MMIO unsynced) */
+ qbman_swp_push_set(old->sw_portal,
+ dpaa2_q->napi_channel_index, 0);
+ ret = dpio_remove_static_dequeue_channel(old->dpio,
+ CMD_PRI_LOW, old->token,
+ dpaa2_q->napi_dpcon->dpcon_id);
+ /* push_set(0) above already stops the old portal from
+ * dequeuing; a failed unbind only leaks a static-channel
+ * slot on the old DPIO, so warn and proceed
+ */
+ if (ret)
+ DPAA2_PMD_WARN("napi: reclaim rxq %d: %d",
+ queue_id, ret);
+ /* on no portal until the add below succeeds */
+ rte_atomic_store_explicit(&dpaa2_q->napi_sub_dpio, NULL,
+ rte_memory_order_release);
+ }
+ ret = dpio_add_static_dequeue_channel(dpio->dpio, CMD_PRI_LOW,
+ dpio->token, dpaa2_q->napi_dpcon->dpcon_id,
+ &dpaa2_q->napi_channel_index);
+ if (ret) {
+ DPAA2_PMD_ERR("napi: subscribe rxq %d: %d", queue_id, ret);
+ return ret;
+ }
+ qbman_swp_push_set(dpio->sw_portal,
+ dpaa2_q->napi_channel_index, 1);
+ /* point this queue's eventfd at the portal's DQRI fd so the
+ * generic rte_eth_dev_rx_intr_ctl_q epoll wakes on it
+ */
+ if (rte_intr_vec_list_index_set(dev->intr_handle, queue_id, queue_id) ||
+ rte_intr_efds_index_set(dev->intr_handle, queue_id,
+ rte_intr_fd_get(dpio->intr_handle))) {
+ DPAA2_PMD_ERR("napi: efd wiring rxq %d", queue_id);
+ /* unwind the half-done subscription so HW and driver
+ * state stay consistent
+ */
+ qbman_swp_push_set(dpio->sw_portal,
+ dpaa2_q->napi_channel_index, 0);
+ dpio_remove_static_dequeue_channel(dpio->dpio,
+ CMD_PRI_LOW, dpio->token,
+ dpaa2_q->napi_dpcon->dpcon_id);
+ return -EIO;
+ }
+ rte_atomic_store_explicit(&dpaa2_q->napi_sub_dpio, dpio, rte_memory_order_release);
+ }
+
+ /* arm this queue; the portal DQRI is unmasked only on the 0 -> 1 edge
+ * of its armed-queue count
+ */
+ if (!dpaa2_q->napi_armed) {
+ dpaa2_q->napi_armed = 1;
+ if (dpio->ethrx_intr_refcnt++ == 0) {
+ qbman_swp_interrupt_clear_status(dpio->sw_portal,
+ 0xffffffff);
+ qbman_swp_interrupt_set_inhibit(dpio->sw_portal, 0);
+ }
+ }
+
+ return 0;
+}
+
+/* Disarm rx-queue interrupts for this queue. The portal DQRI is masked only
+ * once the last of its queues disarms; act on the portal the queue is actually
+ * subscribed to, not the caller's current portal.
+ */
+static int
+dpaa2_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+ struct dpaa2_dev_priv *priv = dev->data->dev_private;
+ struct dpaa2_queue *dpaa2_q = priv->rx_vq[queue_id];
+ struct dpaa2_dpio_dev *dpio;
+
+ dpio = rte_atomic_load_explicit(&dpaa2_q->napi_sub_dpio, rte_memory_order_acquire);
+ if (dpio && dpaa2_q->napi_armed) {
+ dpaa2_q->napi_armed = 0;
+ if (dpio->ethrx_intr_refcnt > 0 &&
+ --dpio->ethrx_intr_refcnt == 0)
+ qbman_swp_interrupt_set_inhibit(dpio->sw_portal, 1);
+ }
+
+ return 0;
+}
+
static struct eth_dev_ops dpaa2_ethdev_ops = {
.dev_configure = dpaa2_eth_dev_configure,
.dev_start = dpaa2_dev_start,
@@ -2726,6 +3007,8 @@ static struct eth_dev_ops dpaa2_ethdev_ops = {
.vlan_tpid_set = dpaa2_vlan_tpid_set,
.rx_queue_setup = dpaa2_dev_rx_queue_setup,
.rx_queue_release = dpaa2_dev_rx_queue_release,
+ .rx_queue_intr_enable = dpaa2_dev_rx_queue_intr_enable,
+ .rx_queue_intr_disable = dpaa2_dev_rx_queue_intr_disable,
.tx_queue_setup = dpaa2_dev_tx_queue_setup,
.rx_burst_mode_get = dpaa2_dev_rx_burst_mode_get,
.tx_burst_mode_get = dpaa2_dev_tx_burst_mode_get,
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.h b/drivers/net/dpaa2/dpaa2_ethdev.h
index 4da47a543a..3765f79e84 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.h
+++ b/drivers/net/dpaa2/dpaa2_ethdev.h
@@ -491,6 +491,9 @@ uint16_t dpaa2_dev_loopback_rx(void *queue, struct rte_mbuf **bufs,
uint16_t dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs,
uint16_t nb_pkts);
+uint16_t dpaa2_dev_rx_dqrr(void *queue, struct rte_mbuf **bufs,
+ uint16_t nb_pkts);
+void dpaa2_dev_rx_queue_napi_stash_drain(struct dpaa2_queue *dpaa2_q);
void dpaa2_dev_process_parallel_event(struct qbman_swp *swp,
const struct qbman_fd *fd,
const struct qbman_result *dq,
diff --git a/drivers/net/dpaa2/dpaa2_rxtx.c b/drivers/net/dpaa2/dpaa2_rxtx.c
index b316e23e87..189accc1de 100644
--- a/drivers/net/dpaa2/dpaa2_rxtx.c
+++ b/drivers/net/dpaa2/dpaa2_rxtx.c
@@ -922,6 +922,128 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
return num_rx;
}
+/* Convert a DQRR'd FD (single or scatter-gather) to an mbuf and apply software
+ * VLAN strip, like the poll path.
+ */
+static inline struct rte_mbuf *
+dpaa2_dqrr_fd_to_mbuf(const struct qbman_fd *fd,
+ struct rte_eth_dev_data *eth_data)
+{
+ struct rte_mbuf *m;
+
+ if (unlikely(DPAA2_FD_GET_FORMAT(fd) == qbman_fd_sg))
+ m = eth_sg_fd_to_mbuf(fd, eth_data->port_id);
+ else
+ m = eth_fd_to_mbuf(fd, eth_data->port_id);
+ if (eth_data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
+ rte_vlan_strip(m);
+ return m;
+}
+
+/* prefetch a DQRR'd FD's HW annotation (parse area) ahead of conversion */
+static inline void
+dpaa2_dqrr_prefetch_annot(const struct qbman_fd *fd)
+{
+ rte_prefetch0((void *)((size_t)DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd))
+ + DPAA2_FD_PTA_SIZE));
+}
+
+/* Free FDs a sibling burst parked in this queue's stash but that were never
+ * drained (queue released/freed while the lcore still held its frames).
+ */
+void
+dpaa2_dev_rx_queue_napi_stash_drain(struct dpaa2_queue *dpaa2_q)
+{
+ struct dpaa2_napi_stash *stash = &dpaa2_q->napi_stash;
+ const struct qbman_fd *fd;
+
+ while (stash->head != stash->tail) {
+ fd = &stash->fd[stash->head & (DPAA2_NAPI_FD_STASH_SIZE - 1)];
+ rte_pktmbuf_free(dpaa2_dqrr_fd_to_mbuf(fd, dpaa2_q->eth_data));
+ stash->head++;
+ }
+ stash->head = 0;
+ stash->tail = 0;
+}
+
+/* rx interrupt/DQRR path: the FQ is scheduled to a channel the lcore's ethrx
+ * portal statically dequeues -- a VDQ on a scheduled FQ never completes, so DQRR
+ * is the only model compatible with interrupt sleep. One portal serves every
+ * queue the lcore owns, so the burst demuxes by fqd_ctx: own frames are
+ * returned, foreign ones have their raw FD parked in the target queue's stash.
+ *
+ * The application must therefore poll all queues assigned to the lcore after a
+ * wakeup -- the same scheduling contract as plain DPDK polling. When a foreign
+ * queue's stash is full the FD is dropped (freed) rather than left on the shared
+ * DQRR ring, which would head-of-line block every other queue on the portal.
+ */
+uint16_t __rte_hot
+dpaa2_dev_rx_dqrr(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+ struct dpaa2_queue *dpaa2_q = queue;
+ struct rte_eth_dev_data *eth_data = dpaa2_q->eth_data;
+ struct dpaa2_napi_stash *stash = &dpaa2_q->napi_stash;
+ const struct qbman_result *dq;
+ const struct qbman_fd *fd;
+ struct dpaa2_queue *rxq;
+ struct qbman_swp *swp;
+ uint16_t num_rx = 0;
+
+ if (unlikely(!DPAA2_PER_LCORE_ETHRX_DPIO)) {
+ if (dpaa2_affine_qbman_ethrx_swp()) {
+ DPAA2_PMD_ERR("Failure in affining portal");
+ return 0;
+ }
+ }
+ swp = DPAA2_PER_LCORE_ETHRX_PORTAL;
+
+ /* our frames parked by another queue's burst -- convert now (hot) */
+ while (num_rx < nb_pkts && stash->head != stash->tail) {
+ fd = &stash->fd[stash->head & (DPAA2_NAPI_FD_STASH_SIZE - 1)];
+ if (dpaa2_svr_family != SVR_LX2160A &&
+ (uint16_t)(stash->head + 1) != stash->tail)
+ dpaa2_dqrr_prefetch_annot(&stash->fd[(stash->head + 1) &
+ (DPAA2_NAPI_FD_STASH_SIZE - 1)]);
+ bufs[num_rx++] = dpaa2_dqrr_fd_to_mbuf(fd, eth_data);
+ stash->head++;
+ }
+
+ while (num_rx < nb_pkts) {
+ dq = qbman_swp_dqrr_next(swp);
+ if (!dq)
+ break; /* ring momentarily empty */
+ qbman_swp_prefetch_dqrr_next(swp);
+ fd = qbman_result_DQ_fd(dq);
+ /* parse summary is in the FRC on LX2160A; annotation is HW-stashed */
+ if (dpaa2_svr_family != SVR_LX2160A)
+ dpaa2_dqrr_prefetch_annot(fd);
+ rxq = (struct dpaa2_queue *)(size_t)qbman_result_DQ_fqd_ctx(dq);
+ if (unlikely(!rxq))
+ rxq = dpaa2_q;
+ if (rxq == dpaa2_q) {
+ bufs[num_rx++] = dpaa2_dqrr_fd_to_mbuf(fd, eth_data);
+ } else {
+ struct dpaa2_napi_stash *fs = &rxq->napi_stash;
+
+ if (unlikely((uint16_t)(fs->tail - fs->head) >=
+ DPAA2_NAPI_FD_STASH_SIZE)) {
+ /* stash full: drop rather than leave it on the ring
+ * and head-of-line block the shared portal
+ */
+ rte_pktmbuf_free(dpaa2_dqrr_fd_to_mbuf(fd, rxq->eth_data));
+ rxq->err_pkts++;
+ } else {
+ fs->fd[fs->tail & (DPAA2_NAPI_FD_STASH_SIZE - 1)] = *fd;
+ fs->tail++;
+ }
+ }
+ qbman_swp_dqrr_consume(swp, dq);
+ }
+
+ dpaa2_q->rx_pkts += num_rx;
+ return num_rx;
+}
+
void __rte_hot
dpaa2_dev_process_parallel_event(struct qbman_swp *swp,
const struct qbman_fd *fd,
--
2.43.0
^ permalink raw reply related
* [PATCH v2 2/6] bus/fslmc/dpio: make the portal DQRI epoll optional
From: Maxime Leroy @ 2026-06-16 10:27 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616102727.708948-1-maxime@leroys.fr>
dpaa2_dpio_intr_init() builds a private epoll instance the event PMD
sleeps on. The upcoming net rx-queue-interrupt path waits on the
application's own epoll instead, so that instance would be built but
never used.
Add a build_epoll parameter: pass true to build it (event PMD), false
to skip the epoll_create/epoll_ctl. epoll_fd is set to -1 when none is
built and closed in intr_deinit only when valid. The sole caller passes
true: no functional change.
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
drivers/bus/fslmc/portal/dpaa2_hw_dpio.c | 44 +++++++++++++++++-------
1 file changed, 32 insertions(+), 12 deletions(-)
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
index 2a9e519668..3a5abb2e6d 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -205,13 +205,12 @@ dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, int cpu_id)
fclose(file);
}
-static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev)
+static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, bool build_epoll)
{
struct epoll_event epoll_ev;
int eventfd, dpio_epoll_fd, ret;
int threshold = 0x3, timeout = 0xFF;
- dpio_epoll_fd = epoll_create(1);
ret = rte_dpaa2_intr_enable(dpio_dev->intr_handle, 0);
if (ret) {
DPAA2_BUS_ERR("Interrupt registration failed");
@@ -231,16 +230,34 @@ static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev)
qbman_swp_dqrr_thrshld_write(dpio_dev->sw_portal, threshold);
qbman_swp_intr_timeout_write(dpio_dev->sw_portal, timeout);
- eventfd = rte_intr_fd_get(dpio_dev->intr_handle);
- epoll_ev.events = EPOLLIN | EPOLLPRI | EPOLLET;
- epoll_ev.data.fd = eventfd;
+ dpio_dev->epoll_fd = -1;
- ret = epoll_ctl(dpio_epoll_fd, EPOLL_CTL_ADD, eventfd, &epoll_ev);
- if (ret < 0) {
- DPAA2_BUS_ERR("epoll_ctl failed");
- return -1;
+ /* The event PMD dequeues by sleeping on a private epoll instance owned
+ * by the portal, so build it here. A caller that waits on another
+ * epoll (the net rx-queue-interrupt path uses the application's) skips
+ * this.
+ */
+ if (build_epoll) {
+ dpio_epoll_fd = epoll_create(1);
+ if (dpio_epoll_fd < 0) {
+ DPAA2_BUS_ERR("epoll_create failed");
+ rte_dpaa2_intr_disable(dpio_dev->intr_handle, 0);
+ return -1;
+ }
+
+ eventfd = rte_intr_fd_get(dpio_dev->intr_handle);
+ epoll_ev.events = EPOLLIN | EPOLLPRI | EPOLLET;
+ epoll_ev.data.fd = eventfd;
+
+ ret = epoll_ctl(dpio_epoll_fd, EPOLL_CTL_ADD, eventfd, &epoll_ev);
+ if (ret < 0) {
+ DPAA2_BUS_ERR("epoll_ctl failed");
+ rte_dpaa2_intr_disable(dpio_dev->intr_handle, 0);
+ close(dpio_epoll_fd);
+ return -1;
+ }
+ dpio_dev->epoll_fd = dpio_epoll_fd;
}
- dpio_dev->epoll_fd = dpio_epoll_fd;
return 0;
}
@@ -253,7 +270,10 @@ static void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev)
if (ret)
DPAA2_BUS_ERR("DPIO interrupt disable failed");
- close(dpio_dev->epoll_fd);
+ if (dpio_dev->epoll_fd >= 0) {
+ close(dpio_dev->epoll_fd);
+ dpio_dev->epoll_fd = -1;
+ }
}
#endif
@@ -277,7 +297,7 @@ dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
}
#ifdef RTE_EVENT_DPAA2
- if (dpaa2_dpio_intr_init(dpio_dev)) {
+ if (dpaa2_dpio_intr_init(dpio_dev, true)) {
DPAA2_BUS_ERR("Interrupt registration failed for dpio");
return -1;
}
--
2.43.0
^ permalink raw reply related
* [PATCH v2 1/6] bus/fslmc: move DPCON management from event driver to bus
From: Maxime Leroy @ 2026-06-16 10:27 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy, Hemant Agrawal, Sachin Saxena
In-Reply-To: <20260616102727.708948-1-maxime@leroys.fr>
The DPCON allocation helpers (rte_dpaa2_alloc_dpcon_dev /
rte_dpaa2_free_dpcon_dev) lived in the event driver, but a notification
channel is a generic QBMan resource. Move dpaa2_hw_dpcon.c to the fslmc
bus and export the helpers as internal symbols so both the event PMD and
the net driver's rx-queue interrupt path can draw channels from the same
pool. No functional change.
Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
drivers/bus/fslmc/meson.build | 1 +
.../dpaa2 => bus/fslmc/portal}/dpaa2_hw_dpcon.c | 16 +++++++---------
drivers/bus/fslmc/portal/dpaa2_hw_pvt.h | 8 ++++++++
drivers/event/dpaa2/dpaa2_eventdev.h | 5 +++--
drivers/event/dpaa2/meson.build | 1 -
5 files changed, 19 insertions(+), 12 deletions(-)
rename drivers/{event/dpaa2 => bus/fslmc/portal}/dpaa2_hw_dpcon.c (90%)
diff --git a/drivers/bus/fslmc/meson.build b/drivers/bus/fslmc/meson.build
index ceae1c6c11..50d9e91a37 100644
--- a/drivers/bus/fslmc/meson.build
+++ b/drivers/bus/fslmc/meson.build
@@ -22,6 +22,7 @@ sources = files(
'mc/mc_sys.c',
'portal/dpaa2_hw_dpbp.c',
'portal/dpaa2_hw_dpci.c',
+ 'portal/dpaa2_hw_dpcon.c',
'portal/dpaa2_hw_dpio.c',
'portal/dpaa2_hw_dprc.c',
'qbman/qbman_portal.c',
diff --git a/drivers/event/dpaa2/dpaa2_hw_dpcon.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpcon.c
similarity index 90%
rename from drivers/event/dpaa2/dpaa2_hw_dpcon.c
rename to drivers/bus/fslmc/portal/dpaa2_hw_dpcon.c
index ea5b0d4b85..6fd96ec0b9 100644
--- a/drivers/event/dpaa2/dpaa2_hw_dpcon.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpcon.c
@@ -18,13 +18,12 @@
#include <rte_cycles.h>
#include <rte_kvargs.h>
#include <dev_driver.h>
-#include <ethdev_driver.h>
+#include <eal_export.h>
#include <bus_fslmc_driver.h>
#include <mc/fsl_dpcon.h>
#include <portal/dpaa2_hw_pvt.h>
-#include "dpaa2_eventdev.h"
-#include "dpaa2_eventdev_logs.h"
+#include <fslmc_logs.h>
TAILQ_HEAD(dpcon_dev_list, dpaa2_dpcon_dev);
static struct dpcon_dev_list dpcon_dev_list
@@ -55,8 +54,7 @@ rte_dpaa2_create_dpcon_device(int dev_fd __rte_unused,
/* Allocate DPAA2 dpcon handle */
dpcon_node = rte_malloc(NULL, sizeof(struct dpaa2_dpcon_dev), 0);
if (!dpcon_node) {
- DPAA2_EVENTDEV_ERR(
- "Memory allocation failed for dpcon device");
+ DPAA2_BUS_ERR("Memory allocation failed for dpcon device");
return -1;
}
@@ -65,8 +63,7 @@ rte_dpaa2_create_dpcon_device(int dev_fd __rte_unused,
ret = dpcon_open(&dpcon_node->dpcon,
CMD_PRI_LOW, dpcon_id, &dpcon_node->token);
if (ret) {
- DPAA2_EVENTDEV_ERR("Unable to open dpcon device: err(%d)",
- ret);
+ DPAA2_BUS_ERR("Unable to open dpcon device: err(%d)", ret);
rte_free(dpcon_node);
return -1;
}
@@ -75,8 +72,7 @@ rte_dpaa2_create_dpcon_device(int dev_fd __rte_unused,
ret = dpcon_get_attributes(&dpcon_node->dpcon,
CMD_PRI_LOW, dpcon_node->token, &attr);
if (ret != 0) {
- DPAA2_EVENTDEV_ERR("dpcon attribute fetch failed: err(%d)",
- ret);
+ DPAA2_BUS_ERR("dpcon attribute fetch failed: err(%d)", ret);
rte_free(dpcon_node);
return -1;
}
@@ -92,6 +88,7 @@ rte_dpaa2_create_dpcon_device(int dev_fd __rte_unused,
return 0;
}
+RTE_EXPORT_INTERNAL_SYMBOL(rte_dpaa2_alloc_dpcon_dev)
struct dpaa2_dpcon_dev *rte_dpaa2_alloc_dpcon_dev(void)
{
struct dpaa2_dpcon_dev *dpcon_dev = NULL;
@@ -105,6 +102,7 @@ struct dpaa2_dpcon_dev *rte_dpaa2_alloc_dpcon_dev(void)
return dpcon_dev;
}
+RTE_EXPORT_INTERNAL_SYMBOL(rte_dpaa2_free_dpcon_dev)
void rte_dpaa2_free_dpcon_dev(struct dpaa2_dpcon_dev *dpcon)
{
struct dpaa2_dpcon_dev *dpcon_dev = NULL;
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index e625a5c035..79a2ec41e3 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -274,6 +274,14 @@ struct dpaa2_dpcon_dev {
uint8_t channel_index;
};
+/* DPCON channel allocation -- managed by the fslmc bus so both the net
+ * NAPI/DQRR rx path and the event PMD can grab channels.
+ */
+__rte_internal
+struct dpaa2_dpcon_dev *rte_dpaa2_alloc_dpcon_dev(void);
+__rte_internal
+void rte_dpaa2_free_dpcon_dev(struct dpaa2_dpcon_dev *dpcon);
+
/* Refer to Table 7-3 in SEC BG */
#define QBMAN_FLE_WORD4_FMT_SBF 0x0 /* Single buffer frame */
#define QBMAN_FLE_WORD4_FMT_SGE 0x2 /* Scatter gather frame */
diff --git a/drivers/event/dpaa2/dpaa2_eventdev.h b/drivers/event/dpaa2/dpaa2_eventdev.h
index bb87bdbab2..f53efce61c 100644
--- a/drivers/event/dpaa2/dpaa2_eventdev.h
+++ b/drivers/event/dpaa2/dpaa2_eventdev.h
@@ -85,8 +85,9 @@ struct dpaa2_eventdev {
uint32_t event_dev_cfg;
};
-struct dpaa2_dpcon_dev *rte_dpaa2_alloc_dpcon_dev(void);
-void rte_dpaa2_free_dpcon_dev(struct dpaa2_dpcon_dev *dpcon);
+/* rte_dpaa2_alloc_dpcon_dev()/rte_dpaa2_free_dpcon_dev() now live in the fslmc
+ * bus (portal/dpaa2_hw_pvt.h), which this header's includers already pull in.
+ */
int test_eventdev_dpaa2(void);
diff --git a/drivers/event/dpaa2/meson.build b/drivers/event/dpaa2/meson.build
index dd5063af43..62b8507652 100644
--- a/drivers/event/dpaa2/meson.build
+++ b/drivers/event/dpaa2/meson.build
@@ -7,7 +7,6 @@ if not is_linux
endif
deps += ['bus_vdev', 'net_dpaa2', 'crypto_dpaa2_sec']
sources = files(
- 'dpaa2_hw_dpcon.c',
'dpaa2_eventdev.c',
'dpaa2_eventdev_selftest.c',
)
--
2.43.0
^ permalink raw reply related
* [PATCH v2 0/6] net/dpaa2: NAPI-style Rx queue interrupts
From: Maxime Leroy @ 2026-06-16 10:27 UTC (permalink / raw)
To: dev; +Cc: Maxime Leroy
In-Reply-To: <20260611154926.392670-1-maxime@leroys.fr>
This series lets a dpaa2 worker sleep on a queue's data-availability
notification instead of busy-polling, exposed through the generic
rte_eth_dev_rx_intr_* API (NAPI-style: poll while frames keep coming,
arm the interrupt and sleep when the queue runs dry).
Why it is not a trivial .rx_queue_intr_enable
----------------------------------------------
A worker wakes on its software portal's DQRI, which fires when the
portal's DQRR holds frames. The default dpaa2 Rx burst pulls frames
from the FQ with a volatile dequeue and cannot be interrupt-driven; to
wake on the DQRI the FQ must instead be pushed to the portal's DQRR.
The natural dpni_set_queue with a notification destination would have to
target the worker's portal, but that portal is only known once a worker
affines, after dev_start, and that MC command holds the global MC lock
long enough to wedge the firmware while traffic runs. So the bind cannot
be done late, against the polling lcore.
Design
------
Each Rx FQ is bound to its own DPCON channel, statically, at dev_start
while the dpni is still disabled (no knowledge of the polling lcore). A
worker later subscribes its own ethrx portal to the channel and arms the
DQRI in rx_queue_intr_enable, a one-shot per-portal op, never the wedging
set_queue. One portal serves every queue a worker owns, so the DQRR
burst demuxes frames to their FQ by fqd_ctx; foreign frames are parked in
the target queue's stash, so the application polls all its queues after a
wakeup, the same scheduling contract as plain DPDK polling. A queue can
be re-homed to another lcore at runtime with no set_queue and no port
stop.
This reuses the event PMD's pushed/DQRR model but with one DPCON per FQ
and static affinity (no QBMan scheduling), so the DPCON allocator is
moved from the event driver to the fslmc bus and shared.
Patches 1 and 2 move the DPCON allocator to the fslmc bus and make the
portal DQRI epoll optional; patch 3 adds the interrupt support proper and
patch 4 tunes the DQRI coalescing holdoff. Patch 5 (rx_queue_count NULL on
the primary process) is a real fix that the interrupt path both depends on
and uncovered; it is tagged for stable and can be backported on its own.
Patch 6 (drop the software VLAN strip) is an independent net/dpaa2 change
that the interrupt path does not require.
The path also depends on two fixes sent separately: an eal change so the
shared portal eventfd does not fail with -EEXIST (already applied to main)
and the ethdev fix for fast-path ops left NULL after port stop (see
Depends-on below).
Tested on LX2160A (lx2160acex7).
Depends-on: series-38450 ("ethdev: fix fast-path ops on a stopped port")
v2:
- Dropped the RSS RETA patch, an independent net/dpaa2 change the
interrupt path does not require; it will be sent as its own series.
- Dropped the ethdev fast-path ops fix; it is now a standalone series
(Depends-on above).
- Dropped the eal/interrupts -EEXIST fix, applied to main by David Marchand.
- Declared qbman_swp_interrupt_set_inhibit and qbman_swp_dqrr_size
__rte_internal (David Marchand).
- Minor formatting cleanup in the Rx interrupt setup.
Maxime Leroy (6):
bus/fslmc: move DPCON management from event driver to bus
bus/fslmc/dpio: make the portal DQRI epoll optional
net/dpaa2: support Rx queue interrupts
bus/fslmc/dpio: tune DQRI interrupt coalescing holdoff
net/dpaa2: fix Rx queue count for primary process
net/dpaa2: drop the fake software VLAN strip offload
doc/guides/nics/dpaa2.rst | 10 +
doc/guides/nics/features/dpaa2.ini | 1 +
doc/guides/rel_notes/release_26_07.rst | 7 +
drivers/bus/fslmc/meson.build | 1 +
.../fslmc/portal}/dpaa2_hw_dpcon.c | 16 +-
drivers/bus/fslmc/portal/dpaa2_hw_dpio.c | 113 ++++--
drivers/bus/fslmc/portal/dpaa2_hw_dpio.h | 12 +
drivers/bus/fslmc/portal/dpaa2_hw_pvt.h | 35 +-
.../fslmc/qbman/include/fsl_qbman_portal.h | 11 +
drivers/bus/fslmc/qbman/qbman_portal.c | 7 +
drivers/event/dpaa2/dpaa2_eventdev.h | 5 +-
drivers/event/dpaa2/meson.build | 1 -
drivers/net/dpaa2/dpaa2_ethdev.c | 349 +++++++++++++++++-
drivers/net/dpaa2/dpaa2_ethdev.h | 10 +
drivers/net/dpaa2/dpaa2_rxtx.c | 123 +++++-
15 files changed, 647 insertions(+), 54 deletions(-)
rename drivers/{event/dpaa2 => bus/fslmc/portal}/dpaa2_hw_dpcon.c (90%)
--
2.43.0
^ permalink raw reply
* RE: [PATCH 2/2] ethdev: return 0 from dummy queue count
From: Morten Brørup @ 2026-06-16 9:54 UTC (permalink / raw)
To: Maxime Leroy, dev
Cc: stable, Stephen Hemminger, Thomas Monjalon, Andrew Rybchenko,
Sunil Kumar Kori
In-Reply-To: <20260616094259.686555-3-maxime@leroys.fr>
> From: Maxime Leroy [mailto:maxime.leroys@gmail.com] On Behalf Of Maxime
> Leroy
> Sent: Tuesday, 16 June 2026 11.43
>
> The dummy rx_queue_count/tx_queue_count callback returned -ENOTSUP. On a
> port that is not started (freshly allocated, or stopped once the fast-path
> ops are reset to dummies) there are no packets queued, so the truthful
> answer is 0, not an error: querying the count is not an unsupported
> operation. This also matches the dummy Rx/Tx burst, which reports 0
> packets.
>
> A poll-mode worker checking rte_eth_rx_queue_count() across a concurrent
> port stop then sees an empty queue instead of a negative error.
>
> Fixes: 066f3d9cc21c ("ethdev: remove callback checks from fast path")
> Cc: stable@dpdk.org
>
> Suggested-by: Stephen Hemminger <stephen@networkplumber.org>
> Signed-off-by: Maxime Leroy <maxime@leroys.fr>
> ---
Acked-by: Morten Brørup <mb@smartsharesystems.com>
^ permalink raw reply
* Re: [PATCH] dts: avoid Scapy MAC resolution in Rx split test
From: Thomas Monjalon @ 2026-06-16 9:53 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: dev, Luca Vizzarro, Patrick Robb
In-Reply-To: <20260611115421.12c4e6ee@phoenix.local>
11/06/2026 20:54, Stephen Hemminger:
> On Wed, 10 Jun 2026 20:32:18 +0200
> Thomas Monjalon <thomas@monjalon.net> wrote:
>
> > The test gets the Ethernet header length from Scapy with len(Ether()).
> >
> > When building DTS API documentation, Sphinx imports the test module
> > and shows this warning:
> > WARNING: MAC address to reach destination not found. Using broadcast.
> >
> > Use a dummy MAC address so Scapy no longer performs
> > destination resolution during import.
> >
> > Fixes: 01c70544cffd ("dts: add selective Rx tests")
> >
> > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
>
> Thanks, I previously reported this as:
>
> https://bugs.dpdk.org/show_bug.cgi?id=1951
OK, added the tag in the commit log.
> Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Applied
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox