Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 2/6] uapi: pkt_cls: add TCA_ID_FRER action identifier
From: Xiaoliang Yang @ 2026-06-22  9:21 UTC (permalink / raw)
  To: netdev, linux-kernel, linux-kselftest
  Cc: davem, edumazet, kuba, pabeni, jhs, jiri, horms, shuah,
	vladimir.oltean, vinicius.gomes, fejes, xiaoliang.yang_1
In-Reply-To: <20260622092118.6846-1-xiaoliang.yang_1@nxp.com>

Register TCA_ID_FRER in the global tc action ID enum so that the FRER
tc action can be identified uniquely among all tc actions.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
---
 include/uapi/linux/pkt_cls.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 28d94b11d1aa..9b87f0455110 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -139,6 +139,7 @@ enum tca_id {
 	TCA_ID_MPLS,
 	TCA_ID_CT,
 	TCA_ID_GATE,
+	TCA_ID_FRER,
 	/* other actions go here */
 	__TCA_ID_MAX = 255
 };
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next 5/6] selftests: tc-testing: add JSON test cases for act_frer
From: Xiaoliang Yang @ 2026-06-22  9:21 UTC (permalink / raw)
  To: netdev, linux-kernel, linux-kselftest
  Cc: davem, edumazet, kuba, pabeni, jhs, jiri, horms, shuah,
	vladimir.oltean, vinicius.gomes, fejes, xiaoliang.yang_1
In-Reply-To: <20260622092118.6846-1-xiaoliang.yang_1@nxp.com>

Add a tc-testing JSON file covering the FRER (IEEE 802.1CB Frame
Replication and Elimination for Reliability) tc action (act_frer).

The test suite contains 32 test cases and exercises:

 - Creating push and recover actions with default and explicit parameters
   (tag-type, alg vector/match, history-length, reset-time, tag-pop,
   individual, take-no-seq)
 - Boundary values for history-length (1 and 32) and reset-time (0)
 - Combining multiple flags (frer_0011, frer_0012)
 - Statistics output format for push (SeqGen) and recover (passed,
   discarded, tagless, out-of-order, rogue, lost, resets)
 - Replace and delete operations
 - Flush all actions
 - Duplicate-index failure (expExitCode 255)
 - Control actions (continue, pipe) placed after the index token
 - Binding push and recover actions to egress/ingress clsact filters
 - Sharing a recover action across two filters and verifying the
   reference count increments
 - not_in_hw flag present in show output

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
---
 .../tc-testing/tc-tests/actions/frer.json     | 785 ++++++++++++++++++
 1 file changed, 785 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/frer.json

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/frer.json b/tools/testing/selftests/tc-testing/tc-tests/actions/frer.json
new file mode 100644
index 000000000000..d5be6ae156f7
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/frer.json
@@ -0,0 +1,785 @@
+[
+  {
+    "id": "frer_0001",
+    "name": "Create frer push action with default parameters",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer push index 1",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 1",
+    "matchPattern": "action order [0-9]+: frer push tag-type rtag index 1",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0002",
+    "name": "Create frer push action with explicit tag-type rtag",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer push tag-type rtag index 2",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 2",
+    "matchPattern": "action order [0-9]+: frer push tag-type rtag index 2",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0003",
+    "name": "Create frer recover action with default parameters",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover index 10",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 10",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 10 alg vector history-length [0-9]+ reset-time [0-9]+",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0004",
+    "name": "Create frer recover action with vector algorithm explicit",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover alg vector index 11",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 11",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 11 alg vector",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0005",
+    "name": "Create frer recover action with match algorithm",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover alg match index 12",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 12",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 12 alg match",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0006",
+    "name": "Create frer recover action with history-length 16",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover alg vector history-length 16 index 13",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 13",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 13 alg vector history-length 16",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0007",
+    "name": "Create frer recover action with reset-time 2000",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover alg vector reset-time 2000 index 14",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 14",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 14 alg vector history-length [0-9]+ reset-time 2000",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0008",
+    "name": "Create frer recover action with tag-pop flag",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover tag-pop index 15",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 15",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 15.*tag-pop",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0009",
+    "name": "Create frer recover action with individual flag",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover individual index 16",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 16",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 16.*individual",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0010",
+    "name": "Create frer recover action with take-no-seq flag",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover take-no-seq index 17",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 17",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 17.*take-no-seq",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0011",
+    "name": "Create frer recover action with vector alg and multiple parameters combined",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover alg vector history-length 16 reset-time 1000 tag-pop individual index 20",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 20",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 20 individual alg vector history-length 16 reset-time 1000 tag-pop",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0012",
+    "name": "Create frer recover action with match alg and all flags",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover alg match take-no-seq tag-pop individual index 21",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 21",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 21 individual alg match history-length [0-9]+ reset-time [0-9]+ tag-pop take-no-seq",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0013",
+    "name": "Show frer push action SeqGen statistics (zero after create)",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer push index 1"
+    ],
+    "cmdUnderTest": "$TC -s actions show action frer index 1",
+    "expExitCode": "0",
+    "verifyCmd": "$TC -s actions show action frer index 1",
+    "matchPattern": "SeqGen packets: 0",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0014",
+    "name": "Show frer recover action Statistics line (zero after create)",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer recover alg vector history-length 16 reset-time 1000 tag-pop index 10"
+    ],
+    "cmdUnderTest": "$TC -s actions show action frer index 10",
+    "expExitCode": "0",
+    "verifyCmd": "$TC -s actions show action frer index 10",
+    "matchPattern": "Statistics: passed=0 discarded=0 tagless=0 out-of-order=0 rogue=0 lost=0 resets=0",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0015",
+    "name": "Show frer recover action Statistics fields present",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer recover index 10"
+    ],
+    "cmdUnderTest": "$TC -s actions show action frer index 10",
+    "expExitCode": "0",
+    "verifyCmd": "$TC -s actions show action frer index 10",
+    "matchPattern": "Statistics: passed=[0-9]+ discarded=[0-9]+ tagless=[0-9]+ out-of-order=[0-9]+ rogue=[0-9]+ lost=[0-9]+ resets=[0-9]+",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0016",
+    "name": "Replace frer push action (same index)",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer push index 1"
+    ],
+    "cmdUnderTest": "$TC actions replace action frer push index 1",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 1",
+    "matchPattern": "action order [0-9]+: frer push tag-type rtag index 1",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0017",
+    "name": "Replace frer recover action changing algorithm from vector to match",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer recover alg vector index 10"
+    ],
+    "cmdUnderTest": "$TC actions replace action frer recover alg match index 10",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 10",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 10 alg match",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0018",
+    "name": "Delete frer push action by index",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer push index 1"
+    ],
+    "cmdUnderTest": "$TC actions del action frer index 1",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer",
+    "matchPattern": "frer push tag-type rtag index 1",
+    "matchCount": "0",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0019",
+    "name": "Flush all frer actions",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer push index 1",
+      "$TC actions add action frer recover index 10",
+      "$TC actions add action frer recover index 11"
+    ],
+    "cmdUnderTest": "$TC actions flush action frer",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer",
+    "matchPattern": "action order [0-9]+: frer",
+    "matchCount": "0",
+    "teardown": [
+      "$TC actions flush action frer 2>/dev/null || true"
+    ]
+  },
+  {
+    "id": "frer_0020",
+    "name": "Add duplicate frer action index fails without replace flag",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer push index 1"
+    ],
+    "cmdUnderTest": "$TC actions add action frer push index 1",
+    "expExitCode": "255",
+    "verifyCmd": "$TC actions show action frer index 1",
+    "matchPattern": "action order [0-9]+: frer push tag-type rtag index 1",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0021",
+    "name": "Create frer push action with continue control action",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer push index 1 continue",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 1",
+    "matchPattern": "action order [0-9]+: frer push tag-type rtag index 1.*control continue",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0022",
+    "name": "Create frer recover action with pipe control action",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover index 10 pipe",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 10",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 10.*control pipe",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0023",
+    "name": "Create frer recover action history-length minimum boundary (1)",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover alg vector history-length 1 index 30",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 30",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 30 alg vector history-length 1",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0024",
+    "name": "Create frer recover action history-length maximum boundary (32)",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover alg vector history-length 32 index 31",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 31",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 31 alg vector history-length 32",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0025",
+    "name": "Create frer recover action with reset-time 0 (timer disabled)",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ]
+    ],
+    "cmdUnderTest": "$TC actions add action frer recover alg vector reset-time 0 index 32",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 32",
+    "matchPattern": "action order [0-9]+: frer recover tag-type rtag index 32 alg vector history-length [0-9]+ reset-time 0",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0026",
+    "name": "List all frer actions shows correct count",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer push index 1",
+      "$TC actions add action frer recover alg vector index 10",
+      "$TC actions add action frer recover alg match tag-pop index 11"
+    ],
+    "cmdUnderTest": "$TC actions show action frer",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer",
+    "matchPattern": "action order [0-9]+: frer",
+    "matchCount": "3",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0027",
+    "name": "Bind frer push action to egress clsact filter",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "ip link del frer_dummy 2>/dev/null || true",
+      "ip link add frer_dummy type dummy",
+      "ip link set frer_dummy up",
+      "$TC qdisc add dev frer_dummy clsact"
+    ],
+    "cmdUnderTest": "$TC filter add dev frer_dummy egress protocol ip flower skip_hw action frer push index 1",
+    "expExitCode": "0",
+    "verifyCmd": "$TC filter show dev frer_dummy egress",
+    "matchPattern": "frer push tag-type rtag index 1",
+    "matchCount": "1",
+    "teardown": [
+      "$TC qdisc del dev frer_dummy clsact",
+      "$TC actions flush action frer",
+      "ip link del frer_dummy"
+    ]
+  },
+  {
+    "id": "frer_0028",
+    "name": "Bind frer recover action to ingress clsact filter",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "ip link del frer_dummy 2>/dev/null || true",
+      "ip link add frer_dummy type dummy",
+      "ip link set frer_dummy up",
+      "$TC qdisc add dev frer_dummy clsact"
+    ],
+    "cmdUnderTest": "$TC filter add dev frer_dummy ingress protocol all flower skip_hw action frer recover alg vector history-length 16 reset-time 1000 tag-pop index 10",
+    "expExitCode": "0",
+    "verifyCmd": "$TC filter show dev frer_dummy ingress",
+    "matchPattern": "frer recover tag-type rtag index 10 alg vector history-length 16 reset-time 1000",
+    "matchCount": "1",
+    "teardown": [
+      "$TC qdisc del dev frer_dummy clsact",
+      "$TC actions flush action frer",
+      "ip link del frer_dummy"
+    ]
+  },
+  {
+    "id": "frer_0029",
+    "name": "Share frer recover action across two ingress filters (refcount check)",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "ip link del frer_a 2>/dev/null || true",
+      "ip link del frer_b 2>/dev/null || true",
+      "ip link add frer_a type dummy",
+      "ip link add frer_b type dummy",
+      "ip link set frer_a up",
+      "ip link set frer_b up",
+      "$TC qdisc add dev frer_a clsact",
+      "$TC qdisc add dev frer_b clsact",
+      "$TC filter add dev frer_a ingress protocol all flower skip_hw action frer recover alg vector history-length 16 tag-pop index 10"
+    ],
+    "cmdUnderTest": "$TC filter add dev frer_b ingress protocol all flower skip_hw action frer recover index 10",
+    "expExitCode": "0",
+    "verifyCmd": "$TC -s actions show action frer index 10",
+    "matchPattern": "ref [2-9][0-9]*",
+    "matchCount": "1",
+    "teardown": [
+      "$TC qdisc del dev frer_a clsact",
+      "$TC qdisc del dev frer_b clsact",
+      "$TC actions flush action frer",
+      "ip link del frer_a",
+      "ip link del frer_b"
+    ]
+  },
+  {
+    "id": "frer_0030",
+    "name": "frer push action refcount increments when bound to filter",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "ip link del frer_dummy 2>/dev/null || true",
+      "ip link add frer_dummy type dummy",
+      "ip link set frer_dummy up",
+      "$TC qdisc add dev frer_dummy clsact",
+      "$TC actions add action frer push index 1"
+    ],
+    "cmdUnderTest": "$TC filter add dev frer_dummy egress protocol ip flower skip_hw action frer push index 1",
+    "expExitCode": "0",
+    "verifyCmd": "$TC -s actions show action frer index 1",
+    "matchPattern": "ref [2-9][0-9]*",
+    "matchCount": "1",
+    "teardown": [
+      "$TC qdisc del dev frer_dummy clsact",
+      "$TC actions flush action frer",
+      "ip link del frer_dummy"
+    ]
+  },
+  {
+    "id": "frer_0031",
+    "name": "frer push output shows not_in_hw flag",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer push index 1"
+    ],
+    "cmdUnderTest": "$TC actions show action frer index 1",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 1",
+    "matchPattern": "not_in_hw",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  },
+  {
+    "id": "frer_0032",
+    "name": "frer recover output shows not_in_hw flag",
+    "category": [
+      "actions",
+      "frer"
+    ],
+    "setup": [
+      [
+        "modprobe act_frer",
+        0,
+        1
+      ],
+      "$TC actions add action frer recover index 10"
+    ],
+    "cmdUnderTest": "$TC actions show action frer index 10",
+    "expExitCode": "0",
+    "verifyCmd": "$TC actions show action frer index 10",
+    "matchPattern": "not_in_hw",
+    "matchCount": "1",
+    "teardown": [
+      "$TC actions flush action frer"
+    ]
+  }
+]
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next 3/6] uapi: tc_act: add tc_frer UAPI header
From: Xiaoliang Yang @ 2026-06-22  9:21 UTC (permalink / raw)
  To: netdev, linux-kernel, linux-kselftest
  Cc: davem, edumazet, kuba, pabeni, jhs, jiri, horms, shuah,
	vladimir.oltean, vinicius.gomes, fejes, xiaoliang.yang_1
In-Reply-To: <20260622092118.6846-1-xiaoliang.yang_1@nxp.com>

Define the netlink attribute layout and enumerations for the FRER tc
action (IEEE 802.1CB Frame Replication and Elimination for Reliability).

The action is split into two functional sub-commands selected by the
TCA_FRER_FUNC attribute:

  TCA_FRER_FUNC_PUSH    - Egress: sequence number generation and R-TAG
                          insertion. The action inserts an R-TAG with
                          the current sequence number into the frame
                          before passing it on. When chained with
                          "action mirred egress mirror", the mirrored
                          copy already carries the R-TAG, so all
                          replicated frames on different egress paths
                          carry the same sequence number without any
                          additional shared state.

  TCA_FRER_FUNC_RECOVER - Ingress: duplicate detection and elimination.
                          Multiple ingress filters can share the same
                          recovery state by referencing the same action
                          index, implementing Sequence Recovery across
                          ports (IEEE 802.1CB Section 7.4.2).
                          When TCA_FRER_RCVY_INDIVIDUAL flag is set,
                          the action uses private per-action state
                          (Individual Recovery, Section 7.5).

Statistics attributes map directly to the managed objects defined in
IEEE 802.1CB Table 10-1.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
---
 include/uapi/linux/tc_act/tc_frer.h | 89 +++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 include/uapi/linux/tc_act/tc_frer.h

diff --git a/include/uapi/linux/tc_act/tc_frer.h b/include/uapi/linux/tc_act/tc_frer.h
new file mode 100644
index 000000000000..241e90827e26
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_frer.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/* Copyright 2026 NXP */
+
+#ifndef __LINUX_TC_FRER_H
+#define __LINUX_TC_FRER_H
+
+#include <linux/pkt_cls.h>
+
+/* Base parameters passed in TCA_FRER_PARMS */
+struct tc_frer {
+	tc_gen;
+};
+
+/**
+ * enum TCA_FRER_* - netlink attributes for the FRER tc action
+ *
+ * @TCA_FRER_FUNC:             Functional sub-command (tc_frer_func).
+ *                             Mandatory.
+ * @TCA_FRER_TAG_TYPE:         Redundancy tag type (tc_frer_tag_type).
+ *                             Mandatory.
+ *
+ * Push-specific attributes (TCA_FRER_FUNC_PUSH): none defined.
+ * Recover-specific attributes (TCA_FRER_FUNC_RECOVER):
+ * @TCA_FRER_RCVY_INDIVIDUAL:  Flag. Force Individual Recovery.
+ * @TCA_FRER_RCVY_ALG:         u8. Recovery algorithm (tc_frer_rcvy_alg).
+ * @TCA_FRER_RCVY_HISTORY_LEN: u8. SequenceHistory window size (1-32).
+ *                             Maps to frerSeqRcvyHistoryLength.
+ * @TCA_FRER_RCVY_RESET_MSEC:  u32. Reset timer in milliseconds.
+ *                             0 disables the timer.
+ *                             Maps to frerSeqRcvyResetMSec.
+ * @TCA_FRER_RCVY_TAKE_NO_SEQ: Flag. Accept frames without a redundancy
+ *                             tag and pass them unconditionally.
+ *                             Maps to frerSeqRcvyTakeNoSeq.
+ * @TCA_FRER_RCVY_TAG_POP:     Flag. Remove the redundancy tag from
+ *                             frames that pass the recovery function.
+ *
+ * Read-only statistics (filled on dump, IEEE 802.1CB Table 10-1):
+ * @TCA_FRER_STATS_TAGLESS_PKTS:       frerCpsSeqRcvyTaglessPackets
+ * @TCA_FRER_STATS_OUT_OF_ORDER_PKTS:  frerCpsSeqRcvyOutOfOrderPackets
+ * @TCA_FRER_STATS_ROGUE_PKTS:         frerCpsSeqRcvyRoguePackets
+ * @TCA_FRER_STATS_LOST_PKTS:          frerCpsSeqRcvyLostPackets
+ * @TCA_FRER_STATS_RESETS:             frerCpsSeqRcvyResets
+ * @TCA_FRER_STATS_PASSED_PKTS:        frerCpsSeqRcvyPassedPackets
+ * @TCA_FRER_STATS_DISCARDED_PKTS:     frerCpsSeqRcvyDiscardedPackets
+ * @TCA_FRER_STATS_SEQGEN_PKTS:        frerCpsSeqGenPackets
+ */
+enum {
+	TCA_FRER_UNSPEC,
+	TCA_FRER_TM,                       /* struct tcf_t */
+	TCA_FRER_PARMS,                    /* struct tc_frer */
+	TCA_FRER_PAD,
+	TCA_FRER_FUNC,                     /* u8: tc_frer_func */
+	TCA_FRER_TAG_TYPE,                 /* u8: tc_frer_tag_type */
+	TCA_FRER_RCVY_INDIVIDUAL,          /* NLA_FLAG */
+	TCA_FRER_RCVY_ALG,                 /* u8: tc_frer_rcvy_alg */
+	TCA_FRER_RCVY_HISTORY_LEN,         /* u8: 1-32 */
+	TCA_FRER_RCVY_RESET_MSEC,          /* u32 */
+	TCA_FRER_RCVY_TAKE_NO_SEQ,         /* NLA_FLAG */
+	TCA_FRER_RCVY_TAG_POP,             /* NLA_FLAG */
+	TCA_FRER_STATS_TAGLESS_PKTS,       /* u64 */
+	TCA_FRER_STATS_OUT_OF_ORDER_PKTS,  /* u64 */
+	TCA_FRER_STATS_ROGUE_PKTS,         /* u64 */
+	TCA_FRER_STATS_LOST_PKTS,          /* u64 */
+	TCA_FRER_STATS_RESETS,             /* u64 */
+	TCA_FRER_STATS_PASSED_PKTS,        /* u64 */
+	TCA_FRER_STATS_DISCARDED_PKTS,     /* u64 */
+	TCA_FRER_STATS_SEQGEN_PKTS,        /* u64 */
+	__TCA_FRER_MAX,
+};
+
+#define TCA_FRER_MAX (__TCA_FRER_MAX - 1)
+
+enum tc_frer_func {
+	TCA_FRER_FUNC_PUSH    = 1,
+	TCA_FRER_FUNC_RECOVER = 2,
+};
+
+enum tc_frer_tag_type {
+	TCA_FRER_TAG_RTAG = 1,
+	TCA_FRER_TAG_HSR,
+	TCA_FRER_TAG_PRP,
+};
+
+enum tc_frer_rcvy_alg {
+	TCA_FRER_RCVY_VECTOR_ALG = 0,  /* IEEE 802.1CB 7.4.3.4 */
+	TCA_FRER_RCVY_MATCH_ALG  = 1,  /* IEEE 802.1CB 7.4.3.5 */
+};
+
+#endif /* __LINUX_TC_FRER_H */
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next 6/6] selftests: net: add kselftest for IEEE 802.1CB FRER tc action
From: Xiaoliang Yang @ 2026-06-22  9:21 UTC (permalink / raw)
  To: netdev, linux-kernel, linux-kselftest
  Cc: davem, edumazet, kuba, pabeni, jhs, jiri, horms, shuah,
	vladimir.oltean, vinicius.gomes, fejes, xiaoliang.yang_1
In-Reply-To: <20260622092118.6846-1-xiaoliang.yang_1@nxp.com>

Add frer_test.sh, a TAP-format kselftest script covering the FRER
(IEEE 802.1CB Frame Replication and Elimination for Reliability)
tc action (act_frer).

Tests 1-4 use a bond-based two-namespace topology:

  ns_talker
  +---------------------------+
  | bond0 (IP_SRC, balance-rr)|
  |   slave: veth_a0 (frer push + mirror to veth_b0)|
  |   slave: veth_b0 (frer push + mirror to veth_a0)|
  +-------+---------------+--+
          |               |
     veth_a0         veth_b0
          |               |
     veth_a1         veth_b1
          |               |
  +-------+---------------+--+
  | bond1 (IP_DST, balance-rr)|
  |   slave: veth_a1 (frer recover ingress)          |
  |   slave: veth_b1 (frer recover ingress)          |
  +---------------------------+
  ns_listener

  IP_SRC is assigned to bond0; IP_DST is assigned to bond1.  FRER push
  is configured on both veth_a0 and veth_b0 egress with cross-mirroring
  so every frame sent by either bond slave carries an R-TAG and a
  mirrored copy reaches the peer slave.  Tests 1-4 exercise shared and
  individual recover modes on the listener side.

Test 5 uses a self-contained single-path (no bond) topology:

  ns_p2p_src                        ns_p2p_dst
  +----------------------+          +----------------------+
  | frer_p2p_a0 (IP_P2P_SRC)| <---> | frer_p2p_a1 (IP_P2P_DST)|
  | egress: frer push     |          | ingress: frer recover |
  +----------------------+          +----------------------+

Test 6 uses a four-namespace relay topology:

  ns_talker -- bridge0 (br_r0) -+- path A -+- bridge1 (br_r1) -- ns_listener
                                 \- path B -/

  bridge0 acts as sequence generator (frer push + replicate to both
  redundant paths); bridge1 acts as eliminator (frer shared recover with
  tag-pop on both ingress ports).

Six functional test cases are included:

  1. push verify              - confirm that the frer push action inserts
                                an R-TAG (EtherType 0xF1C1) on egress;
                                tcpdump on both veth_a1 and veth_b1 must
                                capture at least one R-TAG frame each.

  2. shared recover e2e       - veth_a1 and veth_b1 share one recover
                                action; the action passes exactly one copy
                                and discards the duplicate; verified via
                                ping success, tcpdump frame count on bond1,
                                and tc stats (passed >= PING_COUNT,
                                discarded >= PING_COUNT).

  3. individual recover       - veth_a1 and veth_b1 use independent recover
                                actions so both copies are passed without
                                cross-port deduplication; verified via
                                per-slave tcpdump and tc stats
                                (discarded = 0 on each port).

  4. no tag-pop               - shared recover without tag-pop leaves the
                                R-TAG on passed frames; verified by
                                capturing EtherType 0xF1C1 (expect >= 1)
                                and plain ICMP (expect 0) on bond1.

  5. simple point-to-point    - single-path push + individual recover (with
                                tag-pop) end-to-end ping test; no bond.

  6. relay e2e                - four-namespace bridge relay topology; bridge0
                                pushes R-TAG and replicates to two paths;
                                bridge1 recovers (shared, tag-pop) and
                                forwards deduplicated frames to listener;
                                verified via ping success, tcpdump frame
                                count on listener, and bridge1 tc stats.

The script conforms to the kselftest framework (TAP output, KSFT_PASS /
KSFT_FAIL / KSFT_SKIP exit codes).  It loads kselftest/lib.sh when
available and falls back to a minimal inline implementation otherwise.
All tests are skipped gracefully when act_frer is not available in the
running kernel.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
---
 tools/testing/selftests/net/Makefile     |    1 +
 tools/testing/selftests/net/frer_test.sh | 1013 ++++++++++++++++++++++
 2 files changed, 1014 insertions(+)
 create mode 100755 tools/testing/selftests/net/frer_test.sh

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 6a190a525a39..67b896611f08 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -38,6 +38,7 @@ TEST_PROGS := \
 	fib_rule_tests.sh \
 	fib_tests.sh \
 	fin_ack_lat.sh \
 	fq_band_pktlimit.sh \
+	frer_test.sh \
 	gre_gso.sh \
 	gre_ipv6_lladdr.sh \
diff --git a/tools/testing/selftests/net/frer_test.sh b/tools/testing/selftests/net/frer_test.sh
new file mode 100755
index 000000000000..ecd88952f495
--- /dev/null
+++ b/tools/testing/selftests/net/frer_test.sh
@@ -0,0 +1,1013 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2026 NXP
+#
+# frer_test.sh - IEEE 802.1CB FRER tc action kselftest
+#
+# Topology for tests 1-4:
+#
+#   ns_talker  bond0 (veth_a0 + veth_b0)  <--->  bond1 (veth_a1 + veth_b1)  ns_listener
+#
+#   IP_SRC assigned to bond0;  IP_DST assigned to bond1
+#
+#   bond mode: balance-rr (round-robin), so frames are distributed across
+#              both slaves.  FRER push is configured on both veth_a0 and
+#              veth_b0 egress with cross-mirror so every frame sent by either
+#              slave carries an R-TAG and a mirrored copy reaches the peer.
+#   FRER recover: veth_a1/veth_b1 ingress, shared or individual recover per test
+#
+#   Ping runs from bond0 to bond1; tcpdump captures on bond1 (or on individual
+#   slave interfaces for tests where both copies must be observable).
+#
+# Test 5: simple point-to-point, self-contained topology (no bond).
+# Test 6: relay system, self-contained topology.
+#
+# All namespaces, veth pairs, bond interfaces, tc rules and addresses are
+# created and destroyed within this script.  External dependencies:
+#   - kernel with CONFIG_NET_ACT_FRER and CONFIG_BONDING
+#   - iproute2 tc with frer action support
+#   - tcpdump, ping
+#   - root privileges
+
+# ----------------------------------------------------------------------------
+# kselftest library: TAP output + exit-code constants
+# ----------------------------------------------------------------------------
+ksft_lib="${KSFT_LIB:-$(dirname "$0")/../kselftest/lib.sh}"
+if [ ! -f "$ksft_lib" ]; then
+	# Stand-alone mode: the kselftest helper library was not found, so
+	# provide just enough of its interface (exit codes, TAP printers and
+	# result counters) for this script to run.
+	KSFT_PASS=0
+	KSFT_FAIL=1
+	KSFT_SKIP=4
+	_ksft_count=0
+	_ksft_pass=0
+	_ksft_fail=0
+	_ksft_skip=0
+
+	# TAP preamble and plan line.
+	ksft_print_header()
+	{
+		echo "TAP version 13"
+	}
+
+	ksft_set_plan()
+	{
+		echo "1..$1"
+	}
+
+	# Result reporters: bump the running test number and the matching
+	# per-outcome counter, then print one TAP result line.
+	ksft_test_result_pass()
+	{
+		: $((_ksft_count += 1))
+		: $((_ksft_pass += 1))
+		echo "ok $_ksft_count - $*"
+	}
+
+	ksft_test_result_fail()
+	{
+		: $((_ksft_count += 1))
+		: $((_ksft_fail += 1))
+		echo "not ok $_ksft_count - $*"
+	}
+
+	ksft_test_result_skip()
+	{
+		: $((_ksft_count += 1))
+		: $((_ksft_skip += 1))
+		echo "ok $_ksft_count - $* # SKIP"
+	}
+
+	# Summary line with the accumulated counters.
+	ksft_print_cnts()
+	{
+		echo "# Totals: pass=$_ksft_pass fail=$_ksft_fail skip=$_ksft_skip"
+	}
+
+	# Exit helpers mapping onto the kselftest exit codes.
+	ksft_exit_pass()
+	{
+		exit $KSFT_PASS
+	}
+
+	ksft_exit_fail()
+	{
+		exit $KSFT_FAIL
+	}
+
+	ksft_exit_fail_msg()
+	{
+		echo "# FATAL: $*" >&2
+		exit $KSFT_FAIL
+	}
+else
+	# shellcheck source=/dev/null
+	. "$ksft_lib"
+fi
+
+# ----------------------------------------------------------------------------
+# Configuration (override via environment)
+# ----------------------------------------------------------------------------
+# Commands used by the tests; each can be pointed at an alternative binary.
+TC="${TC:-tc}"
+PING="${PING:-ping}"
+TCPDUMP="${TCPDUMP:-tcpdump}"
+# Number of echo requests per ping run (passed to ping -c).
+PING_COUNT="${PING_COUNT:-5}"
+# Reply wait time handed to ping -W.
+PING_TIMEOUT="${PING_TIMEOUT:-2}"
+# Set to 1 to make load_module() skip its modprobe calls.
+SKIP_MODPROBE="${SKIP_MODPROBE:-0}"
+
+# Bond topology interfaces (tests 1-4)
+# The *0 ends live in NS_TALKER, the *1 ends in NS_LISTENER.
+readonly VETH_A0="frer_a0"
+readonly VETH_A1="frer_a1"
+readonly VETH_B0="frer_b0"
+readonly VETH_B1="frer_b1"
+readonly BOND0="frer_bond0"
+readonly BOND1="frer_bond1"
+
+readonly NS_TALKER="frer_ns_talker"
+readonly NS_LISTENER="frer_ns_listener"
+
+# IP_SRC is assigned to BOND0, IP_DST to BOND1.
+readonly IP_SRC="10.0.0.1"
+readonly IP_DST="10.0.0.2"
+
+# Point-to-point topology interfaces (test 5)
+readonly P2P_NS_SRC="frer_p2p_src"
+readonly P2P_NS_DST="frer_p2p_dst"
+readonly P2P_VETH_A0="frer_p2p_a0"
+readonly P2P_VETH_A1="frer_p2p_a1"
+readonly IP_P2P_SRC="10.0.1.1"
+readonly IP_P2P_DST="10.0.1.2"
+
+# Relay topology interfaces (test 6)
+#
+#   ns_talker (talker_eth.100) -- talker_eth/br0_uplink -- bridge0 (br_r0)
+#                                         |-- br0_swp0/br1_swp0 --\
+#                                         \-- br0_swp1/br1_swp1 --+--\
+#       bridge1 (br_r1) -- br1_downlink/listener_eth -- ns_listener
+#
+# bridge0 acts as sequence generator (frer push + replicate to both paths).
+# bridge1 acts as eliminator (frer recover, shared, tag-pop).
+readonly R_NS_TALKER="frer_r_talker"
+readonly R_NS_BRIDGE0="frer_r_bridge0"
+readonly R_NS_BRIDGE1="frer_r_bridge1"
+readonly R_NS_LISTENER="frer_r_listener"
+readonly R_TALKER_ETH="r_tlk_eth"       # talker-side physical port
+readonly R_BR0_UPLINK="r_br0_uplink"    # bridge0 uplink facing talker
+readonly R_BR0_SWP0="r_br0_swp0"        # bridge0 redundant path port 0
+readonly R_BR0_SWP1="r_br0_swp1"        # bridge0 redundant path port 1
+readonly R_BR1_SWP0="r_br1_swp0"        # bridge1 redundant path port 0
+readonly R_BR1_SWP1="r_br1_swp1"        # bridge1 redundant path port 1
+readonly R_BR1_DOWNLINK="r_br1_dwnlnk"  # bridge1 downlink facing listener
+readonly R_LISTENER_ETH="r_lst_eth"     # listener-side physical port
+readonly R_BR0="br_r0"
+readonly R_BR1="br_r1"
+readonly R_VLAN=100
+readonly R_IP_TALKER="10.1.0.1"
+readonly R_IP_LISTENER="10.1.0.2"
+
+# FRER action index constants
+# One index per action instance so tests never collide:
+#   IDX_PUSH          push action (bond tests and the p2p test)
+#   IDX_SHARED_RCVY   shared recover with tag-pop (test 2)
+#   IDX_INDV_RCVY_A/B per-port individual recover (test 3)
+#   IDX_NO_POP        shared recover without tag-pop (test 4)
+#   IDX_P2P_RCVY      individual recover in the p2p test (test 5)
+#   IDX_RELAY_*       push / recover in the relay test (test 6)
+readonly IDX_PUSH=1
+readonly IDX_SHARED_RCVY=10
+readonly IDX_INDV_RCVY_A=20
+readonly IDX_INDV_RCVY_B=21
+readonly IDX_NO_POP=30
+readonly IDX_P2P_RCVY=40
+readonly IDX_RELAY_PUSH=50
+readonly IDX_RELAY_RCVY=60
+
+# Number of test cases announced in the TAP plan.
+readonly NUM_TESTS=6
+
+# ----------------------------------------------------------------------------
+# Prerequisite check
+# ----------------------------------------------------------------------------
+# check_prerequisites
+#   Verify root privileges and the presence of every external command the
+#   script invokes.  If anything is missing, all NUM_TESTS tests are reported
+#   as skipped and the script exits with KSFT_SKIP; otherwise returns 0.
+check_prerequisites()
+{
+	local missing=0
+	local cmd i
+
+	if ! [ "$(id -u)" -eq 0 ]; then
+		echo "# Must be run as root" >&2
+		missing=1
+	fi
+
+	# bridge (relay test) and timeout (capture helpers) are invoked
+	# unconditionally, so verify them together with the overridable tools.
+	for cmd in ip bridge timeout "$TC" "$TCPDUMP" "$PING"; do
+		if ! command -v "$cmd" >/dev/null 2>&1; then
+			echo "# Missing command: $cmd" >&2
+			missing=1
+		fi
+	done
+
+	if [ "$missing" -ne 0 ]; then
+		# Emit a complete TAP plan so the harness sees every test as
+		# skipped rather than a truncated run.
+		ksft_set_plan "$NUM_TESTS"
+		for i in $(seq 1 "$NUM_TESTS"); do
+			ksft_test_result_skip "prerequisites not met (test $i)"
+		done
+		ksft_print_cnts
+		exit "$KSFT_SKIP"
+	fi
+}
+
+# load_module
+#   Best-effort modprobe of the modules the tests rely on.  A failure is only
+#   reported on stderr.  Does nothing when SKIP_MODPROBE is 1.
+load_module()
+{
+	local mod
+
+	if [ "$SKIP_MODPROBE" = "1" ]; then
+		return
+	fi
+
+	for mod in act_frer bonding; do
+		modprobe "$mod" 2>/dev/null && continue
+		echo "# modprobe $mod failed - may be built-in or unavailable" >&2
+	done
+}
+
+check_frer_action()
+{
+	ip netns exec "$NS_TALKER" \
+		$TC actions add action frer push index 999 2>/dev/null || return 1
+	ip netns exec "$NS_TALKER" \
+		$TC actions del action frer index 999 2>/dev/null || true
+	return 0
+}
+
+# ----------------------------------------------------------------------------
+# Bond topology setup / teardown (used by tests 1-4)
+# ----------------------------------------------------------------------------
+setup_topology()
+{
+	for n in "$NS_TALKER" "$NS_LISTENER"; do
+		ip netns add "$n"
+	done
+
+	ip link add "$VETH_A0" type veth peer name "$VETH_A1"
+	ip link set "$VETH_A0" netns "$NS_TALKER"
+	ip link set "$VETH_A1" netns "$NS_LISTENER"
+
+	ip link add "$VETH_B0" type veth peer name "$VETH_B1"
+	ip link set "$VETH_B0" netns "$NS_TALKER"
+	ip link set "$VETH_B1" netns "$NS_LISTENER"
+
+	# ns_talker: create bond0 (balance-rr), frames round-robin across both slaves.
+	ip netns exec "$NS_TALKER" ip link set lo up
+	ip netns exec "$NS_TALKER" ip link add "$BOND0" type bond mode balance-rr miimon 100
+	ip netns exec "$NS_TALKER" ip link set "$VETH_A0" master "$BOND0"
+	ip netns exec "$NS_TALKER" ip link set "$VETH_B0" master "$BOND0"
+	ip netns exec "$NS_TALKER" ip link set "$VETH_A0" up
+	ip netns exec "$NS_TALKER" ip link set "$VETH_B0" up
+	ip netns exec "$NS_TALKER" ip link set "$BOND0" up
+	ip netns exec "$NS_TALKER" ip addr add "${IP_SRC}/24" dev "$BOND0"
+
+	# ns_listener: create bond1 (balance-rr).
+	ip netns exec "$NS_LISTENER" ip link set lo up
+	ip netns exec "$NS_LISTENER" ip link add "$BOND1" type bond mode balance-rr miimon 100
+	ip netns exec "$NS_LISTENER" ip link set "$VETH_A1" master "$BOND1"
+	ip netns exec "$NS_LISTENER" ip link set "$VETH_B1" master "$BOND1"
+	ip netns exec "$NS_LISTENER" ip link set "$VETH_A1" up
+	ip netns exec "$NS_LISTENER" ip link set "$VETH_B1" up
+	ip netns exec "$NS_LISTENER" ip link set "$BOND1" up
+	ip netns exec "$NS_LISTENER" ip addr add "${IP_DST}/24" dev "$BOND1"
+
+	# Static ARP so L2 forwarding works without ARP broadcasts.
+	# With balance-rr both slaves share the bond MAC.
+	local mac_bond0 mac_bond1
+	mac_bond0=$(ip netns exec "$NS_TALKER"   cat /sys/class/net/"$BOND0"/address)
+	mac_bond1=$(ip netns exec "$NS_LISTENER" cat /sys/class/net/"$BOND1"/address)
+	ip netns exec "$NS_TALKER"   ip neigh add "$IP_DST" lladdr "$mac_bond1" dev "$BOND0"
+	ip netns exec "$NS_LISTENER" ip neigh add "$IP_SRC" lladdr "$mac_bond0" dev "$BOND1"
+}
+
+# cleanup
+#   Delete every namespace any of the tests may have created.  Errors are
+#   ignored.  Installed as the EXIT trap.
+cleanup()
+{
+	local netns
+	local -a all_ns=(
+		"$NS_TALKER" "$NS_LISTENER"
+		"$P2P_NS_SRC" "$P2P_NS_DST"
+		"$R_NS_TALKER" "$R_NS_BRIDGE0" "$R_NS_BRIDGE1" "$R_NS_LISTENER"
+	)
+
+	for netns in "${all_ns[@]}"; do
+		ip netns del "$netns" 2>/dev/null || :
+	done
+}
+trap cleanup EXIT
+
+# ----------------------------------------------------------------------------
+# TC rule helpers
+# ----------------------------------------------------------------------------
+
+# Push on both veth_a0 and veth_b0 egress using the same shared frer push
+# action (IDX_PUSH).  Each slave also mirrors to the other so that every
+# outgoing frame is replicated onto both paths regardless of which slave the
+# bond currently selects.  This prevents packet loss during bond link changes.
+setup_push_mirror()
+{
+	ip netns exec "$NS_TALKER" $TC qdisc add dev "$VETH_A0" clsact
+	ip netns exec "$NS_TALKER" $TC filter add dev "$VETH_A0" egress \
+		protocol ip flower skip_hw \
+		action frer push index $IDX_PUSH \
+		action mirred egress mirror dev "$VETH_B0"
+
+	ip netns exec "$NS_TALKER" $TC qdisc add dev "$VETH_B0" clsact
+	ip netns exec "$NS_TALKER" $TC filter add dev "$VETH_B0" egress \
+		protocol ip flower skip_hw \
+		action frer push index $IDX_PUSH \
+		action mirred egress mirror dev "$VETH_A0"
+}
+
+# teardown_tc
+#   Remove the clsact qdiscs from all four bond slaves and flush the frer
+#   actions in both namespaces.  Every step is best-effort.
+teardown_tc()
+{
+	local entry netns
+
+	for entry in \
+		"$NS_TALKER:$VETH_A0" "$NS_TALKER:$VETH_B0" \
+		"$NS_LISTENER:$VETH_A1" "$NS_LISTENER:$VETH_B1"; do
+		ip netns exec "${entry%%:*}" $TC qdisc del dev "${entry##*:}" clsact \
+			2>/dev/null || :
+	done
+
+	for netns in "$NS_TALKER" "$NS_LISTENER"; do
+		ip netns exec "$netns" $TC actions flush action frer 2>/dev/null || :
+	done
+}
+
+# ----------------------------------------------------------------------------
+# Packet-capture helpers
+#
+# capture_start_on NS IFACE PCAP [BPF_FILTER]
+#   Starts tcpdump in namespace NS on IFACE, writing to PCAP.
+#   Stores PID in _CAP_PID.
+#
+# capture_stop
+#   Waits for tcpdump (stored in _CAP_PID) to finish.
+#
+# capture_count_on NS PCAP
+#   Prints the number of captured packets.
+#
+# Convenience wrappers capture_start / capture_count target bond1 in
+# NS_LISTENER (the primary observation point for tests 2 and 4).
+# ----------------------------------------------------------------------------
+_CAP_PID=""
+
+capture_start_on()
+{
+	local ns="$1" iface="$2" pcap="$3" filter="${4:-}"
+
+	if [ -n "$filter" ]; then
+		ip netns exec "$ns" timeout 4 \
+			$TCPDUMP -i "$iface" -w "$pcap" \
+			--immediate-mode -Z root -y EN10MB \
+			$filter >/dev/null 2>&1 &
+	else
+		ip netns exec "$ns" timeout 4 \
+			$TCPDUMP -i "$iface" -w "$pcap" \
+			--immediate-mode -Z root -y EN10MB \
+			>/dev/null 2>&1 &
+	fi
+	_CAP_PID=$!
+
+	# Wait until tcpdump opens a packet socket (max ~2.5 s).
+	local tries=0
+	while [ $tries -lt 50 ]; do
+		ip netns exec "$ns" grep -q "$iface" /proc/net/packet 2>/dev/null && break
+		sleep 0.05
+		tries=$((tries + 1))
+	done
+}
+
+# capture_stop
+#   Block until the capture recorded in _CAP_PID has exited, then clear
+#   _CAP_PID.  A no-op when no capture is pending.
+capture_stop()
+{
+	if [ -z "$_CAP_PID" ]; then
+		return 0
+	fi
+
+	wait "$_CAP_PID" 2>/dev/null || :
+	_CAP_PID=""
+}
+
+capture_count_on()
+{
+	local ns="$1" pcap="$2"
+	ip netns exec "$ns" \
+		$TCPDUMP -r "$pcap" --no-promiscuous-mode 2>/dev/null \
+		| grep -c "^[0-9]" || true
+}
+
+# Convenience wrappers: default to bond1 in NS_LISTENER
+capture_start() { capture_start_on "$NS_LISTENER" "$BOND1" "$@"; }
+capture_count() { capture_count_on "$NS_LISTENER" "$1"; }
+
+# ----------------------------------------------------------------------------
+# Ping helper
+# ----------------------------------------------------------------------------
+do_ping()
+{
+	local rc=0
+	ip netns exec "$NS_TALKER" \
+		$PING -c "$PING_COUNT" -W "$PING_TIMEOUT" -i 0.2 -q \
+		"$IP_DST" >/dev/null 2>&1 || rc=$?
+	return $rc
+}
+
+# ----------------------------------------------------------------------------
+# tc statistics parser
+# ----------------------------------------------------------------------------
+tc_stat()
+{
+	local dump="$1" field="$2"
+	echo "$dump" | awk -F"${field}=" 'NF>1{split($2,a," ");print a[1];exit}' || echo "0"
+}
+
+# ----------------------------------------------------------------------------
+# TEST 1: PUSH VERIFY (bond topology)
+#
+# Only push is configured on the talker side; no recover on the listener.
+# The push action on veth_a0 egress inserts an R-TAG and mirrors a copy to
+# veth_b0, so both listener slaves (veth_a1 and veth_b1) receive a frame
+# with EtherType 0xF1C1.  Captures run sequentially on each slave to verify
+# that both paths carry R-TAG frames.
+#
+# Pass criteria:
+#   - veth_a1 captures >= 1 R-TAG frame
+#   - veth_b1 captures >= 1 R-TAG frame
+# ----------------------------------------------------------------------------
+test_push_verify_bond()
+{
+	# Push-only check: no recover action is installed on the listener, so
+	# R-TAG frames should be visible on both listener slaves.
+	local pcap_a pcap_b cap_a cap_b
+	local result="pass"
+
+	setup_push_mirror
+
+	# Capture 1: R-TAG frames on veth_a1 (path A)
+	pcap_a=$(mktemp /tmp/frer_bond_push_a_XXXXXX.pcap)
+	capture_start_on "$NS_LISTENER" "$VETH_A1" "$pcap_a" "ether proto 0xf1c1"
+	# The ping result itself is irrelevant here; only the captured frames
+	# are evaluated.
+	ip netns exec "$NS_TALKER" \
+		$PING -c 3 -W 1 -i 0.2 -q "$IP_DST" >/dev/null 2>&1 || true
+	capture_stop
+	cap_a=$(capture_count_on "$NS_LISTENER" "$pcap_a")
+	rm -f "$pcap_a"
+
+	# Capture 2: R-TAG frames on veth_b1 (path B, mirrored copy)
+	pcap_b=$(mktemp /tmp/frer_bond_push_b_XXXXXX.pcap)
+	capture_start_on "$NS_LISTENER" "$VETH_B1" "$pcap_b" "ether proto 0xf1c1"
+	ip netns exec "$NS_TALKER" \
+		$PING -c 3 -W 1 -i 0.2 -q "$IP_DST" >/dev/null 2>&1 || true
+	capture_stop
+	cap_b=$(capture_count_on "$NS_LISTENER" "$pcap_b")
+	rm -f "$pcap_b"
+
+	teardown_tc
+
+	echo "# bond push verify: veth_a1 R-TAG=$cap_a veth_b1 R-TAG=$cap_b"
+
+	# Each path must have carried at least one tagged frame.
+	[ "$cap_a" -ge 1 ] || result="fail"
+	[ "$cap_b" -ge 1 ] || result="fail"
+
+	if [ "$result" = "pass" ]; then
+		ksft_test_result_pass \
+			"bond push verify: R-TAG on both paths (a1=$cap_a b1=$cap_b)"
+	else
+		ksft_test_result_fail \
+			"bond push verify: expected R-TAG on both paths (a1=$cap_a b1=$cap_b)"
+	fi
+}
+
+# ----------------------------------------------------------------------------
+# TEST 2: SHARED RECOVER E2E (bond topology)
+#
+# veth_a1 and veth_b1 ingress share one recover action (idx=10) with tag-pop.
+# The listener receives two R-TAG copies per request; the shared recover passes
+# exactly one and discards the other.  The recovered plain ICMP reaches bond1's
+# IP stack and a reply is sent, making ping succeed.
+#
+# Pass criteria:
+#   - ping succeeds (rc=0)
+#   - tcpdump on bond1 captures exactly PING_COUNT ICMP echo-request frames
+#     (filter is restricted to type=8 to exclude echo replies, which would
+#     double the count since bond1 also originates the reply packets)
+#   - tc stats on veth_a1: passed >= PING_COUNT, discarded >= PING_COUNT
+# ----------------------------------------------------------------------------
+test_shared_recover_bond()
+{
+	# Both listener slaves bind the same recover action, then ping, the
+	# bond1 capture and the action statistics are checked together.
+	local pcap cap_count ping_rc=0
+	local dump_a
+	local total_passed total_discarded tagless
+	local result="pass"
+
+	setup_push_mirror
+
+	# veth_a1 ingress: create shared recover action with tag-pop
+	ip netns exec "$NS_LISTENER" $TC qdisc add dev "$VETH_A1" clsact
+	ip netns exec "$NS_LISTENER" $TC filter add dev "$VETH_A1" ingress \
+		protocol all flower skip_hw \
+		action frer recover alg vector history-length 16 \
+			reset-time 2000 tag-pop index $IDX_SHARED_RCVY
+
+	# veth_b1 ingress: bind to the same shared action by index
+	ip netns exec "$NS_LISTENER" $TC qdisc add dev "$VETH_B1" clsact
+	ip netns exec "$NS_LISTENER" $TC filter add dev "$VETH_B1" ingress \
+		protocol all flower skip_hw \
+		action frer recover index $IDX_SHARED_RCVY
+
+	# Capture only echo requests on bond1 so replies do not inflate the count.
+	pcap=$(mktemp /tmp/frer_bond_shared_XXXXXX.pcap)
+	capture_start "$pcap" "icmp[icmptype] == icmp-echo"
+
+	do_ping || ping_rc=$?
+
+	capture_stop
+
+	cap_count=$(capture_count "$pcap")
+	rm -f "$pcap"
+
+	# Statistics are read from the veth_a1 filter only; the action is
+	# shared, so veth_b1 references the same instance.
+	dump_a=$(ip netns exec "$NS_LISTENER" \
+		$TC -s filter show dev "$VETH_A1" ingress 2>/dev/null)
+
+	teardown_tc
+
+	total_passed=$(tc_stat    "$dump_a" "passed")
+	total_discarded=$(tc_stat "$dump_a" "discarded")
+	tagless=$(tc_stat         "$dump_a" "tagless")
+	# NOTE(review): presumably "discarded" also counts tagless frames, hence
+	# the subtraction - confirm against the act_frer statistics.
+	total_discarded=$((total_discarded - tagless))
+
+	echo "# bond shared recover: ping_rc=$ping_rc cap=$cap_count" \
+		"passed=$total_passed discarded=$total_discarded"
+
+	[ "$ping_rc"         -eq 0 ]            || result="fail"
+	[ "$cap_count"       -eq "$PING_COUNT" ] || result="fail"
+	[ "$total_passed"    -ge "$PING_COUNT" ] || result="fail"
+	[ "$total_discarded" -ge "$PING_COUNT" ] || result="fail"
+
+	if [ "$result" = "pass" ]; then
+		ksft_test_result_pass \
+			"bond shared recover: ping OK, cap=$cap_count" \
+			"passed=$total_passed discarded=$total_discarded"
+	else
+		ksft_test_result_fail \
+			"bond shared recover: ping_rc=$ping_rc cap=$cap_count" \
+			"passed=$total_passed discarded=$total_discarded" \
+			"(expected ping OK, cap=$PING_COUNT," \
+			"passed>=$PING_COUNT, discarded>=$PING_COUNT)"
+	fi
+}
+
+# ----------------------------------------------------------------------------
+# TEST 3: INDIVIDUAL RECOVER (bond topology)
+#
+# veth_a1 and veth_b1 use independent recover actions (idx=20 and idx=21).
+# Each port maintains its own sequence history so both copies of every frame
+# are passed (no cross-port deduplication).  bond1 is balance-rr (see
+# setup_topology), so recovered frames from both slaves are expected to reach
+# bond1's IP stack and ping succeeds.  The absence of deduplication is verified
+# via per-slave tcpdump (each slave should capture PING_COUNT ICMP frames) and
+# tc stats.
+#
+# Pass criteria:
+#   - ping succeeds
+#   - veth_a1 captures >= PING_COUNT ICMP frames (passed, not discarded)
+#   - veth_b1 captures >= PING_COUNT ICMP frames (passed independently)
+#   - tc stats: veth_a1 passed >= PING_COUNT, discarded = 0
+#               veth_b1 passed >= PING_COUNT, discarded = 0
+# ----------------------------------------------------------------------------
+test_individual_recover_bond()
+{
+	# Each listener slave gets its own recover action, so neither port
+	# should discard the copy seen by the other.
+	local pcap_a pcap_b cap_a cap_b ping_rc=0
+	local dump_a dump_b
+	local passed_a discarded_a passed_b discarded_b tagless_a tagless_b
+	local result="pass"
+
+	setup_push_mirror
+
+	# veth_a1 ingress: individual recover idx=20 (independent state)
+	ip netns exec "$NS_LISTENER" $TC qdisc add dev "$VETH_A1" clsact
+	ip netns exec "$NS_LISTENER" $TC filter add dev "$VETH_A1" ingress \
+		protocol all flower skip_hw \
+		action frer recover individual alg vector history-length 16 \
+			reset-time 2000 tag-pop index $IDX_INDV_RCVY_A
+
+	# veth_b1 ingress: individual recover idx=21 (separate independent state)
+	ip netns exec "$NS_LISTENER" $TC qdisc add dev "$VETH_B1" clsact
+	ip netns exec "$NS_LISTENER" $TC filter add dev "$VETH_B1" ingress \
+		protocol all flower skip_hw \
+		action frer recover individual alg vector history-length 16 \
+			reset-time 2000 tag-pop index $IDX_INDV_RCVY_B
+
+	# Per-slave capture A: verify veth_a1 passes frames; also use this run
+	# for the overall ping_rc check (do_ping targets bond0->bond1).
+	pcap_a=$(mktemp /tmp/frer_bond_indv_a_XXXXXX.pcap)
+	capture_start_on "$NS_LISTENER" "$VETH_A1" "$pcap_a" "icmp"
+	do_ping || ping_rc=$?
+	capture_stop
+	cap_a=$(capture_count_on "$NS_LISTENER" "$pcap_a")
+	rm -f "$pcap_a"
+
+	# Per-slave capture B: verify veth_b1 also passes frames (balance-rr
+	# distributes egress across both slaves, so both paths carry traffic).
+	pcap_b=$(mktemp /tmp/frer_bond_indv_b_XXXXXX.pcap)
+	capture_start_on "$NS_LISTENER" "$VETH_B1" "$pcap_b" "icmp"
+	do_ping || true
+	capture_stop
+	cap_b=$(capture_count_on "$NS_LISTENER" "$pcap_b")
+	rm -f "$pcap_b"
+
+	# The counters below cover both ping runs, which is why the checks
+	# use >= rather than an exact match.
+	dump_a=$(ip netns exec "$NS_LISTENER" \
+		$TC -s filter show dev "$VETH_A1" ingress 2>/dev/null)
+	dump_b=$(ip netns exec "$NS_LISTENER" \
+		$TC -s filter show dev "$VETH_B1" ingress 2>/dev/null)
+
+	teardown_tc
+
+	passed_a=$(tc_stat    "$dump_a" "passed")
+	discarded_a=$(tc_stat "$dump_a" "discarded")
+	tagless_a=$(tc_stat   "$dump_a" "tagless")
+	passed_b=$(tc_stat    "$dump_b" "passed")
+	discarded_b=$(tc_stat "$dump_b" "discarded")
+	tagless_b=$(tc_stat   "$dump_b" "tagless")
+	# NOTE(review): presumably "discarded" also counts tagless frames, hence
+	# the subtraction - confirm against the act_frer statistics.
+	discarded_a=$((discarded_a - tagless_a))
+	discarded_b=$((discarded_b - tagless_b))
+
+	echo "# bond individual recover: ping_rc=$ping_rc" \
+		"a1: cap=$cap_a passed=$passed_a discarded=$discarded_a" \
+		"b1: cap=$cap_b passed=$passed_b discarded=$discarded_b"
+
+	[ "$ping_rc"   -eq 0 ]            || result="fail"
+	[ "$cap_a"     -ge "$PING_COUNT" ] || result="fail"
+	[ "$cap_b"     -ge "$PING_COUNT" ] || result="fail"
+	[ "$passed_a"  -ge "$PING_COUNT" ] || result="fail"
+	[ "$passed_b"  -ge "$PING_COUNT" ] || result="fail"
+	[ "$discarded_a" -eq 0 ]           || result="fail"
+	[ "$discarded_b" -eq 0 ]           || result="fail"
+
+	if [ "$result" = "pass" ]; then
+		ksft_test_result_pass \
+			"bond individual recover: ping OK" \
+			"a1: cap=$cap_a passed=$passed_a/0" \
+			"b1: cap=$cap_b passed=$passed_b/0"
+	else
+		ksft_test_result_fail \
+			"bond individual recover: ping_rc=$ping_rc" \
+			"a1: cap=$cap_a passed=$passed_a discarded=$discarded_a" \
+			"b1: cap=$cap_b passed=$passed_b discarded=$discarded_b"
+	fi
+}
+
+# ----------------------------------------------------------------------------
+# TEST 4: NO TAG-POP (bond topology)
+#
+# Shared recover runs without tag-pop; passed frames still carry the R-TAG
+# when they reach bond1.
+#
+# Pass criteria:
+#   - tcpdump on bond1 with "ether proto 0xf1c1" captures >= 1 R-TAG frame
+#   - tcpdump on bond1 with "icmp" captures 0 frames (outer EtherType is
+#     0xF1C1, not 0x0800, so plain-IP ICMP filter does not match)
+# ----------------------------------------------------------------------------
+test_no_tag_pop_bond()
+{
+	# Shared recover without tag-pop: two sequential captures on bond1,
+	# one for R-TAG frames and one for plain ICMP.
+	local pcap_rtag pcap_icmp rtag_count icmp_count
+	local result="pass"
+
+	setup_push_mirror
+
+	# veth_a1 ingress: shared recover WITHOUT tag-pop
+	ip netns exec "$NS_LISTENER" $TC qdisc add dev "$VETH_A1" clsact
+	ip netns exec "$NS_LISTENER" $TC filter add dev "$VETH_A1" ingress \
+		protocol all flower skip_hw \
+		action frer recover alg vector history-length 16 \
+			reset-time 2000 index $IDX_NO_POP
+
+	# veth_b1 ingress: bind to the same shared action
+	ip netns exec "$NS_LISTENER" $TC qdisc add dev "$VETH_B1" clsact
+	ip netns exec "$NS_LISTENER" $TC filter add dev "$VETH_B1" ingress \
+		protocol all flower skip_hw \
+		action frer recover index $IDX_NO_POP
+
+	# Capture 1: frames with R-TAG EtherType on bond1 (expect >= 1)
+	pcap_rtag=$(mktemp /tmp/frer_bond_nopop_rtag_XXXXXX.pcap)
+	capture_start "$pcap_rtag" "ether proto 0xf1c1"
+	# The ping exit status is ignored; only the captures are evaluated.
+	ip netns exec "$NS_TALKER" \
+		$PING -c 3 -W 1 -i 0.2 -q "$IP_DST" >/dev/null 2>&1 || true
+	capture_stop
+	rtag_count=$(capture_count "$pcap_rtag")
+	rm -f "$pcap_rtag"
+
+	# Capture 2: plain ICMP frames on bond1 (expect 0)
+	pcap_icmp=$(mktemp /tmp/frer_bond_nopop_icmp_XXXXXX.pcap)
+	capture_start "$pcap_icmp" "icmp"
+	ip netns exec "$NS_TALKER" \
+		$PING -c 3 -W 1 -i 0.2 -q "$IP_DST" >/dev/null 2>&1 || true
+	capture_stop
+	icmp_count=$(capture_count "$pcap_icmp")
+	rm -f "$pcap_icmp"
+
+	teardown_tc
+
+	echo "# bond no tag-pop: rtag=$rtag_count (expected >=1) icmp=$icmp_count (expected 0)"
+
+	[ "$rtag_count" -ge 1 ] || result="fail"
+	[ "$icmp_count" -eq 0 ] || result="fail"
+
+	if [ "$result" = "pass" ]; then
+		ksft_test_result_pass \
+			"bond no tag-pop: R-TAG present on bond1 " \
+			"(rtag=$rtag_count), ICMP absent (icmp=$icmp_count)"
+	else
+		ksft_test_result_fail \
+			"bond no tag-pop: rtag=$rtag_count icmp=$icmp_count " \
+			"(expected rtag>=1 icmp=0)"
+	fi
+}
+
+# ----------------------------------------------------------------------------
+# TEST 5: SIMPLE POINT-TO-POINT (no bond)
+#
+# Self-contained single-path topology: push on p2p_a0 egress, individual
+# recover (with tag-pop) on p2p_a1 ingress.  IP is assigned directly to the
+# veth interfaces (no bond).
+#
+# Pass criteria:
+#   - ping succeeds (rc=0)
+#   - veth_a1 recover stats: passed >= PING_COUNT, discarded = 0
+# ----------------------------------------------------------------------------
+test_simple_point_to_point()
+{
+	# Builds its own two-namespace veth topology, runs one ping, collects
+	# the recover statistics and removes the topology before evaluating.
+	local ping_rc=0
+	local dump_a1 passed discarded
+	local result="pass"
+
+	# Create self-contained p2p namespaces
+	ip netns add "$P2P_NS_SRC"
+	ip netns add "$P2P_NS_DST"
+
+	ip link add "$P2P_VETH_A0" type veth peer name "$P2P_VETH_A1"
+	ip link set "$P2P_VETH_A0" netns "$P2P_NS_SRC"
+	ip link set "$P2P_VETH_A1" netns "$P2P_NS_DST"
+
+	ip netns exec "$P2P_NS_SRC" ip link set lo up
+	ip netns exec "$P2P_NS_SRC" ip link set "$P2P_VETH_A0" up
+	ip netns exec "$P2P_NS_SRC" ip addr add "${IP_P2P_SRC}/24" dev "$P2P_VETH_A0"
+
+	ip netns exec "$P2P_NS_DST" ip link set lo up
+	ip netns exec "$P2P_NS_DST" ip link set "$P2P_VETH_A1" up
+	ip netns exec "$P2P_NS_DST" ip addr add "${IP_P2P_DST}/24" dev "$P2P_VETH_A1"
+
+	# Static neighbour entries in both directions, using each peer's MAC.
+	local mac_a0 mac_a1
+	mac_a0=$(ip netns exec "$P2P_NS_SRC" cat /sys/class/net/"$P2P_VETH_A0"/address)
+	mac_a1=$(ip netns exec "$P2P_NS_DST" cat /sys/class/net/"$P2P_VETH_A1"/address)
+	ip netns exec "$P2P_NS_SRC" ip neigh add "$IP_P2P_DST" lladdr "$mac_a1" dev "$P2P_VETH_A0"
+	ip netns exec "$P2P_NS_DST" ip neigh add "$IP_P2P_SRC" lladdr "$mac_a0" dev "$P2P_VETH_A1"
+
+	# veth_a0 egress: push R-TAG
+	ip netns exec "$P2P_NS_SRC" $TC qdisc add dev "$P2P_VETH_A0" clsact
+	ip netns exec "$P2P_NS_SRC" $TC filter add dev "$P2P_VETH_A0" egress \
+		protocol ip flower skip_hw \
+		action frer push index $IDX_PUSH
+
+	# veth_a1 ingress: individual recover with tag-pop
+	ip netns exec "$P2P_NS_DST" $TC qdisc add dev "$P2P_VETH_A1" clsact
+	ip netns exec "$P2P_NS_DST" $TC filter add dev "$P2P_VETH_A1" ingress \
+		protocol all flower skip_hw \
+		action frer recover individual alg vector history-length 16 \
+			reset-time 2000 tag-pop index $IDX_P2P_RCVY
+
+	ip netns exec "$P2P_NS_SRC" \
+		$PING -c "$PING_COUNT" -W "$PING_TIMEOUT" -i 0.2 -q \
+		"$IP_P2P_DST" >/dev/null 2>&1 || ping_rc=$?
+
+	# Statistics must be read before the topology is torn down.
+	dump_a1=$(ip netns exec "$P2P_NS_DST" \
+		$TC -s filter show dev "$P2P_VETH_A1" ingress 2>/dev/null)
+
+	# Teardown p2p topology
+	for dev in "$P2P_VETH_A0"; do
+		ip netns exec "$P2P_NS_SRC" $TC qdisc del dev "$dev" clsact \
+			2>/dev/null || true
+	done
+	for dev in "$P2P_VETH_A1"; do
+		ip netns exec "$P2P_NS_DST" $TC qdisc del dev "$dev" clsact \
+			2>/dev/null || true
+	done
+	ip netns exec "$P2P_NS_SRC" $TC actions flush action frer 2>/dev/null || true
+	ip netns exec "$P2P_NS_DST" $TC actions flush action frer 2>/dev/null || true
+	ip netns del "$P2P_NS_SRC" 2>/dev/null || true
+	ip netns del "$P2P_NS_DST" 2>/dev/null || true
+
+	passed=$(tc_stat    "$dump_a1" "passed")
+	discarded=$(tc_stat "$dump_a1" "discarded")
+	local tagless
+	tagless=$(tc_stat   "$dump_a1" "tagless")
+	# NOTE(review): presumably "discarded" also counts tagless frames, hence
+	# the subtraction - confirm against the act_frer statistics.
+	discarded=$((discarded - tagless))
+
+	echo "# p2p: ping_rc=$ping_rc passed=$passed discarded=$discarded"
+
+	[ "$ping_rc"   -eq 0 ]            || result="fail"
+	[ "$passed"    -ge "$PING_COUNT" ] || result="fail"
+	[ "$discarded" -eq 0 ]            || result="fail"
+
+	if [ "$result" = "pass" ]; then
+		ksft_test_result_pass \
+			"simple p2p: ping OK, passed=$passed discarded=$discarded"
+	else
+		ksft_test_result_fail \
+			"simple p2p: ping_rc=$ping_rc passed=$passed discarded=$discarded"
+	fi
+}
+
+# ----------------------------------------------------------------------------
+# TEST 6: RELAY E2E (self-contained, no bond)
+#
+# Talker sends VLAN-100 frames into bridge0 (sequence generator).  Bridge0
+# pushes an R-TAG and replicates to two redundant paths.  Bridge1 (eliminator)
+# recovers (shared, tag-pop) on both paths and forwards the deduplicated frame
+# to the listener.
+#
+# Topology:
+#   ns_talker (talker_eth.100) -- talker_eth/br0_uplink
+#       -- bridge0 (br_r0) -+- br0_swp0/br1_swp0 -+
+#                            \- br0_swp1/br1_swp1 -+
+#       -- bridge1 (br_r1) -- br1_downlink/listener_eth -- ns_listener
+#
+# FRER rules:
+#   bridge0 / br0_uplink ingress  : push idx=50, redirect br0_swp0, mirror br0_swp1
+#   bridge1 / br1_swp0 ingress    : recover (shared, tag-pop) idx=60, redirect br1_downlink
+#   bridge1 / br1_swp1 ingress    : recover idx=60 (bind same), redirect br1_downlink
+#   bridge1 / br1_downlink ingress: redirect br1_swp0 (reply path, bypass FDB)
+#
+# Pass criteria:
+#   - ping from ns_talker to ns_listener succeeds (rc=0)
+#   - tcpdump on listener captures exactly PING_COUNT ICMP echo-request frames
+#   - br1_swp0 tc stats: passed >= PING_COUNT, discarded >= PING_COUNT
+# ----------------------------------------------------------------------------
+# teardown_relay_tc
+#   Remove the clsact qdiscs from the relay bridge ports and flush the frer
+#   actions in both bridge namespaces.  Every step is best-effort.
+teardown_relay_tc()
+{
+	local entry netns
+
+	for entry in \
+		"$R_NS_BRIDGE0:$R_BR0_UPLINK" \
+		"$R_NS_BRIDGE1:$R_BR1_SWP0" "$R_NS_BRIDGE1:$R_BR1_SWP1" \
+		"$R_NS_BRIDGE1:$R_BR1_DOWNLINK"; do
+		ip netns exec "${entry%%:*}" $TC qdisc del dev "${entry##*:}" clsact \
+			2>/dev/null || :
+	done
+
+	for netns in "$R_NS_BRIDGE0" "$R_NS_BRIDGE1"; do
+		ip netns exec "$netns" $TC actions flush action frer 2>/dev/null || :
+	done
+}
+
+test_relay_e2e()
+{
+	local ping_rc=0
+	local dump_r1swp0
+	local total_passed total_discarded
+	local result="pass"
+	local ns
+
+	for ns in "$R_NS_TALKER" "$R_NS_BRIDGE0" "$R_NS_BRIDGE1" "$R_NS_LISTENER"; do
+		ip netns add "$ns" || {
+			echo "# relay e2e: failed to create netns $ns" >&2
+			ksft_test_result_skip "relay e2e: netns setup failed"
+			return
+		}
+	done
+
+	ip link add "$R_TALKER_ETH"   type veth peer name "$R_BR0_UPLINK"
+	ip link add "$R_BR0_SWP0"     type veth peer name "$R_BR1_SWP0"
+	ip link add "$R_BR0_SWP1"     type veth peer name "$R_BR1_SWP1"
+	ip link add "$R_BR1_DOWNLINK" type veth peer name "$R_LISTENER_ETH"
+
+	ip link set "$R_TALKER_ETH"   netns "$R_NS_TALKER"
+	ip link set "$R_BR0_UPLINK"   netns "$R_NS_BRIDGE0"
+	ip link set "$R_BR0_SWP0"     netns "$R_NS_BRIDGE0"
+	ip link set "$R_BR0_SWP1"     netns "$R_NS_BRIDGE0"
+	ip link set "$R_BR1_SWP0"     netns "$R_NS_BRIDGE1"
+	ip link set "$R_BR1_SWP1"     netns "$R_NS_BRIDGE1"
+	ip link set "$R_BR1_DOWNLINK" netns "$R_NS_BRIDGE1"
+	ip link set "$R_LISTENER_ETH" netns "$R_NS_LISTENER"
+
+	local ns_dev
+	for ns_dev in \
+		"$R_NS_TALKER:$R_TALKER_ETH" \
+		"$R_NS_BRIDGE0:$R_BR0_UPLINK" "$R_NS_BRIDGE0:$R_BR0_SWP0" \
+		"$R_NS_BRIDGE0:$R_BR0_SWP1" \
+		"$R_NS_BRIDGE1:$R_BR1_SWP0" "$R_NS_BRIDGE1:$R_BR1_SWP1" \
+		"$R_NS_BRIDGE1:$R_BR1_DOWNLINK" \
+		"$R_NS_LISTENER:$R_LISTENER_ETH"; do
+		local _ns="${ns_dev%%:*}"
+		local _dev="${ns_dev##*:}"
+		ip netns exec "$_ns" ip link set lo up
+		ip netns exec "$_ns" ip link set "$_dev" up
+	done
+
+	# bridge0: sequence generator, VLAN filtering
+	ip netns exec "$R_NS_BRIDGE0" ip link add name "$R_BR0" type bridge vlan_filtering 1
+	ip netns exec "$R_NS_BRIDGE0" ip link set "$R_BR0" up
+	ip netns exec "$R_NS_BRIDGE0" ip link set "$R_BR0_UPLINK" master "$R_BR0"
+	ip netns exec "$R_NS_BRIDGE0" ip link set "$R_BR0_SWP0" master "$R_BR0"
+	ip netns exec "$R_NS_BRIDGE0" ip link set "$R_BR0_SWP1" master "$R_BR0"
+
+	ip netns exec "$R_NS_BRIDGE0" bridge vlan add dev "$R_BR0_UPLINK" vid "$R_VLAN"
+	ip netns exec "$R_NS_BRIDGE0" bridge vlan add dev "$R_BR0_SWP0" vid "$R_VLAN"
+	ip netns exec "$R_NS_BRIDGE0" bridge vlan del dev "$R_BR0_SWP1" vid 1
+	ip netns exec "$R_NS_BRIDGE0" bridge vlan add dev "$R_BR0_SWP1" \
+		vid "$R_VLAN" pvid untagged
+	ip netns exec "$R_NS_BRIDGE0" bridge link set dev "$R_BR0_SWP0" learning off
+	ip netns exec "$R_NS_BRIDGE0" bridge link set dev "$R_BR0_SWP1" learning off
+	ip netns exec "$R_NS_BRIDGE0" bridge vlan set dev "$R_BR0_SWP0" vid "$R_VLAN" noflood
+	ip netns exec "$R_NS_BRIDGE0" bridge vlan set dev "$R_BR0_SWP1" vid "$R_VLAN" noflood
+
+	# bridge1: eliminator, VLAN filtering
+	ip netns exec "$R_NS_BRIDGE1" ip link add name "$R_BR1" type bridge vlan_filtering 1
+	ip netns exec "$R_NS_BRIDGE1" ip link set "$R_BR1" up
+	ip netns exec "$R_NS_BRIDGE1" ip link set "$R_BR1_SWP0" master "$R_BR1"
+	ip netns exec "$R_NS_BRIDGE1" ip link set "$R_BR1_SWP1" master "$R_BR1"
+	ip netns exec "$R_NS_BRIDGE1" ip link set "$R_BR1_DOWNLINK" master "$R_BR1"
+
+	ip netns exec "$R_NS_BRIDGE1" bridge vlan add dev "$R_BR1_SWP0" vid "$R_VLAN"
+	ip netns exec "$R_NS_BRIDGE1" bridge vlan del dev "$R_BR1_SWP1" vid 1
+	ip netns exec "$R_NS_BRIDGE1" bridge vlan add dev "$R_BR1_SWP1" \
+		vid "$R_VLAN" pvid untagged
+	ip netns exec "$R_NS_BRIDGE1" bridge vlan add dev "$R_BR1_DOWNLINK" vid "$R_VLAN"
+	ip netns exec "$R_NS_BRIDGE1" bridge link set dev "$R_BR1_SWP0" learning off
+	ip netns exec "$R_NS_BRIDGE1" bridge link set dev "$R_BR1_SWP1" learning off
+	ip netns exec "$R_NS_BRIDGE1" bridge vlan set dev "$R_BR1_SWP0" vid "$R_VLAN" noflood
+	ip netns exec "$R_NS_BRIDGE1" bridge vlan set dev "$R_BR1_SWP1" vid "$R_VLAN" noflood
+
+	# ns_talker: VLAN sub-interface
+	ip netns exec "$R_NS_TALKER" ip link add link "$R_TALKER_ETH" \
+		name "${R_TALKER_ETH}.${R_VLAN}" type vlan id "$R_VLAN"
+	ip netns exec "$R_NS_TALKER" ip link set "${R_TALKER_ETH}.${R_VLAN}" up
+	ip netns exec "$R_NS_TALKER" ip addr add "${R_IP_TALKER}/24" \
+		dev "${R_TALKER_ETH}.${R_VLAN}"
+
+	# ns_listener: VLAN sub-interface
+	ip netns exec "$R_NS_LISTENER" ip link add link "$R_LISTENER_ETH" \
+		name "${R_LISTENER_ETH}.${R_VLAN}" type vlan id "$R_VLAN"
+	ip netns exec "$R_NS_LISTENER" ip link set "${R_LISTENER_ETH}.${R_VLAN}" up
+	ip netns exec "$R_NS_LISTENER" ip addr add "${R_IP_LISTENER}/24" \
+		dev "${R_LISTENER_ETH}.${R_VLAN}"
+
+	# Static ARP (VLAN 100 flooding is disabled)
+	local mac_talker mac_listener
+	mac_talker=$(ip netns exec "$R_NS_TALKER" \
+		cat /sys/class/net/"${R_TALKER_ETH}.${R_VLAN}"/address)
+	mac_listener=$(ip netns exec "$R_NS_LISTENER" \
+		cat /sys/class/net/"${R_LISTENER_ETH}.${R_VLAN}"/address)
+	ip netns exec "$R_NS_TALKER"   ip neigh add "$R_IP_LISTENER" \
+		lladdr "$mac_listener" dev "${R_TALKER_ETH}.${R_VLAN}"
+	ip netns exec "$R_NS_LISTENER" ip neigh add "$R_IP_TALKER" \
+		lladdr "$mac_talker"   dev "${R_LISTENER_ETH}.${R_VLAN}"
+
+	# bridge0 / br0_uplink ingress: push R-TAG then replicate to both redundant paths.
+	# mirror must come before redirect because redirect is a terminating action.
+	ip netns exec "$R_NS_BRIDGE0" $TC qdisc add dev "$R_BR0_UPLINK" clsact
+	ip netns exec "$R_NS_BRIDGE0" $TC filter add dev "$R_BR0_UPLINK" ingress \
+		protocol 802.1Q flower skip_hw vlan_id "$R_VLAN" \
+		action frer push index $IDX_RELAY_PUSH \
+		action mirred egress mirror  dev "$R_BR0_SWP1" \
+		action mirred egress redirect dev "$R_BR0_SWP0"
+
+	# bridge1 / br1_swp0 ingress: create shared recover action (tag-pop)
+	ip netns exec "$R_NS_BRIDGE1" $TC qdisc add dev "$R_BR1_SWP0" clsact
+	ip netns exec "$R_NS_BRIDGE1" $TC filter add dev "$R_BR1_SWP0" ingress \
+		protocol all flower skip_hw \
+		action frer recover alg vector history-length 16 \
+			reset-time 2000 tag-pop index $IDX_RELAY_RCVY \
+		action mirred egress redirect dev "$R_BR1_DOWNLINK"
+
+	# bridge1 / br1_swp1 ingress: bind to the same shared recover action
+	ip netns exec "$R_NS_BRIDGE1" $TC qdisc add dev "$R_BR1_SWP1" clsact
+	ip netns exec "$R_NS_BRIDGE1" $TC filter add dev "$R_BR1_SWP1" ingress \
+		protocol all flower skip_hw \
+		action frer recover index $IDX_RELAY_RCVY \
+		action mirred egress redirect dev "$R_BR1_DOWNLINK"
+
+	# bridge1 / br1_downlink ingress: redirect VLAN 100 replies directly to br1_swp0
+	ip netns exec "$R_NS_BRIDGE1" $TC qdisc add dev "$R_BR1_DOWNLINK" clsact
+	ip netns exec "$R_NS_BRIDGE1" $TC filter add dev "$R_BR1_DOWNLINK" ingress \
+		protocol 802.1Q flower skip_hw vlan_id "$R_VLAN" \
+		action mirred egress redirect dev "$R_BR1_SWP0"
+
+	# Capture ICMP echo-requests on listener_eth.VLAN to verify exactly
+	# PING_COUNT deduplicated frames reach the listener after recovery.
+	local pcap cap_count
+	pcap=$(mktemp /tmp/frer_relay_XXXXXX.pcap)
+	capture_start_on "$R_NS_LISTENER" "${R_LISTENER_ETH}.${R_VLAN}" \
+		"$pcap" "icmp[icmptype] == icmp-echo"
+
+	ip netns exec "$R_NS_TALKER" \
+		$PING -c "$PING_COUNT" -W "$PING_TIMEOUT" -i 0.2 -q \
+		"$R_IP_LISTENER" >/dev/null 2>&1 || ping_rc=$?
+
+	capture_stop
+	cap_count=$(capture_count_on "$R_NS_LISTENER" "$pcap")
+	rm -f "$pcap"
+
+	dump_br1_swp0=$(ip netns exec "$R_NS_BRIDGE1" \
+		$TC -s filter show dev "$R_BR1_SWP0" ingress 2>/dev/null)
+
+	teardown_relay_tc
+	for ns in "$R_NS_TALKER" "$R_NS_BRIDGE0" "$R_NS_BRIDGE1" "$R_NS_LISTENER"; do
+		ip netns del "$ns" 2>/dev/null || true
+	done
+
+	total_passed=$(tc_stat    "$dump_br1_swp0" "passed")
+	total_discarded=$(tc_stat "$dump_br1_swp0" "discarded")
+	local tagless
+	tagless=$(tc_stat         "$dump_br1_swp0" "tagless")
+	total_discarded=$((total_discarded - tagless))
+
+	echo "# relay e2e: ping_rc=$ping_rc cap=$cap_count" \
+		"passed=$total_passed discarded=$total_discarded"
+
+	[ "$ping_rc"         -eq 0 ]            || result="fail"
+	[ "$cap_count"       -eq "$PING_COUNT" ] || result="fail"
+	[ "$total_passed"    -ge "$PING_COUNT" ] || result="fail"
+	[ "$total_discarded" -ge "$PING_COUNT" ] || result="fail"
+
+	if [ "$result" = "pass" ]; then
+		ksft_test_result_pass \
+			"relay e2e: ping OK, cap=$cap_count " \
+			"passed=$total_passed discarded=$total_discarded"
+	else
+		ksft_test_result_fail \
+			"relay e2e: ping_rc=$ping_rc cap=$cap_count " \
+			"passed=$total_passed discarded=$total_discarded" \
+			"(expected ping OK, cap=$PING_COUNT," \
+			"passed>=$PING_COUNT, discarded>=$PING_COUNT)"
+	fi
+}
+
+# ----------------------------------------------------------------------------
+# Main
+# ----------------------------------------------------------------------------
+main()
+{
+	ksft_print_header
+	check_prerequisites
+	load_module
+	setup_topology
+
+	if ! check_frer_action; then
+		ksft_set_plan "$NUM_TESTS"
+		for i in $(seq 1 "$NUM_TESTS"); do
+			ksft_test_result_skip \
+				"frer action not available in this kernel (test $i)"
+		done
+		ksft_print_cnts
+		exit "$KSFT_SKIP"
+	fi
+
+	ksft_set_plan "$NUM_TESTS"
+
+	test_push_verify_bond        # TEST 1: push on a0/b0, no recover, R-TAG on both paths
+	test_shared_recover_bond     # TEST 2: shared recover, dedup, ping succeeds
+	test_individual_recover_bond # TEST 3: individual recover, no dedup, double frames
+	test_no_tag_pop_bond         # TEST 4: shared recover without tag-pop, R-TAG preserved
+	test_simple_point_to_point   # TEST 5: single-path p2p, no bond
+	test_relay_e2e               # TEST 6: relay bridge topology
+
+	ksft_print_cnts
+
+	[ "$_ksft_fail" -eq 0 ] && ksft_exit_pass || ksft_exit_fail
+}
+
+main "$@"
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH net v2] net: wwan: iosm: bound device offsets in the MUX downlink decoder
From: Loic Poulain @ 2026-06-22  9:24 UTC (permalink / raw)
  To: Maoyi Xie
  Cc: Sergey Ryazanov, Johannes Berg, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, netdev, linux-kernel,
	stable
In-Reply-To: <178196118045.462404.11069139160448641355@maoyixie.com>

On Sat, Jun 20, 2026 at 3:13 PM Maoyi Xie <maoyixie.tju@gmail.com> wrote:
>
> mux_dl_adb_decode() walks a chain of aggregated datagram tables using
> offsets and lengths taken from the modem. first_table_index,
> next_table_index, table_length, datagram_index and datagram_length are
> all device supplied le values. Only first_table_index was checked, and
> only for being non zero. The decoder then formed adth = block +
> adth_index and read the table header and the datagram entries with no
> bound against the received skb. A modem that reports an index or a
> length past the downlink buffer makes the decoder read out of bounds.
>
> The buffer is IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE and skb->len is at most
> that, so skb->len is the real limit, but none of these in band offsets
> were checked against it.
>
> Validate every device offset and length against skb->len before use.
> The block header must fit. Each table header, on entry and after every
> next_table_index, must lie inside the skb. The datagram table must fit.
> Each datagram index and length must stay inside the skb. The header
> padding must not exceed the datagram length so the receive length does
> not wrap.
>
> This was reproduced under KASAN as a slab out of bounds read on a normal
> downlink receive once the iosm net device is up.
>
> Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support")
> Suggested-by: Loic Poulain <loic.poulain@oss.qualcomm.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Maoyi Xie <maoyixie.tju@gmail.com>

Reviewed-by: Loic Poulain <loic.poulain@oss.qualcomm.com>


> ---
> Changes in v2:
> - mux_dl_process_dg now uses intermediate native endian locals dg_index
>   and dg_len so the bound checks read cleaner and avoid the repeated
>   le32_to_cpu conversions, per Loic Poulain's review. No functional
>   change.
>
> Link to v1: https://lore.kernel.org/all/178185979029.4044562.9993615975949055530@maoyixie.com/
>
>  drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 33 ++++++++++++++++------
>  1 file changed, 24 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
> index bff46f7ca59f..ff9a4bc52f29 100644
> --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
> +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
> @@ -553,19 +553,21 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh,
>         u32 packet_offset, i, rc, dg_len;
>
>         for (i = 0; i < nr_of_dg; i++, dg++) {
> -               if (le32_to_cpu(dg->datagram_index)
> -                               < sizeof(struct mux_adbh))
> +               u32 dg_index = le32_to_cpu(dg->datagram_index);
> +
> +               dg_len = le16_to_cpu(dg->datagram_length);
> +
> +               if (dg_index < sizeof(struct mux_adbh))
>                         goto dg_error;
>
> -               /* Is the packet inside of the ADB */
> -               if (le32_to_cpu(dg->datagram_index) >=
> -                                       le32_to_cpu(adbh->block_length)) {
> +               /* Is the packet inside of the ADB and the received skb ? */
> +               if (dg_index >= le32_to_cpu(adbh->block_length) ||
> +                   dg_index >= skb->len ||
> +                   dg_len > skb->len - dg_index ||
> +                   dl_head_pad_len >= dg_len) {
>                         goto dg_error;
>                 } else {
> -                       packet_offset =
> -                               le32_to_cpu(dg->datagram_index) +
> -                               dl_head_pad_len;
> -                       dg_len = le16_to_cpu(dg->datagram_length);
> +                       packet_offset = dg_index + dl_head_pad_len;
>                         /* Pass the packet to the netif layer. */
>                         rc = ipc_mux_net_receive(ipc_mux, if_id, ipc_mux->wwan,
>                                                  packet_offset,
> @@ -595,6 +597,10 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux,
>         block = skb->data;
>         adbh = (struct mux_adbh *)block;
>
> +       /* The block header itself must fit in the received skb. */
> +       if (skb->len < sizeof(struct mux_adbh))
> +               goto adb_decode_err;
> +
>         /* Process the aggregated datagram tables. */
>         adth_index = le32_to_cpu(adbh->first_table_index);
>
> @@ -606,6 +612,11 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux,
>
>         /* Loop through mixed session tables. */
>         while (adth_index) {
> +               /* The table header must lie within the received skb. */
> +               if (adth_index < sizeof(struct mux_adbh) ||
> +                   adth_index > skb->len - sizeof(struct mux_adth))
> +                       goto adb_decode_err;
> +
>                 /* Get the reference to the table header. */
>                 adth = (struct mux_adth *)(block + adth_index);
>
> @@ -629,6 +640,10 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux,
>                 if (le16_to_cpu(adth->table_length) < sizeof(struct mux_adth))
>                         goto adb_decode_err;
>
> +               /* The whole datagram table must fit in the received skb. */
> +               if (le16_to_cpu(adth->table_length) > skb->len - adth_index)
> +                       goto adb_decode_err;
> +
>                 /* Calculate the number of datagrams. */
>                 nr_of_dg = (le16_to_cpu(adth->table_length) -
>                                         sizeof(struct mux_adth)) /
> --
> 2.34.1
>

^ permalink raw reply

* [PATCH net v2] net: airoha: Add retry mechanism to airoha_qdma_set_trtcm_param()
From: Lorenzo Bianconi @ 2026-06-22  9:35 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Lorenzo Bianconi
  Cc: Leto Liu, linux-arm-kernel, linux-mediatek, netdev, Brown Huang

From: Brown Huang <brown.huang@airoha.com>

CPU accesses QDMA via the bus. When multiple modules are using the bus
simultaneously, CPU access to QDMA may encounter bus timeouts and fail,
resulting in QDMA configuration failures and potentially causing packet
transmission issues. In order to mitigate the issue, introduce a retry
mechanism to airoha_qdma_set_trtcm_param routine in order to ensure the
configuration is correctly applied to the hardware.

Fixes: ef1ca9271313b ("net: airoha: Add sched HTB offload support")
Signed-off-by: Brown Huang <brown.huang@airoha.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
Changes in v2:
- Wait for write configuration to be completed before running
  airoha_qdma_get_trtcm_param() in airoha_qdma_set_trtcm_param().
- Link to v1: https://lore.kernel.org/r/20260608-airoha_qdma_set_trtcm_param-retry-fix-v1-1-f07704f0d8c5@kernel.org
---
 drivers/net/ethernet/airoha/airoha_eth.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index 3370c3df7c10..bb5c0599a4ee 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -2673,14 +2673,30 @@ static int airoha_qdma_set_trtcm_param(struct airoha_qdma *qdma, int channel,
 		     FIELD_PREP(TRTCM_METER_GROUP_MASK, group) |
 		     FIELD_PREP(TRTCM_PARAM_INDEX_MASK, idx) |
 		     FIELD_PREP(TRTCM_PARAM_RATE_TYPE_MASK, mode);
+	int i;
 
-	airoha_qdma_wr(qdma, REG_TRTCM_DATA_LOW(addr), val);
-	airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
+	for (i = 0; i < 100; i++) {
+		u32 data;
 
-	return read_poll_timeout(airoha_qdma_rr, val,
-				 val & TRTCM_PARAM_RW_DONE_MASK,
-				 USEC_PER_MSEC, 10 * USEC_PER_MSEC, true,
-				 qdma, REG_TRTCM_CFG_PARAM(addr));
+		airoha_qdma_wr(qdma, REG_TRTCM_DATA_LOW(addr), val);
+		wmb();
+		airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
+
+		if (read_poll_timeout(airoha_qdma_rr, data,
+				      data & TRTCM_PARAM_RW_DONE_MASK,
+				      USEC_PER_MSEC, 10 * USEC_PER_MSEC,
+				      true, qdma, REG_TRTCM_CFG_PARAM(addr)))
+			return -ETIMEDOUT;
+
+		if (airoha_qdma_get_trtcm_param(qdma, channel, addr, param,
+						mode, &data, NULL))
+			continue;
+
+		if (data == val)
+			return 0;
+	}
+
+	return -EBUSY;
 }
 
 static int airoha_qdma_set_trtcm_config(struct airoha_qdma *qdma, int channel,

---
base-commit: d07d80b6a129a44538cda1549b7acf95154fb197
change-id: 20260605-airoha_qdma_set_trtcm_param-retry-fix-a9d2956b9b2f

Best regards,
-- 
Lorenzo Bianconi <lorenzo@kernel.org>


^ permalink raw reply related

* Re: [PATCH 0/2] Add bpf_sock_read_xattr() kfunc to read socket xattrs
From: Christian Brauner @ 2026-06-22 10:02 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Alexei Starovoitov, Daniel Borkmann, Christian Brauner
  Cc: Alexander Viro, Jan Kara, Simon Horman, Kuniyuki Iwashima,
	Willem de Bruijn, linux-fsdevel, netdev, bpf, Andrii Nakryiko,
	Martin KaFai Lau, Eduard Zingerman, Kumar Kartikeya Dwivedi,
	Song Liu, Yonghong Song, Jiri Olsa
In-Reply-To: <20260617-work-bpf-sock-xattr-v1-0-a1276f7c9da3@kernel.org>

On Wed, 17 Jun 2026 13:18:26 +0200, Christian Brauner wrote:
> Add bpf_sock_read_xattr() kfunc to read socket xattrs
> 
> In c8db08110cbe ("Merge tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs")
> we added support for extended attributes for sockets. This comes in two
> flavors: sockfs and non-sockfs/filesystem sockets. Filesystem sockets
> are actual filesystem objects so reading xattrs must use dedicated fs
> helpers such as bpf_get_dentry_xattr() and bpf_get_file_xattr(). Those
> are inherently sleeping operations. Sockfs sockets on the other hand
> don't need to use sleeping operations as the underlying data structure
> is lockless. In addition, retrieval of sockfs extended attributes often
> happens from LSM hooks that only provide struct socket and it's
> completely nonsensical to grab a reference to a file, then force a
> sleeping operation to retrieve the xattr and drop the reference. We know
> that the sockfs file cannot go away while the LSM hook runs.
> 
> [...]

Applied to the vfs-7.3.kfunc branch of the vfs/vfs.git tree.
Patches in the vfs-7.3.kfunc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs-7.3.kfunc

[1/2] fs: Add bpf_sock_read_xattr() kfunc to read socket xattrs
      https://git.kernel.org/vfs/vfs/c/f80386e3838e
[2/2] selftests/bpf: Add test for bpf_sock_read_xattr() kfunc
      https://git.kernel.org/vfs/vfs/c/99a63a6aff40


^ permalink raw reply

* [PATCH v2 net-next] sctp: use sctp_auth_shkey_release() in error path for consistency
From: Wentao Liang @ 2026-06-22 10:02 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner, Xin Long, David S . Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, linux-sctp, netdev, linux-kernel, Wentao Liang

Use the proper refcount-aware helper sctp_auth_shkey_release() instead
of kfree() when freeing cur_key in the error path of sctp_auth_set_key().
While both are equivalent in the current code, using the helper maintains
abstraction consistency and prevents potential issues if the code is
reordered in the future.

Signed-off-by: Wentao Liang <vulab@iscas.ac.cn>
---
 net/sctp/auth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index be9782760f50..84708f87392f 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -753,7 +753,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
 	/* Create a new key data based on the info passed in */
 	key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL);
 	if (!key) {
-		kfree(cur_key);
+		sctp_auth_shkey_release(cur_key);
 		return -ENOMEM;
 	}
 
-- 
2.39.5 (Apple Git-154)


^ permalink raw reply related

* AW: AW: AW: AW: [PATCH net] net: usb: lan78xx: restore VLAN filter table after device reset
From: Sven Schuchmann @ 2026-06-22 10:07 UTC (permalink / raw)
  To: Nicolai Buchwitz
  Cc: Thangaraj Samynathan, Rengarajan Sundararajan,
	UNGLinuxDriver@microchip.com, Woojung.Huh@microchip.com,
	Andrew Lunn, David S . Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, netdev@vger.kernel.org, linux-usb@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <f76711d2f45c527f9ce0f5d288631bc6@tipi-net.de>

Hello Nicolai,

On 19.6.2026 16:01, Nicolai Buchwitz wrote:
> Hi Sven
> 
> On 19.6.2026 15:31, Sven Schuchmann wrote:
> > Hello Nicolai,
> >
> > looks good from my point of view
> > (Calling the lan78xx_write_vlan_table() from
> > lan78xx_mac_link_up() and from lan78xx_reset()).
> 
> Thanks.

Just to be clear I used this patch which is looking good:

---
 drivers/net/usb/lan78xx.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index a5132f2f9..a2db38650 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1571,6 +1571,7 @@ static void lan78xx_set_multicast(struct net_device *netdev)
 }
 
 static void lan78xx_rx_urb_submit_all(struct lan78xx_net *dev);
+static int lan78xx_write_vlan_table(struct lan78xx_net *dev);
 
 static int lan78xx_mac_reset(struct lan78xx_net *dev)
 {
@@ -2528,6 +2529,10 @@ static void lan78xx_mac_link_up(struct phylink_config *config,
 	if (ret < 0)
 		goto link_up_fail;
 
+	ret = lan78xx_write_vlan_table(dev);
+	if (ret < 0)
+		goto link_up_fail;
+
 	netif_start_queue(net);
 
 	return;
@@ -3081,14 +3086,20 @@ static int lan78xx_set_features(struct net_device *netdev,
 	return lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
 }
 
+static int lan78xx_write_vlan_table(struct lan78xx_net *dev)
+{
+	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+
+	return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, 0,
+				      DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
+}
+
 static void lan78xx_deferred_vlan_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
 			container_of(param, struct lan78xx_priv, set_vlan);
-	struct lan78xx_net *dev = pdata->dev;
 
-	lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, 0,
-			       DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
+	lan78xx_write_vlan_table(pdata->dev);
 }
 
 static int lan78xx_vlan_rx_add_vid(struct net_device *netdev,
@@ -3378,6 +3389,15 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 
 	lan78xx_set_multicast(dev->net);
 
+	/* The chip reset above also clears the VLAN filter table held in the
+	 * shared VLAN/DA hash RAM. The network stack does not re-add VLANs
+	 * after a silent device reset (e.g. on reset_resume after USB
+	 * autosuspend), so restore the table from our shadow copy here.
+	 */
+	ret = lan78xx_write_vlan_table(dev);
+	if (ret < 0)
+		return ret;
+
 	/* reset PHY */
 	ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
 	if (ret < 0)
-- 

> 
> > But I investigated a little more and it seems the hash table
> > (which is right behind the vlan table in the controllers memory)
> > also gets cleared. I wrote some random data into this table and have
> > seen that it gets also cleared. I think this needs to be fixed too.
> 
> Something like
> 
> static int lan78xx_write_mchash_table(struct lan78xx_net *dev)
> {
>         struct lan78xx_priv *pdata = (struct lan78xx_priv
> *)(dev->data[0]);
> 
>         return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
>                                       DP_SEL_VHF_VLAN_LEN,
>                                       DP_SEL_VHF_HASH_LEN,
> pdata->mchash_table); // from lan78xx_deferred_multicast_write)
> }
> 
> with callers in lan78xx_deferred_multicast_write() and
> lan78xx_mac_link_up(), should
> do the trick?

I used this one which is also looking good:
---
 drivers/net/usb/lan78xx.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index d449c1950fd3..6d7d349816a6 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1466,6 +1466,8 @@ static inline u32 lan78xx_hash(char addr[ETH_ALEN])
 	return (ether_crc(ETH_ALEN, addr) >> 23) & 0x1ff;
 }
 
+static int lan78xx_write_mchash_table(struct lan78xx_net *dev);
+
 static void lan78xx_deferred_multicast_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
@@ -1476,9 +1478,7 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
 	netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n",
 		  pdata->rfe_ctl);
 
-	ret = lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
-				     DP_SEL_VHF_VLAN_LEN,
-				     DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+	ret = lan78xx_write_mchash_table(dev);
 	if (ret < 0)
 		goto multicast_write_done;
 
@@ -2533,6 +2533,10 @@ static void lan78xx_mac_link_up(struct phylink_config *config,
 	if (ret < 0)
 		goto link_up_fail;
 
+	ret = lan78xx_write_mchash_table(dev);
+	if (ret < 0)
+		goto link_up_fail;
+
 	netif_start_queue(net);
 
 	return;
@@ -3094,6 +3098,16 @@ static int lan78xx_write_vlan_table(struct lan78xx_net *dev)
 				      DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
 }
 
+static int lan78xx_write_mchash_table(struct lan78xx_net *dev)
+{
+	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+
+	return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
+				      DP_SEL_VHF_VLAN_LEN,
+				      DP_SEL_VHF_HASH_LEN,
+				      pdata->mchash_table);
+}
+
 static void lan78xx_deferred_vlan_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
@@ -3398,6 +3412,10 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 	if (ret < 0)
 		return ret;
 
+	ret = lan78xx_write_mchash_table(dev);
+	if (ret < 0)
+		return ret;
+
 	/* reset PHY */
 	ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
 	if (ret < 0)
-- 

> 
> >
> > In the Datasheet from the LAN7801 I can read:
> > "After a reset event, the RFE will automatically initialize the
> > contents of the VHF to 0h."
> > Where VHF also refers to the hash table.
> > But I still do not understand what reset is happening when I just
> > unplug the network cable....
> 
> I suspect it is triggered from the PHY:
> 
> 8.10 (MAC Reset Watchdog Timer):
> "A portion of the MAC operates on clocks generated by the Ethernet PHY
> [...] PHY Reset
> (PHY_RST) results in resetting the portion of the MAC operating on the
> PHY receive and
> transmit clocks."
> 
> So which PHY are you using?

I am using a DP83TC812R from TI. There is currently no driver available
so I ported this one
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/phy/dp83tg720.c
which is working fine (maybe I will also publish a patch for this).

The strange thing is that the MAC Reset Watchdog Timer seems
to occur "silently", so that neither the MAC nor the PHY driver knows
about this reset.

Nevertheless, the two patches fixed my problem and
I think they should be mainlined.

Regards,

   Sven

^ permalink raw reply related

* [PATCH v3 0/3] Add drm_ras netlink error event support
From: Riana Tauro @ 2026-06-22 10:17 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: aravind.iddamsetty, anshuman.gupta, rodrigo.vivi, joonas.lahtinen,
	kuba, simona.vetter, airlied, pratik.bari, joshua.santosh.ranjan,
	ashwin.kumar.kulkarni, shubham.kumar, ravi.kishore.koppuravuri,
	raag.jadav, maarten.lankhorst, mallesh.koujalagi, soham.purkait,
	Riana Tauro

Define a new netlink event 'error-event' and a new multicast group
'error-report' in drm_ras. Each event contains device name, node and
error information to identify the error triggering the event.

Add drm_ras_nl_error_event() to trigger an event from the driver.
Wire this support to xe drm_ras to notify userspace whenever a GT or
SoC error occurs in PVC. Also add support for correctable errors in
CRI.

$ sudo ynl --family drm_ras --output-json --subscribe error-report

{
    "name": "error-event",
     "msg": {
         "device-name": "0000:03:00.0",
         "node-id": 1,
         "node-name": "uncorrectable-errors",
         "error-id": 1,
         "error-name": "core-compute",
         "error-value": 1
     }
}

Rev2: use ynl in document and commit message
      fix cosmetic review comments
      simplify caller 

Rev3: replace error-event with error-report
      add has_drm_ras check
      add support for correctable errors in CRI

Riana Tauro (3):
  drm/drm_ras: Add drm_ras netlink error event
  drm/xe/xe_drm_ras: Add error-event support for PVC
  drm/xe/xe_ras: Add error-event support for Crescent Island

 Documentation/gpu/drm-ras.rst            | 21 ++++++
 Documentation/netlink/specs/drm_ras.yaml | 48 +++++++++++++
 drivers/gpu/drm/drm_ras.c                | 87 ++++++++++++++++++++++++
 drivers/gpu/drm/drm_ras_nl.c             |  6 ++
 drivers/gpu/drm/drm_ras_nl.h             |  4 ++
 drivers/gpu/drm/xe/xe_drm_ras.c          | 30 ++++++++
 drivers/gpu/drm/xe/xe_drm_ras.h          |  3 +
 drivers/gpu/drm/xe/xe_hw_error.c         |  5 +-
 drivers/gpu/drm/xe/xe_ras.c              | 53 +++++++++++++++
 include/drm/drm_ras.h                    |  5 ++
 include/uapi/drm/drm_ras.h               | 15 ++++
 11 files changed, 276 insertions(+), 1 deletion(-)

-- 
2.47.1


^ permalink raw reply

* [PATCH v3 1/3] drm/drm_ras: Add drm_ras netlink error event
From: Riana Tauro @ 2026-06-22 10:17 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: aravind.iddamsetty, anshuman.gupta, rodrigo.vivi, joonas.lahtinen,
	kuba, simona.vetter, airlied, pratik.bari, joshua.santosh.ranjan,
	ashwin.kumar.kulkarni, shubham.kumar, ravi.kishore.koppuravuri,
	raag.jadav, maarten.lankhorst, mallesh.koujalagi, soham.purkait,
	Riana Tauro, Zack McKevitt, Lijo Lazar, Hawking Zhang,
	David S. Miller, Paolo Abeni, Eric Dumazet
In-Reply-To: <20260622101716.3313496-5-riana.tauro@intel.com>

Define a new netlink event 'error-event' and a new multicast group
'error-report' in drm_ras. Each event contains device name, node and
error information to identify the error triggering the event.

Add drm_ras_nl_error_event() to trigger an event from the driver.
Userspace must subscribe to 'error-report' to receive 'error-event'
notifications.

Usage:

$ sudo ynl --family drm_ras --subscribe error-report

Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Zack McKevitt <zachary.mckevitt@oss.qualcomm.com>
Cc: Lijo Lazar <lijo.lazar@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
---
v2: remove redundant initialization
    remove unnecessary space
    use ynl in commit message and doc (Raag)
    simplify doc for error-event attrs

v3: rename error-notify to error-report
    Replace notify with report across the file (Raag)
---
 Documentation/gpu/drm-ras.rst            | 21 ++++++
 Documentation/netlink/specs/drm_ras.yaml | 48 +++++++++++++
 drivers/gpu/drm/drm_ras.c                | 87 ++++++++++++++++++++++++
 drivers/gpu/drm/drm_ras_nl.c             |  6 ++
 drivers/gpu/drm/drm_ras_nl.h             |  4 ++
 include/drm/drm_ras.h                    |  5 ++
 include/uapi/drm/drm_ras.h               | 15 ++++
 7 files changed, 186 insertions(+)

diff --git a/Documentation/gpu/drm-ras.rst b/Documentation/gpu/drm-ras.rst
index 83c21853b74b..406e4c49bac1 100644
--- a/Documentation/gpu/drm-ras.rst
+++ b/Documentation/gpu/drm-ras.rst
@@ -56,6 +56,7 @@ User space tools can:
   ``node-id`` and ``error-id`` as parameters.
 * Clear specific error counters with the ``clear-error-counter`` command, using both
   ``node-id`` and ``error-id`` as parameters.
+* Subscribe to the ``error-report`` multicast group to receive ``error-event``.
 
 YAML-based Interface
 --------------------
@@ -111,3 +112,23 @@ Example: Clear an error counter for a given node
 
     sudo ynl --family drm_ras --do clear-error-counter --json '{"node-id":0, "error-id":1}'
     None
+
+Example: Subscribe to ``error-report`` multicast group
+
+.. code-block:: bash
+
+    sudo ynl --family drm_ras --output-json --subscribe error-report
+
+.. code-block:: json
+
+    {
+        "name": "error-event",
+        "msg": {
+            "device-name": "0000:03:00.0",
+            "node-id": 1,
+            "node-name": "uncorrectable-errors",
+            "error-id": 1,
+            "error-name": "error_name1",
+            "error-value": 1
+        }
+    }
diff --git a/Documentation/netlink/specs/drm_ras.yaml b/Documentation/netlink/specs/drm_ras.yaml
index e113056f8c01..8aed3d4515e5 100644
--- a/Documentation/netlink/specs/drm_ras.yaml
+++ b/Documentation/netlink/specs/drm_ras.yaml
@@ -69,6 +69,33 @@ attribute-sets:
         name: error-value
         type: u32
         doc: Current value of the requested error counter.
+  -
+    name: error-event-attrs
+    attributes:
+      -
+        name: device-name
+        type: string
+        doc: Device (PCI BDF, UUID) that reported the error.
+      -
+        name: node-id
+        type: u32
+        doc: ID of the node that reported the error.
+      -
+        name: node-name
+        type: string
+        doc: Name of the node that reported the error.
+      -
+        name: error-id
+        type: u32
+        doc: ID of the error counter.
+      -
+        name: error-name
+        type: string
+        doc: Name of the error.
+      -
+        name: error-value
+        type: u32
+        doc: Current value of the error counter.
 
 operations:
   list:
@@ -124,3 +151,24 @@ operations:
       do:
         request:
           attributes: *id-attrs
+    -
+      name: error-event
+      doc: >-
+           Report an error event to userspace.
+           The event includes the device, node and error information
+           of the error that triggered the event.
+      attribute-set: error-event-attrs
+      mcgrp: error-report
+      event:
+        attributes:
+          - device-name
+          - node-id
+          - node-name
+          - error-id
+          - error-name
+          - error-value
+
+mcast-groups:
+  list:
+    -
+      name: error-report
diff --git a/drivers/gpu/drm/drm_ras.c b/drivers/gpu/drm/drm_ras.c
index d6eab29a1394..77f912a4d101 100644
--- a/drivers/gpu/drm/drm_ras.c
+++ b/drivers/gpu/drm/drm_ras.c
@@ -41,6 +41,11 @@
  *    Userspace must provide Node ID, Error ID.
  *    Clears specific error counter of a node if supported.
  *
+ * 4. ERROR_REPORT: Subscribe to this multicast group to receive error events
+ *
+ * 5. ERROR_EVENT: Report an error event to userspace. The event contains device, node
+ *    and error information that triggered the event.
+ *
  * Node registration:
  *
  * - drm_ras_node_register(): Registers a new node and assigns
@@ -186,6 +191,34 @@ static int msg_reply_value(struct sk_buff *msg, u32 error_id,
 			   value);
 }
 
+static int msg_put_error_event_attrs(struct sk_buff *msg, struct drm_ras_node *node,
+				     u32 error_id, const char *error_name, u32 value)
+{
+	int ret;
+
+	ret = nla_put_string(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_DEVICE_NAME, node->device_name);
+	if (ret)
+		return ret;
+
+	ret = nla_put_u32(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_NODE_ID, node->id);
+	if (ret)
+		return ret;
+
+	ret = nla_put_string(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_NODE_NAME, node->node_name);
+	if (ret)
+		return ret;
+
+	ret = nla_put_u32(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_ID, error_id);
+	if (ret)
+		return ret;
+
+	ret = nla_put_string(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_NAME, error_name);
+	if (ret)
+		return ret;
+
+	return nla_put_u32(msg, DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_VALUE, value);
+}
+
 static int doit_reply_value(struct genl_info *info, u32 node_id,
 			    u32 error_id)
 {
@@ -222,6 +255,60 @@ static int doit_reply_value(struct genl_info *info, u32 node_id,
 	return genlmsg_reply(msg, info);
 }
 
+/**
+ * drm_ras_nl_error_event() - Report an error event
+ * @node: Node structure
+ * @error_id: ID of the error
+ * @error_name: Name of the error
+ * @value: Value associated with the error
+ * @flags: GFP flags for memory allocation
+ *
+ * Report an error-event to userspace using the error-report multicast group.
+ *
+ * Return: 0 on success, or negative errno on failure.
+ */
+int drm_ras_nl_error_event(struct drm_ras_node *node, u32 error_id, const char *error_name,
+			   u32 value, gfp_t flags)
+{
+	struct genl_info info;
+	struct sk_buff *msg;
+	struct nlattr *hdr;
+	int ret;
+
+	if (!error_name)
+		return -EINVAL;
+
+	if (!genl_has_listeners(&drm_ras_nl_family, &init_net, DRM_RAS_NLGRP_ERROR_REPORT))
+		return 0;
+
+	genl_info_init_ntf(&info, &drm_ras_nl_family, DRM_RAS_CMD_ERROR_EVENT);
+
+	msg = genlmsg_new(NLMSG_GOODSIZE, flags);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_iput(msg, &info);
+	if (!hdr) {
+		ret = -EMSGSIZE;
+		goto free_msg;
+	}
+
+	ret = msg_put_error_event_attrs(msg, node, error_id, error_name, value);
+	if (ret)
+		goto cancel_msg;
+
+	genlmsg_end(msg, hdr);
+	genlmsg_multicast(&drm_ras_nl_family, msg, 0, DRM_RAS_NLGRP_ERROR_REPORT, flags);
+	return 0;
+
+cancel_msg:
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	nlmsg_free(msg);
+	return ret;
+}
+EXPORT_SYMBOL(drm_ras_nl_error_event);
+
 /**
  * drm_ras_nl_get_error_counter_dumpit() - Dump all Error Counters
  * @skb: Netlink message buffer
diff --git a/drivers/gpu/drm/drm_ras_nl.c b/drivers/gpu/drm/drm_ras_nl.c
index dea1c1b2494e..9d3123cc9f9c 100644
--- a/drivers/gpu/drm/drm_ras_nl.c
+++ b/drivers/gpu/drm/drm_ras_nl.c
@@ -58,6 +58,10 @@ static const struct genl_split_ops drm_ras_nl_ops[] = {
 	},
 };
 
+static const struct genl_multicast_group drm_ras_nl_mcgrps[] = {
+	[DRM_RAS_NLGRP_ERROR_REPORT] = { "error-report", },
+};
+
 struct genl_family drm_ras_nl_family __ro_after_init = {
 	.name		= DRM_RAS_FAMILY_NAME,
 	.version	= DRM_RAS_FAMILY_VERSION,
@@ -66,4 +70,6 @@ struct genl_family drm_ras_nl_family __ro_after_init = {
 	.module		= THIS_MODULE,
 	.split_ops	= drm_ras_nl_ops,
 	.n_split_ops	= ARRAY_SIZE(drm_ras_nl_ops),
+	.mcgrps		= drm_ras_nl_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(drm_ras_nl_mcgrps),
 };
diff --git a/drivers/gpu/drm/drm_ras_nl.h b/drivers/gpu/drm/drm_ras_nl.h
index a398643572a5..03ec275aca92 100644
--- a/drivers/gpu/drm/drm_ras_nl.h
+++ b/drivers/gpu/drm/drm_ras_nl.h
@@ -21,6 +21,10 @@ int drm_ras_nl_get_error_counter_dumpit(struct sk_buff *skb,
 int drm_ras_nl_clear_error_counter_doit(struct sk_buff *skb,
 					struct genl_info *info);
 
+enum {
+	DRM_RAS_NLGRP_ERROR_REPORT,
+};
+
 extern struct genl_family drm_ras_nl_family;
 
 #endif /* _LINUX_DRM_RAS_GEN_H */
diff --git a/include/drm/drm_ras.h b/include/drm/drm_ras.h
index f2a787bc4f64..d4a275efdbb0 100644
--- a/include/drm/drm_ras.h
+++ b/include/drm/drm_ras.h
@@ -78,9 +78,14 @@ struct drm_device;
 #if IS_ENABLED(CONFIG_DRM_RAS)
 int drm_ras_node_register(struct drm_ras_node *node);
 void drm_ras_node_unregister(struct drm_ras_node *node);
+int drm_ras_nl_error_event(struct drm_ras_node *node, u32 error_id, const char *error_name,
+			   u32 value, gfp_t flags);
 #else
 static inline int drm_ras_node_register(struct drm_ras_node *node) { return 0; }
 static inline void drm_ras_node_unregister(struct drm_ras_node *node) { }
+static inline int drm_ras_nl_error_event(struct drm_ras_node *node, u32 error_id,
+					 const char *error_name, u32 value, gfp_t flags)
+{ return 0; }
 #endif
 
 #endif
diff --git a/include/uapi/drm/drm_ras.h b/include/uapi/drm/drm_ras.h
index 218a3ee86805..eab8231aa87c 100644
--- a/include/uapi/drm/drm_ras.h
+++ b/include/uapi/drm/drm_ras.h
@@ -38,13 +38,28 @@ enum {
 	DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX = (__DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX - 1)
 };
 
+enum {
+	DRM_RAS_A_ERROR_EVENT_ATTRS_DEVICE_NAME = 1,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_NODE_ID,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_NODE_NAME,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_ID,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_NAME,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_ERROR_VALUE,
+
+	__DRM_RAS_A_ERROR_EVENT_ATTRS_MAX,
+	DRM_RAS_A_ERROR_EVENT_ATTRS_MAX = (__DRM_RAS_A_ERROR_EVENT_ATTRS_MAX - 1)
+};
+
 enum {
 	DRM_RAS_CMD_LIST_NODES = 1,
 	DRM_RAS_CMD_GET_ERROR_COUNTER,
 	DRM_RAS_CMD_CLEAR_ERROR_COUNTER,
+	DRM_RAS_CMD_ERROR_EVENT,
 
 	__DRM_RAS_CMD_MAX,
 	DRM_RAS_CMD_MAX = (__DRM_RAS_CMD_MAX - 1)
 };
 
+#define DRM_RAS_MCGRP_ERROR_REPORT	"error-report"
+
 #endif /* _UAPI_LINUX_DRM_RAS_H */
-- 
2.47.1


^ permalink raw reply related

* [PATCH v3 2/3] drm/xe/xe_drm_ras: Add error-event support for PVC
From: Riana Tauro @ 2026-06-22 10:17 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: aravind.iddamsetty, anshuman.gupta, rodrigo.vivi, joonas.lahtinen,
	kuba, simona.vetter, airlied, pratik.bari, joshua.santosh.ranjan,
	ashwin.kumar.kulkarni, shubham.kumar, ravi.kishore.koppuravuri,
	raag.jadav, maarten.lankhorst, mallesh.koujalagi, soham.purkait,
	Riana Tauro
In-Reply-To: <20260622101716.3313496-5-riana.tauro@intel.com>

Report drm_ras error event to userspace when an error occurs.
Add support for core-compute and SoC errors in PVC.

$ sudo ynl --family drm_ras --output-json --subscribe error-report

{
    "name": "error-event",
    "msg": {
        "device-name": "0000:03:00.0",
        "node-id": 1,
        "node-name": "uncorrectable-errors",
        "error-id": 1,
        "error-name": "core-compute",
        "error-value": 1
    }
}

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
---
v2: use ynl (Raag)
    use value as function parameter
    move error event call to hw_error_source_handler 

v3: add has_drm_ras check
---
 drivers/gpu/drm/xe/xe_drm_ras.c  | 30 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_drm_ras.h  |  3 +++
 drivers/gpu/drm/xe/xe_hw_error.c |  5 ++++-
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
index 7937d8ba0ed9..36afdfb5e412 100644
--- a/drivers/gpu/drm/xe/xe_drm_ras.c
+++ b/drivers/gpu/drm/xe/xe_drm_ras.c
@@ -185,6 +185,36 @@ static int register_nodes(struct xe_device *xe)
 	return ret;
 }
 
+/**
+ * xe_drm_ras_event() - Report drm-ras error event to userspace
+ * @xe: xe device structure
+ * @component: error component (see &enum drm_xe_ras_error_component)
+ * @severity: error severity (see &enum drm_xe_ras_error_severity)
+ * @value: value of error counter
+ * @flags: flags for allocation
+ *
+ * Report an error-event to userspace.
+ */
+void xe_drm_ras_event(struct xe_device *xe, u32 component, u32 severity, u32 value, gfp_t flags)
+{
+	struct xe_drm_ras *ras = &xe->ras;
+	struct xe_drm_ras_counter *info = ras->info[severity];
+	struct drm_ras_node *node = &ras->node[severity];
+	int ret;
+
+	/* Event is supported only if drm_ras is enabled */
+	if (!xe->info.has_drm_ras)
+		return;
+
+	if (!info || !info[component].name)
+		return;
+
+	ret = drm_ras_nl_error_event(node, component, info[component].name, value, flags);
+	if (ret)
+		drm_err(&xe->drm, "RAS error-event failed: %d for %s %s\n", ret,
+			info[component].name, error_severity[severity]);
+}
+
 /**
  * xe_drm_ras_init() - Initialize DRM RAS
  * @xe: xe device instance
diff --git a/drivers/gpu/drm/xe/xe_drm_ras.h b/drivers/gpu/drm/xe/xe_drm_ras.h
index 365c70e93e82..2a694bf69478 100644
--- a/drivers/gpu/drm/xe/xe_drm_ras.h
+++ b/drivers/gpu/drm/xe/xe_drm_ras.h
@@ -5,11 +5,14 @@
 #ifndef _XE_DRM_RAS_H_
 #define _XE_DRM_RAS_H_
 
+#include <linux/types.h>
+
 struct xe_device;
 
 #define for_each_error_severity(i)	\
 	for (i = 0; i < DRM_XE_RAS_ERR_SEV_MAX; i++)
 
 int xe_drm_ras_init(struct xe_device *xe);
+void xe_drm_ras_event(struct xe_device *xe, u32 component, u32 severity, u32 value, gfp_t flags);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
index 4a4b363fc844..a833cecc74ec 100644
--- a/drivers/gpu/drm/xe/xe_hw_error.c
+++ b/drivers/gpu/drm/xe/xe_hw_error.c
@@ -432,7 +432,7 @@ static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_er
 	struct xe_drm_ras *ras = &xe->ras;
 	struct xe_drm_ras_counter *info = ras->info[severity];
 	unsigned long flags, err_src;
-	u32 err_bit;
+	u32 err_bit, value;
 
 	if (!IS_DGFX(xe))
 		return;
@@ -495,6 +495,9 @@ static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_er
 			gt_hw_error_handler(tile, hw_err, error_id);
 		if (err_bit == XE_SOC_ERROR)
 			soc_hw_error_handler(tile, hw_err, error_id);
+
+		value = atomic_read(&info[error_id].counter);
+		xe_drm_ras_event(xe, error_id, severity, value, GFP_ATOMIC);
 	}
 
 clear_reg:
-- 
2.47.1


^ permalink raw reply related

* [PATCH v3 3/3] drm/xe/xe_ras: Add error-event support for CRI
From: Riana Tauro @ 2026-06-22 10:17 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: aravind.iddamsetty, anshuman.gupta, rodrigo.vivi, joonas.lahtinen,
	kuba, simona.vetter, airlied, pratik.bari, joshua.santosh.ranjan,
	ashwin.kumar.kulkarni, shubham.kumar, ravi.kishore.koppuravuri,
	raag.jadav, maarten.lankhorst, mallesh.koujalagi, soham.purkait,
	Riana Tauro
In-Reply-To: <20260622101716.3313496-5-riana.tauro@intel.com>

Add error-event support for correctable errors in CRI.
When an interrupt for correctable errors is received, an error-event is
reported to userspace for every error that crossed its threshold.

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
---
 drivers/gpu/drm/xe/xe_ras.c | 53 +++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
index 44f4e1a3455b..acf3207aa2fd 100644
--- a/drivers/gpu/drm/xe/xe_ras.c
+++ b/drivers/gpu/drm/xe/xe_ras.c
@@ -77,6 +77,18 @@ static u8 drm_to_xe_ras_severity(u8 severity)
 	}
 }
 
+static u8 xe_to_drm_ras_severity(u8 severity)
+{
+	switch (severity) {
+	case XE_RAS_SEV_CORRECTABLE:
+		return DRM_XE_RAS_ERR_SEV_CORRECTABLE;
+	case XE_RAS_SEV_UNCORRECTABLE:
+		return DRM_XE_RAS_ERR_SEV_UNCORRECTABLE;
+	default:
+		return DRM_XE_RAS_ERR_SEV_MAX;
+	}
+}
+
 static u8 drm_to_xe_ras_component(u8 component)
 {
 	switch (component) {
@@ -95,6 +107,24 @@ static u8 drm_to_xe_ras_component(u8 component)
 	}
 }
 
+static u8 xe_to_drm_ras_component(u8 component)
+{
+	switch (component) {
+	case XE_RAS_COMP_DEVICE_MEMORY:
+		return DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY;
+	case XE_RAS_COMP_CORE_COMPUTE:
+		return DRM_XE_RAS_ERR_COMP_CORE_COMPUTE;
+	case XE_RAS_COMP_PCIE:
+		return DRM_XE_RAS_ERR_COMP_PCIE;
+	case XE_RAS_COMP_FABRIC:
+		return DRM_XE_RAS_ERR_COMP_FABRIC;
+	case XE_RAS_COMP_SOC_INTERNAL:
+		return DRM_XE_RAS_ERR_COMP_SOC_INTERNAL;
+	default:
+		return DRM_XE_RAS_ERR_COMP_MAX;
+	}
+}
+
 static int ras_status_to_errno(u32 status)
 {
 	switch (status) {
@@ -131,6 +161,27 @@ static inline const char *comp_to_str(u8 component)
 	return xe_ras_components[component];
 }
 
+static void ras_send_error_event(struct xe_device *xe, u8 severity, u8 component)
+{
+	u8 drm_severity, drm_component;
+	u32 value;
+	int ret;
+
+	drm_severity = xe_to_drm_ras_severity(severity);
+	if (drm_severity == DRM_XE_RAS_ERR_SEV_MAX)
+		return;
+
+	drm_component = xe_to_drm_ras_component(component);
+	if (drm_component == DRM_XE_RAS_ERR_COMP_MAX)
+		return;
+
+	ret = xe_ras_get_counter(xe, severity, component, &value);
+	if (ret)
+		return;
+
+	xe_drm_ras_event(xe, drm_component, drm_severity, value, GFP_KERNEL);
+}
+
 void xe_ras_counter_threshold_crossed(struct xe_device *xe,
 				      struct xe_sysctrl_event_response *response)
 {
@@ -152,6 +203,8 @@ void xe_ras_counter_threshold_crossed(struct xe_device *xe,
 		severity = errors[id].common.severity;
 		component = errors[id].common.component;
 
+		ras_send_error_event(xe, severity, component);
+
 		xe_warn(xe, "[RAS]: %s %s detected\n",
 			comp_to_str(component), sev_to_str(severity));
 	}
-- 
2.47.1


^ permalink raw reply related

* Re: [PATCH v3 net] net: watchdog: fix refcount tracking races
From: Marek Szyprowski @ 2026-06-22 10:22 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	netdev, eric.dumazet, syzbot+381d82bbf0253710b35d,
	syzbot+3479efbc2821cb2a79f2
In-Reply-To: <CANn89i+GVoQxFS26=s5w5vUa-ytRUgD1NM6MDZQdtB7FtcXv-w@mail.gmail.com>

On 22.06.2026 10:59, Eric Dumazet wrote:
> On Wed, Jun 17, 2026 at 3:48 AM Marek Szyprowski
> <m.szyprowski@samsung.com> wrote:
>> On 11.06.2026 17:27, Eric Dumazet wrote:
>>> Blamed commit converted the untracked dev_hold()/dev_put() calls
>>> in the watchdog code to use the tracked dev_hold_track()/dev_put_track()
>>> (which were later renamed/interfaced to netdev_hold() and netdev_put()).
>>>
>>> By introducing dev->watchdog_dev_tracker to store the
>>> reference tracking information without adding synchronization
>>> between netdev_watchdog_up() and dev_watchdog(), it enabled the
>>> race condition where this pointer could be overwritten or freed
>>> concurrently, leading to the list corruption crash syzbot reported:
>>>
>>> list_del corruption, ffff888114a18c00->next is NULL
>>>  kernel BUG at lib/list_debug.c:52 !
>>> Oops: invalid opcode: 0000 [#1] SMP KASAN PTI
>>> CPU: 1 UID: 0 PID: 91 Comm: kworker/u8:5 Not tainted syzkaller #0 PREEMPT(lazy)
>>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/09/2026
>>> Workqueue: events_unbound linkwatch_event
>>>  RIP: 0010:__list_del_entry_valid_or_report.cold+0x22/0x2a lib/list_debug.c:52
>>> Call Trace:
>>>  <TASK>
>>>   __list_del_entry_valid include/linux/list.h:132 [inline]
>>>   __list_del_entry include/linux/list.h:246 [inline]
>>>   list_move_tail include/linux/list.h:341 [inline]
>>>   ref_tracker_free+0x1a7/0x6c0 lib/ref_tracker.c:329
>>>   netdev_tracker_free include/linux/netdevice.h:4491 [inline]
>>>   netdev_put include/linux/netdevice.h:4508 [inline]
>>>   netdev_put include/linux/netdevice.h:4504 [inline]
>>>   netdev_watchdog_down net/sched/sch_generic.c:600 [inline]
>>>   dev_deactivate_many+0x28c/0xfe0 net/sched/sch_generic.c:1363
>>>   dev_deactivate+0x109/0x1d0 net/sched/sch_generic.c:1397
>>>   linkwatch_do_dev net/core/link_watch.c:184 [inline]
>>>   linkwatch_do_dev+0xd3/0x120 net/core/link_watch.c:166
>>>   __linkwatch_run_queue+0x3a5/0x810 net/core/link_watch.c:240
>>>   linkwatch_event+0x8f/0xc0 net/core/link_watch.c:314
>>>   process_one_work+0xa0e/0x1980 kernel/workqueue.c:3314
>>>   process_scheduled_works kernel/workqueue.c:3397 [inline]
>>>   worker_thread+0x5ef/0xe50 kernel/workqueue.c:3478
>>>   kthread+0x370/0x450 kernel/kthread.c:436
>>>   ret_from_fork+0x69a/0xc80 arch/x86/kernel/process.c:158
>>>   ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
>>>
>>> This patch has three coordinated parts:
>>>
>>> 1) Add dev->watchdog_lock and dev->watchdog_ref_held to serialize watchdog operations.
>>>
>>> 2) Remove netdev_watchdog_up() call from netif_carrier_on():
>>>    This ensures netdev_watchdog_up() is only called from process/BH context
>>>    (via linkwatch workqueue dev_activate()), allowing us to use
>>>    spin_lock_bh() for synchronization.
>>>
>>> 3) Synchronize watchdog up and watchdog timer:
>>>    Protect netdev_watchdog_up() with tx_global_lock and watchdog_lock.
>>>    Only allocate a new tracker in netdev_watchdog_up() if one is
>>>    not already present.
>>>    In dev_watchdog(), ensure we don't release the tracker if the
>>>    timer was rescheduled either by dev_watchdog() itself or concurrently
>>>    by netdev_watchdog_up().
>>>
>>> Fixes: f12bf6f3f942 ("net: watchdog: add net device refcount tracker")
>>> Reported-by: syzbot+381d82bbf0253710b35d@syzkaller.appspotmail.com
>>> Closes: https://lore.kernel.org/netdev/6a26b751.c25708ab.1b19ef.0013.GAE@google.com/T/#u
>>> Tested-by: syzbot+3479efbc2821cb2a79f2@syzkaller.appspotmail.com
>>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> This patch landed recently in linux-next as commit 8eed5519e496 ("net: watchdog:
>> fix refcount tracking races"). In my tests I found that it causes the following
>> deadlock during system suspend/resume on QEmu's ARM64bit 'virt' machine:
>>
>> root@target:~# time rtcwake -s10 -mmem
>> rtcwake: assuming RTC uses UTC ...
>> rtcwake: wakeup from "mem" using /dev/rtc0 at Wed Jun 17 10:46:12 2026
>> PM: suspend entry (s2idle)
>> Filesystems sync: 0.055 seconds
>> Freezing user space processes
>> Freezing user space processes completed (elapsed 0.006 seconds)
>> OOM killer disabled.
>> Freezing remaining freezable tasks
>> Freezing remaining freezable tasks completed (elapsed 0.003 seconds)
>>
>> ============================================
>> WARNING: possible recursive locking detected
>> 7.1.0-rc7+ #13003 Not tainted
>> --------------------------------------------
>> rtcwake/254 is trying to acquire lock:
>> ffff000006de64e8 (&dev->tx_global_lock){+.-.}-{3:3}, at: netdev_watchdog_up+0x40/0x108
>>
>> but task is already holding lock:
>> ffff000006de64e8 (&dev->tx_global_lock){+.-.}-{3:3}, at: netif_tx_lock+0x1c/0x34
>>
>> other info that might help us debug this:
>>  Possible unsafe locking scenario:
>>
>>        CPU0
>>        ----
>>   lock(&dev->tx_global_lock);
>>   lock(&dev->tx_global_lock);
>>
>>  *** DEADLOCK ***
>>
>>  May be due to missing lock nesting notation
>>
>> 6 locks held by rtcwake/254:
>>  #0: ffff0000071ab3e8 (sb_writers#5){.+.+}-{0:0}, at: vfs_write+0x1ec/0x35c
>>  #1: ffff00000d22c480 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0xf0/0x1c4
>>  #2: ffff0000049162c8 (kn->active#61){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x100/0x1c4
>>  #3: ffffaa79533c03b0 (system_transition_mutex){+.+.}-{4:4}, at: pm_suspend+0x98/0x608
>>  #4: ffff000005e3a138 (&dev->mutex){....}-{4:4}, at: device_resume+0xb4/0x254
>>  #5: ffff000006de64e8 (&dev->tx_global_lock){+.-.}-{3:3}, at: netif_tx_lock+0x1c/0x34
>>
>> stack backtrace:
>> CPU: 1 UID: 0 PID: 254 Comm: rtcwake Not tainted 7.1.0-rc7+ #13003 PREEMPT
>> Hardware name: linux,dummy-virt (DT)
>> Call trace:
>>  show_stack+0x18/0x24 (C)
>>  dump_stack_lvl+0x90/0xd0
>>  dump_stack+0x18/0x24
>>  print_deadlock_bug+0x260/0x350
>>  __lock_acquire+0x11b8/0x225c
>>  lock_acquire+0x1c4/0x3f0
>>  _raw_spin_lock_bh+0x50/0x68
>>  netdev_watchdog_up+0x40/0x108
>>  netif_device_attach+0x9c/0xb0
>>  virtnet_restore+0x100/0x21c
>>  virtio_device_restore_priv+0x11c/0x1d0
>>  virtio_device_restore+0x14/0x20
>>  virtio_mmio_restore+0x34/0x40
>>  platform_pm_resume+0x2c/0x68
>>  dpm_run_callback+0xa0/0x240
>>  device_resume+0x120/0x254
>>  dpm_resume+0x1f8/0x2ec
>>  dpm_resume_end+0x18/0x34
>>  suspend_devices_and_enter+0x1d0/0x990
>>  pm_suspend+0x1ec/0x608
>>  state_store+0x8c/0x110
>>  kobj_attr_store+0x18/0x2c
>>  sysfs_kf_write+0x50/0x7c
>>  kernfs_fop_write_iter+0x130/0x1c4
>>  vfs_write+0x2b8/0x35c
>>  ksys_write+0x6c/0x104
>>  __arm64_sys_write+0x1c/0x28
>>  invoke_syscall+0x54/0x110
>>  el0_svc_common.constprop.0+0x40/0xe8
>>  do_el0_svc+0x20/0x2c
>>  el0_svc+0x54/0x338
>>  el0t_64_sync_handler+0xa0/0xe4
>>  el0t_64_sync+0x198/0x19c
>>
>>
>> Reverting $subject on top of linux-next fixes this issue.
> Thanks for the report Marek!
>
> Acquiring tx_global_lock in netdev_watchdog_up() appears unnecessary anyway
> because the critical state (timer and refcount tracker) is already
> protected by dev->watchdog_lock.
>
> Could you try this patch?

This fixes the observed issue. Thanks! Feel free to add:

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>


> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 3f1c510df850dbdbaf10d483547c7b1f3a5d5482..ef2b4bf51564173751c74fefe17e3913ed2fa056
> 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -594,9 +594,8 @@ void netdev_watchdog_up(struct net_device *dev)
>                 return;
>         if (dev->watchdog_timeo <= 0)
>                 dev->watchdog_timeo = 5*HZ;
> -       spin_lock_bh(&dev->tx_global_lock);
>
> -       spin_lock(&dev->watchdog_lock);
> +       spin_lock_bh(&dev->watchdog_lock);
>         if (!mod_timer(&dev->watchdog_timer,
>                        round_jiffies(jiffies + dev->watchdog_timeo))) {
>                 if (!dev->watchdog_ref_held) {
> @@ -605,9 +604,7 @@ void netdev_watchdog_up(struct net_device *dev)
>                         dev->watchdog_ref_held = true;
>                 }
>         }
> -       spin_unlock(&dev->watchdog_lock);
> -
> -       spin_unlock_bh(&dev->tx_global_lock);
> +       spin_unlock_bh(&dev->watchdog_lock);
>  }
>  EXPORT_SYMBOL_GPL(netdev_watchdog_up);
>
Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland


^ permalink raw reply

* [PATCH net v2] net: usb: lan78xx: restore VLAN and hash filters after link up
From: Nicolai Buchwitz @ 2026-06-22 10:29 UTC (permalink / raw)
  To: Thangaraj Samynathan, Rengarajan Sundararajan, UNGLinuxDriver,
	Woojung.Huh
  Cc: Andrew Lunn, David S . Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Sven Schuchmann, netdev, linux-usb, linux-kernel,
	Nicolai Buchwitz

Configured VLANs intermittently stop receiving traffic after a link
down/up cycle, e.g. when the network cable is unplugged and plugged back
in. VLAN filtering stays enabled but all VLAN-tagged frames are dropped
until a VLAN is added or removed again.

The LAN7801 datasheet (DS00002123E) states:

  "A portion of the MAC operates on clocks generated by the Ethernet
   PHY. During a PHY reset event, this portion of the MAC is designed to
   not be taken out of reset until the PHY clocks are operational"
  (section 8.10, MAC Reset Watchdog Timer)

  "After a reset event, the RFE will automatically initialize the
   contents of the VHF to 0h."
  (section 7.1.4, VHF Organization)

Thus a link down/up cycle stops and restarts the PHY clock, resets the
PHY-clocked portion of the MAC, and the RFE clears its VLAN/DA hash
filter (VHF) memory. The VHF holds both the VLAN filter table and the
multicast hash table, but the driver never reprograms either from its
shadow copy once the link is back, so both stay empty.

Reprogram the VLAN filter and multicast hash tables on link up.

Reported-by: Sven Schuchmann <schuchmann@schleissheimer.de>
Closes: https://lore.kernel.org/netdev/BEZP281MB224501E38B30BFDC4BD3D364D9E32@BEZP281MB2245.DEUP281.PROD.OUTLOOK.COM/T/#u
Tested-by: Sven Schuchmann <schuchmann@schleissheimer.de>
Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
---
v2:
 - Reprogram in lan78xx_mac_link_up() instead of lan78xx_reset(); the
   table is lost on a plain link down/up cycle, where reset() is not
   called. This also avoids the usb_autopm_get_interface() -EACCES path
   in reset_resume() that was flagged on v1.
 - Also restore the multicast hash table: the RFE clears the whole VHF
   (VLAN + hash) memory, per the LAN7801 datasheet.

v1: https://lore.kernel.org/netdev/20260618191109.4086598-1-nb@tipi-net.de/

 drivers/net/usb/lan78xx.c | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index bcf293ea1bd3..c4cebacabcb5 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1452,6 +1452,15 @@ static inline u32 lan78xx_hash(char addr[ETH_ALEN])
 	return (ether_crc(ETH_ALEN, addr) >> 23) & 0x1ff;
 }
 
+static int lan78xx_write_mchash_table(struct lan78xx_net *dev)
+{
+	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+
+	return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
+				      DP_SEL_VHF_VLAN_LEN,
+				      DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+}
+
 static void lan78xx_deferred_multicast_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
@@ -1462,9 +1471,7 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
 	netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n",
 		  pdata->rfe_ctl);
 
-	ret = lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
-				     DP_SEL_VHF_VLAN_LEN,
-				     DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+	ret = lan78xx_write_mchash_table(dev);
 	if (ret < 0)
 		goto multicast_write_done;
 
@@ -1557,6 +1564,7 @@ static void lan78xx_set_multicast(struct net_device *netdev)
 }
 
 static void lan78xx_rx_urb_submit_all(struct lan78xx_net *dev);
+static int lan78xx_write_vlan_table(struct lan78xx_net *dev);
 
 static int lan78xx_mac_reset(struct lan78xx_net *dev)
 {
@@ -2514,6 +2522,17 @@ static void lan78xx_mac_link_up(struct phylink_config *config,
 	if (ret < 0)
 		goto link_up_fail;
 
+	/* The RFE clears the VLAN/DA hash filter (VHF) on a link down/up
+	 * cycle, so reprogram both tables from their shadow copies.
+	 */
+	ret = lan78xx_write_vlan_table(dev);
+	if (ret < 0)
+		goto link_up_fail;
+
+	ret = lan78xx_write_mchash_table(dev);
+	if (ret < 0)
+		goto link_up_fail;
+
 	netif_start_queue(net);
 
 	return;
@@ -3065,14 +3084,20 @@ static int lan78xx_set_features(struct net_device *netdev,
 	return lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
 }
 
+static int lan78xx_write_vlan_table(struct lan78xx_net *dev)
+{
+	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+
+	return lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, 0,
+				      DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
+}
+
 static void lan78xx_deferred_vlan_write(struct work_struct *param)
 {
 	struct lan78xx_priv *pdata =
 			container_of(param, struct lan78xx_priv, set_vlan);
-	struct lan78xx_net *dev = pdata->dev;
 
-	lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, 0,
-			       DP_SEL_VHF_VLAN_LEN, pdata->vlan_table);
+	lan78xx_write_vlan_table(pdata->dev);
 }
 
 static int lan78xx_vlan_rx_add_vid(struct net_device *netdev,

base-commit: d07d80b6a129a44538cda1549b7acf95154fb197
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH net v6 0/4] Fix i40e/ice/iavf VF bonding after netdev lock changes
From: Simon Horman @ 2026-06-22 10:31 UTC (permalink / raw)
  To: Jose Ignacio Tornos Martinez
  Cc: netdev, intel-wired-lan, przemyslaw.kitszel, aleksandr.loktionov,
	jacob.e.keller, jesse.brandeburg, anthony.l.nguyen, davem,
	edumazet, kuba, pabeni
In-Reply-To: <20260619061321.8554-1-jtornosm@redhat.com>

On Fri, Jun 19, 2026 at 08:13:15AM +0200, Jose Ignacio Tornos Martinez wrote:
> This series fixes VF bonding failures introduced by commit ad7c7b2172c3
> ("net: hold netdev instance lock during sysfs operations").

...

Hi Jose,

Unfortunately the Netdev CI was unable to apply this series cleanly against net.
Would you be able to rebase and repost?

-- 
pw-bot: changes-requested



^ permalink raw reply

* Re: [PATCH v2 1/2] net: fman: fix clk reference leak in read_dts_node()
From: Andrew Lunn @ 2026-06-22 10:33 UTC (permalink / raw)
  To: ZhaoJinming
  Cc: horms, andrew+netdev, davem, edumazet, kuba, linux-kernel,
	madalin.bucur, netdev, pabeni, sean.anderson
In-Reply-To: <20260622090505.2418478-1-zhaojinming@uniontech.com>

On Mon, Jun 22, 2026 at 05:05:04PM +0800, ZhaoJinming wrote:
> of_clk_get() returns a reference that must be released with clk_put()
> when the clock is no longer needed. The current code never calls
> clk_put(clk), leaking the reference on both the success path and the
> clk_rate == 0 error path.
> 
> Add clk_put(clk) after the clock rate is consumed on the success path,
> and jump to a new clk_put label on the error path to properly release
> the clock reference.

"When the clock is no longer needed": So once you know the rate the
clock ticks at, you no longer need the clock? It is O.K. for it to
disappear, since there is no reference to it?

    Andrew

---
pw-bot: cr

^ permalink raw reply

* Re: AW: AW: AW: AW: [PATCH net] net: usb: lan78xx: restore VLAN filter table after device reset
From: Nicolai Buchwitz @ 2026-06-22 10:34 UTC (permalink / raw)
  To: Sven Schuchmann
  Cc: Thangaraj Samynathan, Rengarajan Sundararajan, UNGLinuxDriver,
	Woojung.Huh, Andrew Lunn, David S . Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, netdev, linux-usb, linux-kernel
In-Reply-To: <BEZP281MB22457C80F9D1AC7F788EC008D9EF2@BEZP281MB2245.DEUP281.PROD.OUTLOOK.COM>

Hi Sven

On 22.6.2026 12:07, Sven Schuchmann wrote:

> [...]

>> > looks good from my point of view
>> > (Calling the lan78xx_write_vlan_table() from
>> > lan78xx_mac_link_up() and from lan78xx_reset()).
>> 
>> Thanks.
> 
> Just to be clear I used this patch which is looking good:

> [...]

Thanks for testing! I've sent a v2 of my patch with your t-b:
https://lore.kernel.org/netdev/20260622102911.484045-1-nb@tipi-net.de/

Regards
Nicolai


^ permalink raw reply

* Re: [PATCH v2 2/2] net: fman: use devm_kzalloc() for fman and rely on devres
From: Andrew Lunn @ 2026-06-22 10:36 UTC (permalink / raw)
  To: ZhaoJinming
  Cc: horms, andrew+netdev, davem, edumazet, kuba, linux-kernel,
	madalin.bucur, netdev, pabeni, sean.anderson
In-Reply-To: <20260622090505.2418478-2-zhaojinming@uniontech.com>

On Mon, Jun 22, 2026 at 05:05:05PM +0800, ZhaoJinming wrote:
> The driver now allocates the top-level struct fman with devm_kzalloc()
> so that its lifetime is bound to the device and resources are released
> automatically by the driver core on probe failure or device removal.
> 
> Remove the explicit kfree(fman) from the error paths in fman_config()
> and read_dts_node() to avoid double-free/use-after-free and to follow
> the devm_ allocation convention.
> 
> After of_find_matching_node() consumes fm_node's reference via
> of_node_put(from), the post-muram error paths no longer need to clean
> up fm_node, so replace goto fman_free with direct return ERR_PTR(err).
> 
> This change complements the existing use of devm_* resources (irq,
> ioremap, etc.) and simplifies the error handling paths.
> 
> Signed-off-by: ZhaoJinming <zhaojinming@uniontech.com>

Please take a read of:

https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html

Please read it all, but see section 1.7.4.

    Andrew

---
pw-bot: cr

^ permalink raw reply

* Re: "ip help" output is an error
From: Dmitri Seletski @ 2026-06-22 10:39 UTC (permalink / raw)
  To: David Laight, Stephen Hemminger; +Cc: netdev
In-Reply-To: <20260622084925.6f3dfc4f@pumpkin>

Hello David, (sorry for the duplicate, I keep forgetting to turn off HTML)

Yes, user could do the redirection, but it's less convenient and 
counterintuitive. IMHO, it's just wrong.

Once the user has run "ip help", IMHO the exit code should be 0, because the
user ran a command and it executed correctly with a reasonable
result (it provided the help text).

And if the exit code is 0, then it stands to reason that the output data is
standard output and not an error.


"making it do exit(0) is likely [to] cause new scripts to fail on old
systems." - do I understand correctly that the concern is that existing
scripts that depend on the current behavior will fail?

First of all, I am guessing that there are not many scripts that depend 
on "ip help".

Second of all, if there are such scripts and we follow this logic - do 
we ever patch anything? Since any script out there can depend on broken 
behavior that will be patched?

Third of all, people who wrote such scripts, should have reported bug 
here in the first place. But it's neither here nor there.


Am I failing to see the bigger picture here? Because I feel silly talking on
this mailing list. (I submitted a change for this issue about half a day ago.)

Kind Regards

Dmitri Seletski


On 6/22/26 08:49, David Laight wrote:
> On Sun, 21 Jun 2026 08:21:05 -0700
> Stephen Hemminger <stephen@networkplumber.org> wrote:
>
>> On Sat, 20 Jun 2026 10:36:31 +0100
>> Dmitri Seletski <drjoms@gmail.com> wrote:
>>
>>> Hello iproute2 maintainers,
>>>
>>> I am reporting an inconsistency regarding the exit status of the ip help
>>> command.
>>>
>>> Current Behavior:
>>> When running ip help, the command prints the help documentation to
>>> stdout, but exits with a non-zero status (error). This causes issues in
>>> shell scripts that rely on exit codes for control flow.
>>>
>>> Steps to reproduce:
>>> bash
>>>
>>> # This returns "FAIL" because the exit code is non-zero
>>> if ip help > /dev/null; then
>>>       echo "SUCCESS"
>>> else
>>>       echo "FAIL"
>>> fi
>>>
>>> Expected Behavior:
>>> Since the command successfully performs the requested task (displaying
>>> help information) and does not encounter a system error, it should
>>> return an exit code of 0.
>>>
>>> Context:
>>> This behavior breaks standard Bash logic for automation. For example:
>>> ip help && echo "This will not execute"
>>>
>>> "ip help |grep br" - this will bring no result.
>>>
>>> Current version tested: iproute2-6.19.0
>>>
>>> Thank you for your time and for maintaining this tool.
>>>
>>> Regards,
>>> Dmitri Seletski
>>>
>>>    
>> Yes, iproute2 doesn't do a great job of handling error codes
>> with usage vs help. It's a bug and no one has bothered to fix it.
>>
> The version I've got does write(2, "Usage...", 972); exit(-1);
> Changing it to do write(1, ...) is likely to break scripts, and making
> it do exit(0) is likely to cause new scripts to fail on old systems.
>
> The 'grep' works fine if you redirect stderr to stdout.
>
> The exit(-1) is a bug; the parameter is only 8 bits and the high bit
> is expected to be used to indicate abnormal termination (eg by a signal).
> That should probably be changed to exit(1), there doesn't seem to be
> a standard way to differentiate between command line errors and
> operational ones.
>
> 	David
>

^ permalink raw reply

* Re: [PATCH net] net: mana: Fall back to standard MTU when PF reports adapter_mtu of 0
From: Simon Horman @ 2026-06-22 10:41 UTC (permalink / raw)
  To: Erni Sri Satya Vennela
  Cc: kys, haiyangz, wei.liu, decui, longli, andrew+netdev, davem,
	edumazet, kuba, pabeni, dipayanroy, ssengar, jacob.e.keller,
	gargaditya, kees, linux-hyperv, netdev, linux-kernel, bpf
In-Reply-To: <20260619055348.467224-1-ernis@linux.microsoft.com>

On Thu, Jun 18, 2026 at 10:53:38PM -0700, Erni Sri Satya Vennela wrote:
> Commit d7709812e13d ("net: mana: hardening: Validate adapter_mtu from
> MANA_QUERY_DEV_CONFIG") rejected any adapter_mtu value smaller than
> ETH_MIN_MTU + ETH_HLEN, including 0, returning -EPROTO and failing
> mana_probe().
> 
> Some older PF firmware versions still in the field report
> adapter_mtu as 0 in the MANA_QUERY_DEV_CONFIG response. With the
> hardening check in place, the MANA VF driver now fails to load on
> those hosts, breaking networking entirely for guests.
> 
> MANA hardware always supports the standard Ethernet MTU. Treat a
> reported adapter_mtu of 0 as "the PF did not advertise a value" and
> fall back to ETH_FRAME_LEN, the same value used for the pre-V2
> message version path. Only jumbo frames remain unavailable until
> the PF reports a valid MTU.
> 
> Other small-but-nonzero bogus values are still rejected, preserving
> the original protection against the unsigned-subtraction wrap that
> would otherwise let ndev->max_mtu underflow to a huge value.
> 
> Fixes: d7709812e13d ("net: mana: hardening: Validate adapter_mtu from MANA_QUERY_DEV_CONFIG")
> Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>

Reviewed-by: Simon Horman <horms@kernel.org>

FTR, I agree with your assessment that the issue flagged in the
AI-generated review of this patch on sashiko.dev can be
treated as a follow-up [1].

And I don't think the low priority issue flagged in the AI-generated
review on https://netdev-ai.bots.linux.dev/sashiko/ should impede progress
of this patch.

[1] https://lore.kernel.org/bpf/ajj+5mhswcqhI2z7@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/


^ permalink raw reply

* [RFC net-next 00/17] MPTCP KTLS support
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang

From: Geliang Tang <tanggeliang@kylinos.cn>

Prior to this work, MPTCP did not support TLS. The two protocols
conflicted because both MPTCP and TLS use the ULP (Upper Layer
Protocol) infrastructure in the Linux kernel. ULP settings, including
TLS configuration, were disabled in MPTCP. If an application attempted
to set TLS for an MPTCP socket, the system would return an error code
indicating EOPNOTSUPP (Operation not supported).

This series adds KTLS support for MPTCP. Since no ULP is currently
attached to the MPTCP socket (msk), KTLS can be configured directly on
the msk rather than on individual subflows. This does not affect its
existing communication, and leverages HMAC-based authentication to
ensure subflow security.

RFC versions of this series have gone through many iterations on the MPTCP
mailing list, mainly to address Sashiko's review comments. It is now mostly
stable.

A follow-up series will add MPTCP support to the TLS selftests
(tools/testing/selftests/net/tls.c). All existing TCP test cases have
already been verified to pass over MPTCP as well.

The primary validation use case for this work is NVMe over MPTCP with KTLS.
NVMe over TCP is a storage protocol that transports NVMe commands over TCP.
By combining it with MPTCP, multipath capabilities for storage traffic are 
gained. By adding KTLS, the storage traffic is secured with encryption.
Although NVMe over MPTCP is still under active development, I have already
verified that KTLS operates correctly on top of it.

All feedback is welcome.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/480

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>

Gang Yan (1):
  mptcp: update mptcp_check_readable helper

Geliang Tang (16):
  tls: make tls_ctx_create and update_sk_prot static
  tls: factor out __tls_build_proto for mptcp support
  tls: add protocol dimension to tls operation cache
  mptcp: add sendmsg_locked to proto_ops
  tls: use sendmsg_locked from the underlying socket
  mptcp: implement peek_len for proto_ops
  tls: replace tcp_inq with socket peek_len
  tls: store original read_sock for non-tcp sockets
  tls: introduce tls protocol ops structure
  tls: use protocol ops via tls_context
  mptcp: implement mptcp-specific tls protocol ops
  tls: add mptcp support for sk_poll
  tls: disable device offload for mptcp sockets
  mptcp: implement ulp getsockopt for tls support
  mptcp: implement ulp setsockopt for tls support
  selftests: mptcp: connect: use espintcp for ulp test

 include/net/mptcp.h                           |  11 +
 include/net/tcp.h                             |   1 +
 include/net/tls.h                             |  19 ++
 net/ipv4/tcp.c                                |   9 +-
 net/mptcp/protocol.c                          | 180 +++++++++++++-
 net/mptcp/protocol.h                          |   1 +
 net/mptcp/sockopt.c                           |  68 +++++-
 net/tls/tls.h                                 |   2 -
 net/tls/tls_device.c                          |  10 +-
 net/tls/tls_main.c                            | 227 +++++++++++++++---
 net/tls/tls_strp.c                            |  35 ++-
 net/tls/tls_sw.c                              |  10 +-
 tools/testing/selftests/net/mptcp/config      |   4 +
 .../selftests/net/mptcp/mptcp_connect.c       |   4 +-
 14 files changed, 516 insertions(+), 65 deletions(-)

-- 
2.53.0


^ permalink raw reply

* [RFC net-next 01/17] tls: make tls_ctx_create and update_sk_prot static
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

The TLS TOE (TCP offload engine) support has been removed. As a result,
tls_ctx_create() and update_sk_prot() are no longer used outside of
tls_main.c.

Make them static and remove their prototypes from tls.h. This avoids
exporting unnecessary symbols and cleans up the internal API.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/tls/tls.h      | 2 --
 net/tls/tls_main.c | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/tls/tls.h b/net/tls/tls.h
index 60a37bdaaa25..68dfe109808e 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -136,9 +136,7 @@ struct tls_rec {
 int __net_init tls_proc_init(struct net *net);
 void __net_exit tls_proc_fini(struct net *net);
 
-struct tls_context *tls_ctx_create(struct sock *sk);
 void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
-void update_sk_prot(struct sock *sk, struct tls_context *ctx);
 
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 void tls_err_abort(struct sock *sk, int err);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 8c588cdab733..9675c75bc50c 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -126,7 +126,7 @@ static struct proto_ops tls_proto_ops[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CON
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 			 const struct proto *base);
 
-void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 
@@ -913,7 +913,7 @@ static int tls_disconnect(struct sock *sk, int flags)
 	return -EOPNOTSUPP;
 }
 
-struct tls_context *tls_ctx_create(struct sock *sk)
+static struct tls_context *tls_ctx_create(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tls_context *ctx;
-- 
2.53.0


^ permalink raw reply related

* [RFC net-next 02/17] tls: factor out __tls_build_proto for mptcp support
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

tls_build_proto() contains duplicated logic for building IPv4 and IPv6
TLS protocol caches.

Factor out the common code into a new helper __tls_build_proto(), which
takes the saved protocol pointer, mutex, and IP family as parameters.
This prepares for adding MPTCP support by reducing the amount of
duplicated code needed when introducing additional protocol variants.

No functional change intended.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/tls/tls_main.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 9675c75bc50c..be824affd1b1 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -968,35 +968,37 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
 #endif
 }
 
-static void tls_build_proto(struct sock *sk)
+static void __tls_build_proto(struct sock *sk,
+			      const struct proto *saved_prot,
+			      struct mutex *prot_mutex,
+			      int family)
 {
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 	struct proto *prot = READ_ONCE(sk->sk_prot);
 
-	/* Build IPv6 TLS whenever the address of tcpv6 _prot changes */
-	if (ip_ver == TLSV6 &&
-	    unlikely(prot != smp_load_acquire(&saved_tcpv6_prot))) {
-		mutex_lock(&tcpv6_prot_mutex);
-		if (likely(prot != saved_tcpv6_prot)) {
-			build_protos(tls_prots[TLSV6], prot);
-			build_proto_ops(tls_proto_ops[TLSV6],
-					sk->sk_socket->ops);
-			smp_store_release(&saved_tcpv6_prot, prot);
+	if (ip_ver == family) {
+		/* smp_load_acquire pairs with smp_store_release below */
+		if (unlikely(prot != smp_load_acquire(&saved_prot))) {
+			mutex_lock(prot_mutex);
+			if (likely(prot != saved_prot)) {
+				build_protos(tls_prots[family], prot);
+				build_proto_ops(tls_proto_ops[family],
+						sk->sk_socket->ops);
+				/* pairs with smp_load_acquire above */
+				smp_store_release(&saved_prot, prot);
+			}
+			mutex_unlock(prot_mutex);
 		}
-		mutex_unlock(&tcpv6_prot_mutex);
 	}
+}
 
-	if (ip_ver == TLSV4 &&
-	    unlikely(prot != smp_load_acquire(&saved_tcpv4_prot))) {
-		mutex_lock(&tcpv4_prot_mutex);
-		if (likely(prot != saved_tcpv4_prot)) {
-			build_protos(tls_prots[TLSV4], prot);
-			build_proto_ops(tls_proto_ops[TLSV4],
-					sk->sk_socket->ops);
-			smp_store_release(&saved_tcpv4_prot, prot);
-		}
-		mutex_unlock(&tcpv4_prot_mutex);
-	}
+static void tls_build_proto(struct sock *sk)
+{
+	/* Build IPv6 TLS whenever the address of tcpv6 _prot changes */
+	__tls_build_proto(sk, saved_tcpv6_prot, &tcpv6_prot_mutex,
+			  TLSV6);
+	__tls_build_proto(sk, saved_tcpv4_prot, &tcpv4_prot_mutex,
+			  TLSV4);
 }
 
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
-- 
2.53.0


^ permalink raw reply related

* [RFC net-next 03/17] tls: add protocol dimension to tls operation cache
From: Geliang Tang @ 2026-06-22 10:43 UTC (permalink / raw)
  To: Matthieu Baerts, Mat Martineau, Geliang Tang, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, John Fastabend, Sabrina Dubroca,
	Hannes Reinecke
  Cc: Geliang Tang, netdev, mptcp, Gang Yan, Zqiang
In-Reply-To: <cover.1782123118.git.tanggeliang@kylinos.cn>

From: Geliang Tang <tanggeliang@kylinos.cn>

The current TLS operation cache is indexed solely by IP version
(IPv4/IPv6). This was sufficient when only TCP was supported.
Rename TLS_NUM_PROTS to TLS_NUM_FAMILY to accurately reflect that it
represents the number of address families.

With the introduction of MPTCP, both TCP and MPTCP sockets within the
same IP version now share the same cache entries. When an MPTCP socket
enables TLS, it overwrites the cache with MPTCP-specific operations,
causing existing TCP TLS sockets to use the wrong ops, leading to type
confusion and kernel panics.

Fix by extending the cache arrays with a protocol dimension to separate
TCP and MPTCP. Introduce TLSTCP and TLSMPTCP enum values, along with
separate saved protocol pointers and mutexes for MPTCP. update_sk_prot()
and __tls_build_proto() now select the appropriate cache based on
sk->sk_protocol.

Co-developed-by: Gang Yan <yangang@kylinos.cn>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Co-developed-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/tls/tls_main.c | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index be824affd1b1..94133d62f73e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -53,7 +53,13 @@ MODULE_ALIAS_TCP_ULP("tls");
 enum {
 	TLSV4,
 	TLSV6,
-	TLS_NUM_PROTS,
+	TLS_NUM_FAMILY,
+};
+
+enum {
+	TLSTCP,
+	TLSMPTCP,
+	TLS_NUM_PROTO,
 };
 
 #define CHECK_CIPHER_DESC(cipher,ci)				\
@@ -117,23 +123,30 @@ CHECK_CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm);
 CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128);
 CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256);
 
+static const struct proto *saved_mptcpv6_prot;
+static DEFINE_MUTEX(mptcpv6_prot_mutex);
 static const struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
+static const struct proto *saved_mptcpv4_prot;
+static DEFINE_MUTEX(mptcpv4_prot_mutex);
 static const struct proto *saved_tcpv4_prot;
 static DEFINE_MUTEX(tcpv4_prot_mutex);
-static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
-static struct proto_ops tls_proto_ops[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
+static struct proto
+tls_prots[TLS_NUM_FAMILY][TLS_NUM_PROTO][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
+static struct proto_ops
+tls_proto_ops[TLS_NUM_FAMILY][TLS_NUM_PROTO][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 			 const struct proto *base);
 
 static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
+	int proto = sk->sk_protocol == IPPROTO_MPTCP ? TLSMPTCP : TLSTCP;
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 
 	WRITE_ONCE(sk->sk_prot,
-		   &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]);
+		   &tls_prots[ip_ver][proto][ctx->tx_conf][ctx->rx_conf]);
 	WRITE_ONCE(sk->sk_socket->ops,
-		   &tls_proto_ops[ip_ver][ctx->tx_conf][ctx->rx_conf]);
+		   &tls_proto_ops[ip_ver][proto][ctx->tx_conf][ctx->rx_conf]);
 }
 
 int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -971,18 +984,19 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
 static void __tls_build_proto(struct sock *sk,
 			      const struct proto *saved_prot,
 			      struct mutex *prot_mutex,
-			      int family)
+			      int family, int protocol)
 {
+	int proto = sk->sk_protocol == IPPROTO_MPTCP ? TLSMPTCP : TLSTCP;
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 	struct proto *prot = READ_ONCE(sk->sk_prot);
 
-	if (ip_ver == family) {
+	if (ip_ver == family && proto == protocol) {
 		/* smp_load_acquire pairs with smp_store_release below */
 		if (unlikely(prot != smp_load_acquire(&saved_prot))) {
 			mutex_lock(prot_mutex);
 			if (likely(prot != saved_prot)) {
-				build_protos(tls_prots[family], prot);
-				build_proto_ops(tls_proto_ops[family],
+				build_protos(tls_prots[family][protocol], prot);
+				build_proto_ops(tls_proto_ops[family][protocol],
 						sk->sk_socket->ops);
 				/* pairs with smp_load_acquire above */
 				smp_store_release(&saved_prot, prot);
@@ -995,10 +1009,14 @@ static void __tls_build_proto(struct sock *sk,
 static void tls_build_proto(struct sock *sk)
 {
 	/* Build IPv6 TLS whenever the address of tcpv6 _prot changes */
+	__tls_build_proto(sk, saved_mptcpv6_prot, &mptcpv6_prot_mutex,
+			  TLSV6, TLSMPTCP);
 	__tls_build_proto(sk, saved_tcpv6_prot, &tcpv6_prot_mutex,
-			  TLSV6);
+			  TLSV6, TLSTCP);
+	__tls_build_proto(sk, saved_mptcpv4_prot, &mptcpv4_prot_mutex,
+			  TLSV4, TLSMPTCP);
 	__tls_build_proto(sk, saved_tcpv4_prot, &tcpv4_prot_mutex,
-			  TLSV4);
+			  TLSV4, TLSTCP);
 }
 
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
-- 
2.53.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox