* [PATCH v10 2/2] net: mhi: Add IP_SW1 interface for M-plane support over MHI
From: Vivek Pernamitta @ 2026-04-09 6:08 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, linux-kernel, Vivek Pernamitta
In-Reply-To: <20260409-vdev_b1_eth_b1_next-20260408-v10-0-6d44ca48f189@oss.qualcomm.com>
Introduce the IP_SW1 channel to represent the network interface
exposed by the MHI device for M-plane communication. The driver
creates a corresponding netdev, allowing the host to access the
device network interface over MHI.
This interface enables support for O-RAN M-plane management
between O-DU and O-RU, including capability exchange, configuration
management, performance monitoring, and fault management using
NETCONF/YANG, as defined by O-RAN WG4 M-plane specifications.
Signed-off-by: Vivek Pernamitta <vivek.pernamitta@oss.qualcomm.com>
---
drivers/net/mhi_net.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c
index 47b8617de027980a69c57261a9b4bcefc828dc96..e285aa289cb945cd1afd2f3d581214f2f2f15145 100644
--- a/drivers/net/mhi_net.c
+++ b/drivers/net/mhi_net.c
@@ -430,6 +430,7 @@ static const struct mhi_device_id mhi_net_id_table[] = {
{ .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
/* Software data PATH (to modem CPU) */
{ .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
+ { .chan = "IP_SW1", .driver_data = (kernel_ulong_t)&mhi_swip0 },
{ .chan = "IP_ETH0", .driver_data = (kernel_ulong_t)&mhi_eth0 },
{ .chan = "IP_ETH1", .driver_data = (kernel_ulong_t)&mhi_eth0 },
{}
--
2.34.1
^ permalink raw reply related
* [PATCH v10 1/2] net: mhi: Enable Ethernet interface support
From: Vivek Pernamitta @ 2026-04-09 6:08 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, linux-kernel, Vivek Pernamitta
In-Reply-To: <20260409-vdev_b1_eth_b1_next-20260408-v10-0-6d44ca48f189@oss.qualcomm.com>
Add support to configure a new client as Ethernet type over MHI by
setting "mhi_device_info.ethernet_if = true". Create a new Ethernet
interface named eth%d. This complements existing NET driver support.
Allocate MHI netdevs using NET_NAME_ENUM to reflect kernel-enumerated
naming. This updates the reported name_assign_type for MHI net
interfaces created by this driver, aligning naming semantics across
existing and new devices. No functional or interface naming changes
are introduced.
Introduce ETH0 and ETH1 network interfaces required for NETCONF
and S-plane components.
IP_ETH channels represent the Ethernet interface exposed by the
MHI device. The driver creates a corresponding netdev instance,
allowing the host to access and operate the device’s Ethernet
interface.
NETCONF:
Use NETCONF protocol for configuration operations such as fetching,
modifying, and deleting network device configurations.
S-plane:
Support frequency and time synchronization between O-DUs and O-RUs
using Synchronous Ethernet and IEEE 1588. Assume PTP transport over
L2 Ethernet (ITU-T G.8275.1) for full timing support; allow PTP over
UDP/IP (ITU-T G.8275.2) with reduced reliability, as per ORAN spec
O-RAN.WG4.CUS.0-R003-v12.00.
Signed-off-by: Vivek Pernamitta <vivek.pernamitta@oss.qualcomm.com>
---
drivers/net/mhi_net.c | 65 +++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 53 insertions(+), 12 deletions(-)
diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c
index ae169929a9d8e449b5a427993abf68e8d032fae2..47b8617de027980a69c57261a9b4bcefc828dc96 100644
--- a/drivers/net/mhi_net.c
+++ b/drivers/net/mhi_net.c
@@ -4,6 +4,7 @@
* Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
*/
+#include <linux/etherdevice.h>
#include <linux/if_arp.h>
#include <linux/mhi.h>
#include <linux/mod_devicetable.h>
@@ -42,6 +43,7 @@ struct mhi_net_dev {
struct mhi_device_info {
const char *netname;
+ bool ethernet_if;
};
static int mhi_ndo_open(struct net_device *ndev)
@@ -119,11 +121,29 @@ static void mhi_ndo_get_stats64(struct net_device *ndev,
} while (u64_stats_fetch_retry(&mhi_netdev->stats.tx_syncp, start));
}
+static int mhi_mac_address(struct net_device *dev, void *p)
+{
+ if (dev->type == ARPHRD_ETHER)
+ return eth_mac_addr(dev, p);
+
+ return -EOPNOTSUPP;
+}
+
+static int mhi_validate_address(struct net_device *dev)
+{
+ if (dev->type == ARPHRD_ETHER)
+ return eth_validate_addr(dev);
+
+ return 0;
+}
+
static const struct net_device_ops mhi_netdev_ops = {
.ndo_open = mhi_ndo_open,
.ndo_stop = mhi_ndo_stop,
.ndo_start_xmit = mhi_ndo_xmit,
.ndo_get_stats64 = mhi_ndo_get_stats64,
+ .ndo_set_mac_address = mhi_mac_address,
+ .ndo_validate_addr = mhi_validate_address,
};
static void mhi_net_setup(struct net_device *ndev)
@@ -140,6 +160,13 @@ static void mhi_net_setup(struct net_device *ndev)
ndev->tx_queue_len = 1000;
}
+static void mhi_ethernet_setup(struct net_device *ndev)
+{
+ ndev->netdev_ops = &mhi_netdev_ops;
+ ether_setup(ndev);
+ ndev->max_mtu = ETH_MAX_MTU;
+}
+
static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev,
struct sk_buff *skb)
{
@@ -208,17 +235,20 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
skb = mhi_net_skb_agg(mhi_netdev, skb);
mhi_netdev->skbagg_head = NULL;
}
-
- switch (skb->data[0] & 0xf0) {
- case 0x40:
- skb->protocol = htons(ETH_P_IP);
- break;
- case 0x60:
- skb->protocol = htons(ETH_P_IPV6);
- break;
- default:
- skb->protocol = htons(ETH_P_MAP);
- break;
+ if (mhi_netdev->ndev->type == ARPHRD_ETHER) {
+ skb->protocol = eth_type_trans(skb, mhi_netdev->ndev);
+ } else {
+ switch (skb->data[0] & 0xf0) {
+ case 0x40:
+ skb->protocol = htons(ETH_P_IP);
+ break;
+ case 0x60:
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ default:
+ skb->protocol = htons(ETH_P_MAP);
+ break;
+ }
}
u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
@@ -306,6 +336,9 @@ static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
struct mhi_net_dev *mhi_netdev;
int err;
+ if (ndev->header_ops)
+ eth_hw_addr_random(ndev);
+
mhi_netdev = netdev_priv(ndev);
dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
@@ -356,7 +389,8 @@ static int mhi_net_probe(struct mhi_device *mhi_dev,
int err;
ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
- NET_NAME_PREDICTABLE, mhi_net_setup);
+ NET_NAME_ENUM, info->ethernet_if ?
+ mhi_ethernet_setup : mhi_net_setup);
if (!ndev)
return -ENOMEM;
@@ -386,11 +420,18 @@ static const struct mhi_device_info mhi_swip0 = {
.netname = "mhi_swip%d",
};
+static const struct mhi_device_info mhi_eth0 = {
+ .netname = "eth%d",
+ .ethernet_if = true,
+};
+
static const struct mhi_device_id mhi_net_id_table[] = {
/* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
{ .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
/* Software data PATH (to modem CPU) */
{ .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
+ { .chan = "IP_ETH0", .driver_data = (kernel_ulong_t)&mhi_eth0 },
+ { .chan = "IP_ETH1", .driver_data = (kernel_ulong_t)&mhi_eth0 },
{}
};
MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
--
2.34.1
^ permalink raw reply related
* [PATCH v10 0/2] net: mhi: Add support to enable ethernet interface
From: Vivek Pernamitta @ 2026-04-09 6:08 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, linux-kernel, Vivek Pernamitta
Add support to configure a new client as Ethernet type over MHI
by setting "mhi_device_info.ethernet_if = true". Create a new
Ethernet interface named eth%d. This complements existing NET
driver support.
Enable IP_SW1 (ch:48/49), IP_ETH0 (ch:50,51) and IP_ETH1 (ch:52,
53) channels over MHI for M-plane, NETCONF and S-plane interface
for QDU100.
Treat IP_ETH channels as representing the Ethernet interface of the MHI
device, and create a corresponding netdev instance to expose this
interface to the host networking stack.
M-plane:
Implement DU M-Plane software for non-real-time O-RAN
management between O-DU and O-RU using NETCONF/YANG and
O-RAN WG4 M-Plane YANG models. Provide capability exchange,
configuration management, performance monitoring, and fault
management per O-RAN.WG4.TS.MP.0-R004-v18.00.
YANG model based interface aligned with O-RAN WG4 M-Plane
specifications over TCP between the OAM application on the
host and the DU M-Plane software running on the X100 platform.
Netconf:
Use NETCONF protocol for configuration operations such as
fetching, modifying, and deleting network device
configurations.
This interface is used for IETF Netconf communication,
enabling a Netconf server on the ORU to interact with a
Netconf client running on the host.
S-plane:
Support frequency and time synchronization between O-DUs and
O-RUs using Synchronous Ethernet and IEEE 1588. Assume PTP
transport over L2 Ethernet (ITU-T G.8275.1) for full timing
support; allow PTP over UDP/IP (ITU-T G.8275.2) with reduced
reliability, as per ORAN spec O-RAN.WG4.CUS.0-R003-v12.00.
To support accurate phase and time synchronization between
the host (L2) and device (L1-High), the system must exchange
PTP messages as raw Layer-2 Ethernet frames, because the
ITU-T G.8275.1 profile operates strictly over Ethernet
multicast and not over IP networks. This means the device’s
PTP stack can only send and receive PTP Announce, Sync,
Follow-Up, and Delay messages in native Ethernet format, not
as IPv4/IPv6 packets. However, the host and device communicate
only through MHI/PCIe, which provides no native Ethernet
interface. Therefore, the system must implement a virtual
Ethernet interface over MHI on both sides. This virtual
Layer-2 link enables true Ethernet-frame transport, ensuring
the device’s PTP implementation remains fully compliant with
the G.8275.1 Ethernet-based timing model.
The actual link between the device (QDU100) and the host is
PCIe/MHI. The device has the Ethernet interface and is exposed
as the MHI channel to the host. So this patch creates the
Ethernet interface on the host based on the 'IP_ETH' channel
so that the host can use this interface for exchanging the
NETCONF packets.
The patch primarily addresses host-to-DU(QDU100) communication.
However, the NETCONF/M-Plane packets originating from the host
will eventually be transmitted from the DU to the RU over the
fronthaul, which uses Ethernet. For additional details on this
architecture and data flow, refer to the O-RAN Management
Plane Specification:
O-RAN.WG4.MP.0-v07.00
O-RAN Alliance Working Group 4 – Management Plane
Specification
Chapter 4: O-RU to O-DU Interface Management
MHI_CHANNEL_CONFIG defines channel attributes for the host
controller to set up channel rings. These entries are part of
the MHI controller’s configuration so that client drivers, such
as the MHI network driver, can attach to them. Each interface is
mapped to an MHI channel (for example, eth0 → IP_ETH0 channels
50/51), which is why this configuration resides in the bus code.
Allocate MHI netdevs using NET_NAME_ENUM to reflect kernel-enumerated
naming. This updates the reported name_assign_type for MHI net
interfaces created by this driver, aligning naming semantics across
existing and new devices. No functional or interface naming changes
are introduced.
Signed-off-by: Vivek Pernamitta <vivek.pernamitta@oss.qualcomm.com>
---
patchset link for V9 : https://lore.kernel.org/all/20260324-vdev_eth_next-20260323-v9-0-da6e4e486437@oss.qualcomm.com/
patchset link for V8 : https://lore.kernel.org/all/872d3520-f749-4d4d-a8c3-25dba30e947c@oss.qualcomm.com/#r
patchset link for V7 : https://lore.kernel.org/all/20260205-eth_vdev_next-20260204_eth-v7-0-f85645210f81@qti.qualcomm.com/
patchset link for V6 : https://lore.kernel.org/all/20251209-vdev_next-20251208_eth_v6-v6-0-80898204f5d8@quicinc.com/
patchset link for V1 (first post) : https://lore.kernel.org/all/20250724-b4-eth_us-v1-0-4dff04a9a128@quicinc.com/
changes to v10:
- Updated patchset as per comments provided by Loic Poulain
Return -EOPNOTSUPP for non-ethernet device type when changing MAC address.
Removed double negation in mhi_net_newlink for if check.
changes to v9:
- Updated patchset as per comments provided by Mani to check
for ARPHRD_ETHER type while assigning ethernet protocol and also
removed explicitly setting ethernet_if to false for IP_SW/IP_HW
interfaces.
- Moved adding IP_SW1 channel to different patch in same series.
- Added more description in comments.
- As MHI(pci_generic) got merged, dropping the patch from series.
changes to v8:
- Removed skb_copy_to_linear_data call as it is not needed, updated
as per Loic Poulain and Paolo Abeni.
- Removed ethernet_if member in struct mhi_net_dev instead used
!!ndev->header_ops check for ethernet protocol as per Paolo Abeni.
- Updated more information in commit text for change from
NET_NAME_PREDICTABLE to NET_NAME_ENUM.
changes to v7:
- Updated to NET_NAME_ENUM while allocating netdev as per Andrew
- Updated more information as per comments from Jakub and Mani
changes to v6:
- Removed interim variable usage as per comments from Simon and Dmirty.
- Squashed gerrits 1 and 2 in single gerrit.
- Added more description for M-plane, Netconf and S-plane.
changes to v5:
- change in email ID from "quic_vpernami@quicinc.com" to "vivek.pernamitta@oss.qualcomm.com"
- Renamed to patch v5 as per comments from Manivannan
- Restored to original name as per comments from Jakub
- Renamed the ethernet interface to eth%d as per Jakub
---
---
Vivek Pernamitta (2):
net: mhi: Enable Ethernet interface support
net: mhi: Add IP_SW1 interface for M-plane support over MHI
drivers/net/mhi_net.c | 66 +++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 54 insertions(+), 12 deletions(-)
---
base-commit: db7efce4ae23ad5e42f5f55428f529ff62b86fab
change-id: 20260409-vdev_b1_eth_b1_next-20260408-d1fb6698cae3
Best regards,
--
Vivek Pernamitta <vivek.pernamitta@oss.qualcomm.com>
^ permalink raw reply
* [PATCH nf] netfilter: nft_fwd_netdev: use recursion counter in neigh egress path
From: Weiming Shi @ 2026-04-09 5:36 UTC (permalink / raw)
To: Pablo Neira Ayuso, Florian Westphal, David S . Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: Phil Sutter, Simon Horman, netfilter-devel, coreteam, netdev,
Xiang Mei, Weiming Shi
nft_fwd_neigh_eval() can be attached to NF_NETDEV_EGRESS chains since
commit f87b9464d152 ("netfilter: nft_fwd_netdev: Support egress hook").
When a forwarding rule targets the same device (or two devices forward
to each other), the evaluator calls neigh_xmit() which reaches
dev_queue_xmit(), re-entering nf_hook_egress() before the previous
invocation returns. This recurses until the kernel stack is exhausted.
The nf_dup_skb_recursion counter in nf_do_netdev_egress() was added by
commit fcd53c51d037 ("netfilter: nf_dup_netdev: add and use recursion
counter") to prevent exactly this class of bug, but nft_fwd_neigh_eval()
bypasses that helper entirely by calling neigh_xmit() directly.
BUG: KASAN: slab-out-of-bounds in nft_do_chain (net/netfilter/nf_tables_core.c:287)
Call Trace:
nft_do_chain (net/netfilter/nf_tables_core.c:287)
nft_do_chain_netdev (net/netfilter/nft_chain_filter.c:289)
nf_hook_slow (include/linux/netfilter.h:158)
__dev_queue_xmit (net/core/dev.c:4807)
neigh_resolve_output (include/linux/seqlock.h:392)
neigh_xmit (net/core/neighbour.c:3230)
nft_fwd_neigh_eval (net/netfilter/nft_fwd_netdev.c:150)
nft_do_chain (net/netfilter/nf_tables_core.c:287)
nft_do_chain_netdev (net/netfilter/nft_chain_filter.c:289)
nf_hook_slow (include/linux/netfilter.h:158)
__dev_queue_xmit (net/core/dev.c:4807)
[repeats until stack exhaustion]
Kernel panic - not syncing: Fatal exception in interrupt
Export the recursion counter helpers from nf_dup_netdev and use them
in nft_fwd_neigh_eval() to bound the recursion depth, matching the
protection already present in nf_do_netdev_egress().
Fixes: f87b9464d152 ("netfilter: nft_fwd_netdev: Support egress hook")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
---
include/net/netfilter/nf_dup_netdev.h | 4 ++++
net/netfilter/nf_dup_netdev.c | 18 ++++++++++++++++++
net/netfilter/nft_fwd_netdev.c | 7 +++++++
3 files changed, 29 insertions(+)
diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h
index b175d271aec9..17362f76d1d1 100644
--- a/include/net/netfilter/nf_dup_netdev.h
+++ b/include/net/netfilter/nf_dup_netdev.h
@@ -7,6 +7,10 @@
void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif);
void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif);
+bool nf_dup_netdev_has_recursed(void);
+void nf_dup_netdev_recursion_inc(void);
+void nf_dup_netdev_recursion_dec(void);
+
struct nft_offload_ctx;
struct nft_flow_rule;
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index fab8b9011098..e2fe8bb6fe0d 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -29,6 +29,24 @@ static u8 *nf_get_nf_dup_skb_recursion(void)
#endif
+bool nf_dup_netdev_has_recursed(void)
+{
+ return *nf_get_nf_dup_skb_recursion() > NF_RECURSION_LIMIT;
+}
+EXPORT_SYMBOL_GPL(nf_dup_netdev_has_recursed);
+
+void nf_dup_netdev_recursion_inc(void)
+{
+ (*nf_get_nf_dup_skb_recursion())++;
+}
+EXPORT_SYMBOL_GPL(nf_dup_netdev_recursion_inc);
+
+void nf_dup_netdev_recursion_dec(void)
+{
+ (*nf_get_nf_dup_skb_recursion())--;
+}
+EXPORT_SYMBOL_GPL(nf_dup_netdev_recursion_dec);
+
static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev,
enum nf_dev_hooks hook)
{
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 152a9fb4d23a..d85f72af3589 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -141,13 +141,20 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
goto out;
}
+ if (nf_dup_netdev_has_recursed()) {
+ verdict = NF_DROP;
+ goto out;
+ }
+
dev = dev_get_by_index_rcu(nft_net(pkt), oif);
if (dev == NULL)
return;
skb->dev = dev;
skb_clear_tstamp(skb);
+ nf_dup_netdev_recursion_inc();
neigh_xmit(neigh_table, dev, addr, skb);
+ nf_dup_netdev_recursion_dec();
out:
regs->verdict.code = verdict;
}
--
2.43.0
^ permalink raw reply related
* Re: [PATCH 3/4] drm/drm_ras: Add DRM RAS netlink error event notification
From: Raag Jadav @ 2026-04-09 5:35 UTC (permalink / raw)
To: Tauro, Riana
Cc: aravind.iddamsetty, rodrigo.vivi, intel-xe, dri-devel, netdev,
anshuman.gupta, joonas.lahtinen, simona.vetter, airlied,
pratik.bari, joshua.santosh.ranjan, ashwin.kumar.kulkarni,
shubham.kumar, ravi.kishore.koppuravuri, anvesh.bakwad,
maarten.lankhorst, Zack McKevitt, Lijo Lazar, Hawking Zhang,
David S. Miller, Paolo Abeni, Eric Dumazet, Jakub Kicinski
In-Reply-To: <09f253b3-ee64-4fbb-8cec-820e9769aa80@intel.com>
On Wed, Apr 08, 2026 at 07:59:33PM +0530, Tauro, Riana wrote:
> On 3/25/2026 7:01 PM, Raag Jadav wrote:
> > On Wed, Mar 11, 2026 at 03:59:17PM +0530, Riana Tauro wrote:
...
> > > +Example: Listen to error events
> > > +
> > > +.. code-block:: bash
> > > +
> > > + sudo ynl --family drm_ras --subscribe error-notify
> > > + {'msg': {'error-id': 1, 'node-id': 1}, 'name': 'error-event'}
> > Can we also have error-name and node-name? I'd be pulling my hair off
> > if I need to remember all the ids.
>
> Yeah makes sense. We can add the node_name, error_name.
> Adding device_name would also be more useful in the event.
>
> @Rodrigo/@aravind thoughts?
>
> >
> > On that note, I think it'll be good to have them as part of request
> > attributes as an alternative to ids (also for existing commands) but
> > that can done as a follow up.
> >
> We cannot use names as alternative because it won't work for multiple cards.
> example in xe: Suppose there are 2 cards and each has 2 nodes. We cannot
> query using node_name+error_name.
> Also most of the netlink implementations use id's as unique identifiers.
>
> $ sudo ./cli.py --family drm_ras --dump list-nodes
> [{'device-name': 'bdf_1', 'node-id': 0, 'node-name': 'correctable-errors',
> 'node-type': 'error-counter'},
> {'device-name': 'bdf_1', 'node-id': 1, 'node-name': 'uncorrectable-errors',
> 'node-type': 'error-counter'},
> {'device-name': 'bdf_2', 'node-id': 2, 'node-name': 'correctable-errors',
> 'node-type': 'error-counter'},
> {'device-name': 'bdf_2', 'node-id': 3, 'node-name': 'uncorrectable-errors',
> 'node-type': 'error-counter'}]
This means they don't persist, so the user needs to figure out all the ids before
anything can happen. In device node world we have /dev/dri/by-path/<bdf> which
makes it much easier.
Also, I'm not much informed about the history and it's still unclear to me what
problem did netlink solve here that cannot be solved by anything else? But we're
too late for that discussion, and again, not my call.
> > Also, what if I have multiple devices with multiple nodes. Do they need
> > separate subscription?
> >
> No, we subscribe only to the group not the nodes. In this case the group is
> 'error-notify'
>
> $ sudo ./cli.py --family drm_ras --subscribe error-notify
> {'msg': {'error-id': 1, 'node-id': 1}, 'name': 'error-event'}
> {'msg': {'error-id': 1, 'node-id': 3}, 'name': 'error-event'}
Hm, perhaps I need to spend some time wrapping my head around the new concept.
Let's catch up sometime this week.
Raag
^ permalink raw reply
* [PATCH] octeon_ep: Remove unnecessary semicolons in octep_oq_drop_rx()
From: Nobuhiro Iwamatsu @ 2026-04-09 5:08 UTC (permalink / raw)
To: vburru, sedara, andrew+netdev, davem, edumazet, kuba, pabeni
Cc: netdev, Nobuhiro Iwamatsu
Remove unnecessary semicolons in octep_oq_drop_rx().
Signed-off-by: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.x90@mail.toshiba>
---
drivers/net/ethernet/marvell/octeon_ep/octep_rx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
index 74de19166488f..e6ebc7e44a00c 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
@@ -392,7 +392,7 @@ static void octep_oq_drop_rx(struct octep_oq *oq,
while (data_len > 0) {
octep_oq_next_pkt(oq, buff_info, read_idx, desc_used);
data_len -= oq->buffer_size;
- };
+ }
}
/**
--
2.53.0
^ permalink raw reply related
* Re: [PATCH net-next 0/2] net: ethtool add VxLAN to the NFC API
From: Jijie Shao @ 2026-04-09 4:03 UTC (permalink / raw)
To: Jakub Kicinski
Cc: shaojijie, davem, netdev, linux-kernel, shenjian15,
liuyonglong@huawei.com, chenhao (EZ), yangshuaisong
In-Reply-To: <20220822095327.00b4ebd5@kernel.org>
on 2022/8/23 0:53, Jakub Kicinski wrote:
> On Mon, 22 Aug 2022 22:46:14 +0800 huangguangbin (A) wrote:
>
> I understand your motivation and these are very valid points.
> However, we have to draw the line somewhere. We have at least
> three ways of configuring flow offloads (ethtool, nft, tc).
> Supporting all of them is a lot of work for the drivers, leading
> to a situation where there's no "standard Linux API" because each
> vendor picks a slightly different approach :(
> TC seems the most popular because of the OVS offload, so my preference
> is to pick it over the other APIs.
Hi, Jakub,
I have recently taken over this job and used tc flow to configure flow steering to a specific queue.
Currently, the community already supports FLOW_ACTION_RX_QUEUE_MAPPING, so the previously discussed issue no longer exists.
However, we have encountered new problems:
1. First, the driver does not support FLOW_ACTION_REDIRECT, and the NIC is not a switching device.
Therefore, I personally think that the driver does not need to implement switchdev.
2. However, the VF itself supports flow rules, and its capabilities are the same as those of the PF.
The VF supports drop and select_queue, and the VF rules are configured through the PF.
3. However, it seems that tc flow does not have a proper parameter to specify the vf_id to tell the PF
which VF to configure the rules for.
Therefore, we have encountered a problem in supporting tc flow for the VF.
A feasible idea is to use chain_index to indicate the vf_id.
However, this changes the original meaning of chain_index, which may cause confusion.
Or perhaps I missed some feature about vf in TC Flow?
I'd appreciate your insights on how to proceed.
Are there existing mechanisms or planned extensions to address VF-specific rule configuration?
Thanks,
Jijie Shao
^ permalink raw reply
* [PATCH net] net: fix __this_cpu_add() in preemptible code in dev_xmit_recursion_inc/dec
From: Jiayuan Chen @ 2026-04-09 3:53 UTC (permalink / raw)
To: netdev
Cc: Jiayuan Chen, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman, Andrew Lunn, Sebastian Andrzej Siewior,
Clark Williams, Steven Rostedt, Weiming Shi, linux-kernel,
linux-rt-devel
dev_xmit_recursion_inc/dec() use __this_cpu_inc/dec() which requires
migration to be disabled. However, some callers like SCTP's UDP
encapsulation path invoke iptunnel_xmit() from process context without
disabling BH or preemption:
sctp_inet_connect -> __sctp_connect -> sctp_do_sm ->
sctp_outq_flush -> sctp_packet_transmit -> sctp_v4_xmit ->
udp_tunnel_xmit_skb -> iptunnel_xmit -> dev_xmit_recursion_inc
This triggers the following warning on PREEMPT(full) kernels:
BUG: using __this_cpu_add() in preemptible [00000000]
caller is dev_xmit_recursion_inc include/linux/netdevice.h:3595 [inline]
caller is iptunnel_xmit+0x1cd/0xb80 net/ipv4/ip_tunnel_core.c:72
Tainted: [L]=SOFTLOCKUP
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:94 [inline]
dump_stack_lvl+0x100/0x190 lib/dump_stack.c:120
check_preemption_disabled+0xd8/0xe0 lib/smp_processor_id.c:47
dev_xmit_recursion_inc include/linux/netdevice.h:3595 [inline]
iptunnel_xmit+0x1cd/0xb80 net/ipv4/ip_tunnel_core.c:72
sctp_v4_xmit+0x75f/0x1060 net/sctp/protocol.c:1073
sctp_packet_transmit+0x22ec/0x3060 net/sctp/output.c:653
sctp_packet_singleton+0x19e/0x370 net/sctp/outqueue.c:783
sctp_outq_flush_ctrl net/sctp/outqueue.c:914 [inline]
sctp_outq_flush+0x315/0x3350 net/sctp/outqueue.c:1212
sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1824 [inline]
sctp_side_effects net/sctp/sm_sideeffect.c:1204 [inline]
sctp_do_sm+0xce1/0x5be0 net/sctp/sm_sideeffect.c:1175
sctp_primitive_ASSOCIATE+0x9c/0xd0 net/sctp/primitive.c:73
__sctp_connect+0x9fc/0xc70 net/sctp/socket.c:1235
sctp_connect net/sctp/socket.c:4818 [inline]
sctp_inet_connect+0x15f/0x220 net/sctp/socket.c:4833
__sys_connect_file+0x141/0x1a0 net/socket.c:2089
__sys_connect+0x141/0x170 net/socket.c:2108
__do_sys_connect net/socket.c:2114 [inline]
__se_sys_connect net/socket.c:2111 [inline]
__x64_sys_connect+0x72/0xb0 net/socket.c:2111
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0x106/0xf80 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Fix this by adding migrate_disable/enable() around the __this_cpu
operations in dev_xmit_recursion_inc/dec() to ensure the per-cpu
variable is accessed on the same CPU throughout the inc/dec pair.
Fixes: 6f1a9140ecda ("net: add xmit recursion limit to tunnel xmit functions")
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
---
include/linux/netdevice.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ca01eb3f7d2..6b1cd5380d70 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3591,14 +3591,19 @@ static inline bool dev_xmit_recursion(void)
XMIT_RECURSION_LIMIT);
}
+/* Non PREEMPT_RT version: inc and dec must run on the same CPU,
+ * migrate_disable is sufficient.
+ */
static inline void dev_xmit_recursion_inc(void)
{
+ migrate_disable();
__this_cpu_inc(softnet_data.xmit.recursion);
}
static inline void dev_xmit_recursion_dec(void)
{
__this_cpu_dec(softnet_data.xmit.recursion);
+ migrate_enable();
}
#else
static inline int dev_recursion_level(void)
--
2.43.0
^ permalink raw reply related
* RE: [Intel-wired-lan] [PATCH v2 2/4] ice: use bitmap_weighted_xor() in ice_find_free_recp_res_idx()
From: Rinitha, SX @ 2026-04-09 3:52 UTC (permalink / raw)
To: Yury Norov, Nguyen, Anthony L, David S. Miller,
Thomas Hellström, Andrew Lunn, Andrew Morton, David Airlie,
Eric Dumazet, Jakub Kicinski, Brost, Matthew, Paolo Abeni,
Kitszel, Przemyslaw, Vivi, Rodrigo, Simona Vetter, Yury Norov,
Rasmus Villemoes, dri-devel@lists.freedesktop.org,
intel-xe@lists.freedesktop.org, linux-kernel@vger.kernel.org,
netdev@vger.kernel.org, intel-wired-lan@lists.osuosl.org
Cc: Simon Horman, David Laight
In-Reply-To: <20260302011159.61778-3-ynorov@nvidia.com>
> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of Yury Norov via Intel-wired-lan
> Sent: 02 March 2026 06:42
> To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; David S. Miller <davem@davemloft.net>; Thomas Hellström <thomas.hellstrom@linux.intel.com>; Andrew Lunn <andrew+netdev@lunn.ch>; Andrew Morton <akpm@linux-foundation.org>; David Airlie <airlied@gmail.com>; Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Brost, Matthew <matthew.brost@intel.com>; Paolo Abeni <pabeni@redhat.com>; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>; Vivi, Rodrigo <rodrigo.vivi@intel.com>; Simona Vetter <simona@ffwll.ch>; Yury Norov <yury.norov@gmail.com>; Rasmus Villemoes <linux@rasmusvillemoes.dk>; dri-devel@lists.freedesktop.org; intel-xe@lists.freedesktop.org; linux-kernel@vger.kernel.org; netdev@vger.kernel.org; intel-wired-lan@lists.osuosl.org
> Cc: Yury Norov <ynorov@nvidia.com>; Simon Horman <horms@kernel.org>; David Laight <david.laight.linux@gmail.com>
> Subject: [Intel-wired-lan] [PATCH v2 2/4] ice: use bitmap_weighted_xor() in ice_find_free_recp_res_idx()
>
> Use the right helper and save one bitmaps traverse.
>
> Signed-off-by: Yury Norov <ynorov@nvidia.com>
> ---
> drivers/net/ethernet/intel/ice/ice_switch.c | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
^ permalink raw reply
* RE: [Intel-wired-lan] [PATCH v1 iwl-net] ice: fix potential NULL pointer deref in error path of ice_set_ringparam()
From: Rinitha, SX @ 2026-04-09 3:45 UTC (permalink / raw)
To: Kohei Enju, intel-wired-lan@lists.osuosl.org,
netdev@vger.kernel.org
Cc: Nguyen, Anthony L, Kitszel, Przemyslaw, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Loktionov, Aleksandr, Alice Michael, Greenwalt, Paul,
Fijalkowski, Maciej, kohei.enju@gmail.com
In-Reply-To: <20260220184031.60113-1-kohei@enjuk.jp>
> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of Kohei Enju
> Sent: 21 February 2026 00:10
> To: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org
> Cc: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>; Andrew Lunn <andrew+netdev@lunn.ch>; David S. Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; Loktionov, Aleksandr <aleksandr.loktionov@intel.com>; Alice Michael <alice.michael@intel.com>; Greenwalt, Paul <paul.greenwalt@intel.com>; Fijalkowski, Maciej <maciej.fijalkowski@intel.com>; kohei.enju@gmail.com; Kohei Enju <kohei@enjuk.jp>
> Subject: [Intel-wired-lan] [PATCH v1 iwl-net] ice: fix potential NULL pointer deref in error path of ice_set_ringparam()
>
> ice_set_ringparam nullifies tstamp_ring of temporary tx_rings, without clearing ICE_TX_RING_FLAGS_TXTIME bit.
> When ICE_TX_RING_FLAGS_TXTIME is set and the subsequent
> ice_setup_tx_ring() call fails, a NULL pointer dereference could happen in the unwinding sequence:
>
> ice_clean_tx_ring()
> -> ice_is_txtime_cfg() == true (ICE_TX_RING_FLAGS_TXTIME is set)
> -> ice_free_tx_tstamp_ring()
> -> ice_free_tstamp_ring()
> -> tstamp_ring->desc (NULL deref)
>
> Clear ICE_TX_RING_FLAGS_TXTIME bit to avoid the potential issue.
>
> Note that this potential issue is found by manual code review.
> Compile test only since unfortunately I don't have E830 devices.
>
> Fixes: ccde82e90946 ("ice: add E830 Earliest TxTime First Offload support")
> Signed-off-by: Kohei Enju <kohei@enjuk.jp>
> ---
> drivers/net/ethernet/intel/ice/ice_ethtool.c | 1 +
> 1 file changed, 1 insertion(+)
>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
^ permalink raw reply
* Re: [RFC PATCH 0/1] netlink: Netlink process event for cgroup migration
From: Prakash Sangappa @ 2026-04-09 3:44 UTC (permalink / raw)
To: Michal Koutný
Cc: linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
cgroups@vger.kernel.org, davem@davemloft.net, kuba@kernel.org,
edumazet@google.com, tj@kernel.org, hannes@cmpxchg.org,
Tom Hromatka, Kamalesh Babulal, Christian Brauner
In-Reply-To: <pd3vkzvgr233tkuocyvpgxc4kttsi5nlggcxuskvwi3mocoqkm@cfefi6hh74s6>
Hi Michal,
Thanks for looking into this patch proposal.
> On Apr 8, 2026, at 5:54 AM, Michal Koutný <mkoutny@suse.com> wrote:
>
> Hi Prakash.
>
> On Tue, Apr 07, 2026 at 05:23:38PM +0000, Prakash Sangappa <prakash.sangappa@oracle.com> wrote:
>> With cgroup based resource management, it becomes useful for
>> userspace to be notified when a task changes cgroup membership.
>> Unexpected migrations can lead to incorrect resource accounting
>> and enforcement resulting in undesirable behavior or failures.
>> Applications/userspace have to poll /proc to detect changes to
>> cgroup membership, which is inefficient when dealing with a large
>> number of tasks.
>
> You may want to check [1] (and followup discussion).
Will take a look.
>
>> Add a new netlink proc connector event that gets generated when
>> a task migrates between cgroups. This allows applications/tools
>> to monitor cgroup membership changes without periodic polling.
>
> This CN_IDX_PROC netlink API haunts me at night.
> The hook(s) proposed above are IMO more future proof and robust approach
> to the process migration that comes as a surprise (and possibly
> interferes with intended resource management).
Ok, with [1] would there be bpf hooks that can be used for notification
of cgroup migration events? Will take a look.
Thanks,
-Prakash
>
> Thanks,
> Michal
>
> [1] https://lore.kernel.org/all/20260220-work-bpf-namespace-v1-2-866207db7b83@kernel.org/
^ permalink raw reply
* RE: [Intel-wired-lan] [PATCH v1 iwl-net] ice: ptp: don't WARN when controlling PF is unavailable
From: Rinitha, SX @ 2026-04-09 3:43 UTC (permalink / raw)
To: Kohei Enju, intel-wired-lan@lists.osuosl.org,
netdev@vger.kernel.org
Cc: Nguyen, Anthony L, Kitszel, Przemyslaw, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Richard Cochran, Temerkhanov, Sergey, Simon Horman,
kohei.enju@gmail.com
In-Reply-To: <20260201141430.131063-1-kohei@enjuk.jp>
> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of Kohei Enju
> Sent: 01 February 2026 19:44
> To: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org
> Cc: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>; Andrew Lunn <andrew+netdev@lunn.ch>; David S. Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; Richard Cochran <richardcochran@gmail.com>; Temerkhanov, Sergey <sergey.temerkhanov@intel.com>; Simon Horman <horms@kernel.org>; kohei.enju@gmail.com; Kohei Enju <kohei@enjuk.jp>
> Subject: [Intel-wired-lan] [PATCH v1 iwl-net] ice: ptp: don't WARN when controlling PF is unavailable
>
> In VFIO passthrough setups, it is possible to pass through only a PF which doesn't own the source timer. In that case the PTP controlling PF
> (adapter->ctrl_pf) is never initialized in the VM, so ice_get_ctrl_ptp() returns NULL and triggers WARN_ON() in ice_ptp_setup_pf().
>
> Since this is an expected behavior in that configuration, replace
> WARN_ON() with an informational message and return -EOPNOTSUPP.
>
> Fixes: e800654e85b5 ("ice: Use ice_adapter for PTP shared data instead of auxdev")
> Signed-off-by: Kohei Enju <kohei@enjuk.jp>
> ---
> drivers/net/ethernet/intel/ice/ice_ptp.c | 8 +++++++-
> 1 file changed, 7 insertions(+), 1 deletion(-)
>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
^ permalink raw reply
* Re: [PATCH] MAINTAINERS: Remove Salil Mehta as HiSilicon HNS3/HNS Ethernet maintainer
From: Jijie Shao @ 2026-04-09 3:14 UTC (permalink / raw)
To: Salil Mehta, davem, netdev, kuba; +Cc: shaojijie, salil.mehta, shenjian15
In-Reply-To: <20260409000430.7217-1-salil.mehta@huawei.com>
on 2026/4/9 8:04, Salil Mehta wrote:
> From: Salil Mehta <salil.mehta@opnsrc.net>
>
> Closing this chapter and a long wonderful journey with my team, I sign off one
> last time with my Huawei email address. Remove my maintainer entry for the
> HiSilicon HNS and HNS3 10G/100G Ethernet drivers, and add a CREDITS entry for
> my co-authorship and maintenance contributions to these drivers.
Salil, Thank you for your hard work over the years.
Acked-by: Jijie Shao <shaojijie@huawei.com>
note: Sorry, the previous email reply was rejected because it contained an HTML part.
>
> Link: https://lore.kernel.org/netdev/259cd032-2ccb-452b-8524-75bc7162e138@huawei.com/
> Cc: Jian Shen <shenjian15@huawei.com>
> Cc: Jijie Shao <shaojijie@huawei.com>
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> ---
> CREDITS | 10 ++++++++++
> MAINTAINERS | 2 --
> 2 files changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/CREDITS b/CREDITS
> index 9091bac3d2da..a03b00452a1e 100644
> --- a/CREDITS
> +++ b/CREDITS
> @@ -3592,6 +3592,16 @@ E: wsalamon@tislabs.com
> E: wsalamon@nai.com
> D: portions of the Linux Security Module (LSM) framework and security modules
>
> +N: Salil Mehta
> +E: salil.mehta@opnsrc.net
> +D: Co-authored Huawei/HiSilicon Kunpeng 920 SoC HNS3 PF and VF 100G
> +D: Ethernet driver
> +D: Co-authored Huawei/HiSilicon Kunpeng 916 SoC HNS 10G Ethernet
> +D: driver enhancements
> +D: Maintained Huawei/HiSilicon HNS and HNS3 10G/100G Ethernet drivers
> +D: for Kunpeng 916 family, 920 family of SoCs
> +S: Cambridge, Cambridgeshire, United Kingdom
> +
> N: Robert Sanders
> E: gt8134b@prism.gatech.edu
> D: Dosemu
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 9d1e6d3acbac..97d0bc3108de 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -11530,7 +11530,6 @@ F: drivers/bus/hisi_lpc.c
>
> HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
> M: Jian Shen <shenjian15@huawei.com>
> -M: Salil Mehta <salil.mehta@huawei.com>
> M: Jijie Shao <shaojijie@huawei.com>
> L: netdev@vger.kernel.org
> S: Maintained
> @@ -11545,7 +11544,6 @@ F: drivers/net/ethernet/hisilicon/hibmcge/
>
> HISILICON NETWORK SUBSYSTEM DRIVER
> M: Jian Shen <shenjian15@huawei.com>
> -M: Salil Mehta <salil.mehta@huawei.com>
> L: netdev@vger.kernel.org
> S: Maintained
> W: http://www.hisilicon.com
^ permalink raw reply
* Re: [PATCH net v3 0/5] bonding: 3ad: fix carrier state with no valid slaves
From: Jakub Kicinski @ 2026-04-09 3:13 UTC (permalink / raw)
To: Louis Scalbert
Cc: netdev, andrew+netdev, jv, edumazet, pabeni, fbl, andy,
shemminger, maheshb
In-Reply-To: <20260408152353.276204-1-louis.scalbert@6wind.com>
On Wed, 8 Apr 2026 17:23:48 +0200 Louis Scalbert wrote:
> The current behavior is not compliant with the LACP standard. This
> patchset introduces a working behavior that is not strictly
> standard-compliant either, but is widely adopted across the industry.
> It consists of bringing the bonding master interface down to signal to
> upper-layer processes that it is not usable.
Is the only problem the compliance? If so I don't think this qualifies
as a fix. Please drop the Fixes tags and repost for net-next. Please
keep in mind the 24h reposting period (also I need some time tomorrow
to queue your patch to the CI so that the selftest passes when v4 is
posted :()
^ permalink raw reply
* [PATCH net v3 1/2] flow_dissector: do not dissect PPPoE PFC frames
From: Qingfang Deng @ 2026-04-09 3:11 UTC (permalink / raw)
To: linux-ppp, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman, Qingfang Deng, Guillaume Nault,
Wojciech Drewek, Tony Nguyen, linux-kernel, netdev
Cc: Paul Mackerras, Jaco Kroon, James Carlson, Marcin Szycik
RFC 2516 Section 7 states that Protocol Field Compression (PFC) is NOT
RECOMMENDED for PPPoE. In practice, pppd does not support negotiating
PFC for PPPoE sessions, and the flow dissector driver has assumed an
uncompressed frame until the blamed commit.
During the review process of that commit [1], support for PFC was
suggested. However, having a compressed (1-byte) protocol field means
the subsequent PPP payload is shifted by one byte, causing 4-byte
misalignment for the network header and an unaligned access exception
on some architectures.
The exception can be reproduced by sending a PPPoE PFC frame to an
ethernet interface of a MIPS board, with RPS enabled, even if no PPPoE
session is active on that interface:
$ 0 : 00000000 80c40000 00000000 85144817
$ 4 : 00000008 00000100 80a75758 81dc9bb8
$ 8 : 00000010 8087ae2c 0000003d 00000000
$12 : 000000e0 00000039 00000000 00000000
$16 : 85043240 80a75758 81dc9bb8 00006488
$20 : 0000002f 00000007 85144810 80a70000
$24 : 81d1bda0 00000000
$28 : 81dc8000 81dc9aa8 00000000 805ead08
Hi : 00009d51
Lo : 2163358a
epc : 805e91f0 __skb_flow_dissect+0x1b0/0x1b50
ra : 805ead08 __skb_get_hash_net+0x74/0x12c
Status: 11000403 KERNEL EXL IE
Cause : 40800010 (ExcCode 04)
BadVA : 85144817
PrId : 0001992f (MIPS 1004Kc)
Call Trace:
[<805e91f0>] __skb_flow_dissect+0x1b0/0x1b50
[<805ead08>] __skb_get_hash_net+0x74/0x12c
[<805ef330>] get_rps_cpu+0x1b8/0x3fc
[<805fca70>] netif_receive_skb_list_internal+0x324/0x364
[<805fd120>] napi_complete_done+0x68/0x2a4
[<8058de5c>] mtk_napi_rx+0x228/0xfec
[<805fd398>] __napi_poll+0x3c/0x1c4
[<805fd754>] napi_threaded_poll_loop+0x234/0x29c
[<805fd848>] napi_threaded_poll+0x8c/0xb0
[<80053544>] kthread+0x104/0x12c
[<80002bd8>] ret_from_kernel_thread+0x14/0x1c
Code: 02d51821 1060045b 00000000 <8c640000> 3084000f 2c820005 144001a2 00042080 8e220000
To reduce the attack surface and maintain performance, do not process
PPPoE PFC frames. While at it, avoid byte-swapping at runtime, restoring
the original behavior.
[1] https://patch.msgid.link/20220630231016.GA392@debian.home
Fixes: 46126db9c861 ("flow_dissector: Add PPPoE dissectors")
Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
---
Changes in v3:
Make ppp_proto_is_valid() private and fix kdoc warning, avoiding
gotchas if some out-of-tree modules use this function.
Link to v1: https://lore.kernel.org/netdev/20260407045743.174446-1-qingfang.deng@linux.dev/
include/linux/ppp_defs.h | 13 -------------
net/core/flow_dissector.c | 39 +++++++++++++++++++++++----------------
2 files changed, 23 insertions(+), 29 deletions(-)
diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h
index b7e57fdbd413..45c0947fa404 100644
--- a/include/linux/ppp_defs.h
+++ b/include/linux/ppp_defs.h
@@ -12,17 +12,4 @@
#define PPP_FCS(fcs, c) crc_ccitt_byte(fcs, c)
-/**
- * ppp_proto_is_valid - checks if PPP protocol is valid
- * @proto: PPP protocol
- *
- * Assumes proto is not compressed.
- * Protocol is valid if the value is odd and the least significant bit of the
- * most significant octet is 0 (see RFC 1661, section 2).
- */
-static inline bool ppp_proto_is_valid(u16 proto)
-{
- return !!((proto & 0x0101) == 0x0001);
-}
-
#endif /* _PPP_DEFS_H_ */
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1b61bb25ba0e..64b843800370 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1035,6 +1035,21 @@ static bool is_pppoe_ses_hdr_valid(const struct pppoe_hdr *hdr)
return hdr->ver == 1 && hdr->type == 1 && hdr->code == 0;
}
+/**
+ * ppp_proto_is_valid - checks if PPP protocol is valid
+ * @proto: PPP protocol
+ *
+ * Assumes proto is not compressed.
+ * Protocol is valid if the value is odd and the least significant bit of the
+ * most significant octet is 0 (see RFC 1661, section 2).
+ *
+ * Return: Whether the PPP protocol is valid.
+ */
+static bool ppp_proto_is_valid(__be16 proto)
+{
+ return (proto & htons(0x0101)) == htons(0x0001);
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @net: associated network namespace, derived from @skb if NULL
@@ -1361,7 +1376,7 @@ bool __skb_flow_dissect(const struct net *net,
struct pppoe_hdr hdr;
__be16 proto;
} *hdr, _hdr;
- u16 ppp_proto;
+ __be16 ppp_proto;
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr) {
@@ -1374,27 +1389,19 @@ bool __skb_flow_dissect(const struct net *net,
break;
}
- /* least significant bit of the most significant octet
- * indicates if protocol field was compressed
- */
- ppp_proto = ntohs(hdr->proto);
- if (ppp_proto & 0x0100) {
- ppp_proto = ppp_proto >> 8;
- nhoff += PPPOE_SES_HLEN - 1;
- } else {
- nhoff += PPPOE_SES_HLEN;
- }
+ ppp_proto = hdr->proto;
+ nhoff += PPPOE_SES_HLEN;
- if (ppp_proto == PPP_IP) {
+ if (ppp_proto == htons(PPP_IP)) {
proto = htons(ETH_P_IP);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
- } else if (ppp_proto == PPP_IPV6) {
+ } else if (ppp_proto == htons(PPP_IPV6)) {
proto = htons(ETH_P_IPV6);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
- } else if (ppp_proto == PPP_MPLS_UC) {
+ } else if (ppp_proto == htons(PPP_MPLS_UC)) {
proto = htons(ETH_P_MPLS_UC);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
- } else if (ppp_proto == PPP_MPLS_MC) {
+ } else if (ppp_proto == htons(PPP_MPLS_MC)) {
proto = htons(ETH_P_MPLS_MC);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
} else if (ppp_proto_is_valid(ppp_proto)) {
@@ -1412,7 +1419,7 @@ bool __skb_flow_dissect(const struct net *net,
FLOW_DISSECTOR_KEY_PPPOE,
target_container);
key_pppoe->session_id = hdr->hdr.sid;
- key_pppoe->ppp_proto = htons(ppp_proto);
+ key_pppoe->ppp_proto = ppp_proto;
key_pppoe->type = htons(ETH_P_PPP_SES);
}
break;
--
2.43.0
^ permalink raw reply related
* [PATCH net v3 2/2] pppoe: drop PFC frames
From: Qingfang Deng @ 2026-04-09 3:11 UTC (permalink / raw)
To: linux-ppp, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Michal Ostrowski, Qingfang Deng,
Kuniyuki Iwashima, Kees Cook, Sebastian Andrzej Siewior, netdev,
linux-kernel
Cc: Paul Mackerras, Jaco Kroon, James Carlson, Wojciech Drewek,
Marcin Szycik, Guillaume Nault
In-Reply-To: <20260409031107.616630-1-qingfang.deng@linux.dev>
RFC 2516 Section 7 states that Protocol Field Compression (PFC) is NOT
RECOMMENDED for PPPoE. In practice, pppd does not support negotiating
PFC for PPPoE sessions, and the current PPPoE driver assumes an
uncompressed (2-byte) protocol field. However, the generic PPP layer
function ppp_input() is not aware of the negotiation result, and still
accepts PFC frames.
If a peer with a broken implementation or an attacker sends a frame with
a compressed (1-byte) protocol field, the subsequent PPP payload is
shifted by one byte. This causes the network header to be 4-byte
misaligned, which may trigger unaligned access exceptions on some
architectures.
To reduce the attack surface, drop PPPoE PFC frames. Introduce
ppp_skb_is_compressed_proto() helper function to be used in both
ppp_generic.c and pppoe.c to avoid open-coding.
Fixes: 224cf5ad14c0 ("ppp: Move the PPP drivers")
Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
---
Changes in v3:
Fix kdoc warning
Link to v2: https://lore.kernel.org/netdev/20260408024245.312732-1-qingfang.deng@linux.dev/
drivers/net/ppp/ppp_generic.c | 2 +-
drivers/net/ppp/pppoe.c | 8 +++++++-
include/linux/ppp_defs.h | 16 ++++++++++++++++
3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index cb29a6968c63..eb1503b389ef 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2251,7 +2251,7 @@ ppp_do_recv(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
*/
static void __ppp_decompress_proto(struct sk_buff *skb)
{
- if (skb->data[0] & 0x01)
+ if (ppp_skb_is_compressed_proto(skb))
*(u8 *)skb_push(skb, 1) = 0x00;
}
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 1ac61c273b28..4cd10c908711 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -393,7 +393,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
if (skb_mac_header_len(skb) < ETH_HLEN)
goto drop;
- if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
+ if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
goto drop;
ph = pppoe_hdr(skb);
@@ -403,6 +403,12 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
if (skb->len < len)
goto drop;
+ /* skb->data points to the PPP protocol header after skb_pull_rcsum.
+ * Drop PFC frames.
+ */
+ if (ppp_skb_is_compressed_proto(skb))
+ goto drop;
+
if (pskb_trim_rcsum(skb, len))
goto drop;
diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h
index 45c0947fa404..6e9587ce651c 100644
--- a/include/linux/ppp_defs.h
+++ b/include/linux/ppp_defs.h
@@ -8,8 +8,24 @@
#define _PPP_DEFS_H_
#include <linux/crc-ccitt.h>
+#include <linux/skbuff.h>
#include <uapi/linux/ppp_defs.h>
#define PPP_FCS(fcs, c) crc_ccitt_byte(fcs, c)
+/**
+ * ppp_skb_is_compressed_proto - checks if PPP protocol in a skb is compressed
+ * @skb: skb to check
+ *
+ * Check if the PPP protocol field is compressed (the least significant
+ * bit of the most significant octet is 1). skb->data must point to the PPP
+ * protocol header.
+ *
+ * Return: Whether the PPP protocol field is compressed.
+ */
+static inline bool ppp_skb_is_compressed_proto(const struct sk_buff *skb)
+{
+ return unlikely(skb->data[0] & 0x01);
+}
+
#endif /* _PPP_DEFS_H_ */
--
2.43.0
^ permalink raw reply related
* Re: [PATCH net-next V5 00/12] devlink: add per-port resource support
From: patchwork-bot+netdevbpf @ 2026-04-09 3:10 UTC (permalink / raw)
To: Tariq Toukan
Cc: edumazet, kuba, pabeni, andrew+netdev, davem, horms,
donald.hunter, jiri, corbet, skhan, saeedm, leon, mbloch, shuah,
matttbe, chuck.lever, cjubran, ohartoov, moshe, dtatulea,
daniel.zahka, shshitrit, cratiu, jacob.e.keller, parav,
ajayachandra, shayd, kees, danielj, netdev, linux-kernel,
linux-doc, linux-rdma, linux-kselftest, gal
In-Reply-To: <20260407194107.148063-1-tariqt@nvidia.com>
Hello:
This series was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:
On Tue, 7 Apr 2026 22:40:55 +0300 you wrote:
> Hi,
>
> This series by Or adds devlink per-port resource support.
> See detailed description by Or below [1].
>
> Regards,
> Tariq
>
> [...]
Here is the summary with links:
- [net-next,V5,01/12] devlink: Refactor resource functions to be generic
https://git.kernel.org/netdev/net-next/c/7be3163c49b2
- [net-next,V5,02/12] devlink: Add port-level resource registration infrastructure
https://git.kernel.org/netdev/net-next/c/6f38acfed5ed
- [net-next,V5,03/12] net/mlx5: Register SF resource on PF port representor
https://git.kernel.org/netdev/net-next/c/4be8326d817e
- [net-next,V5,04/12] netdevsim: Add devlink port resource registration
https://git.kernel.org/netdev/net-next/c/085b234b28cc
- [net-next,V5,05/12] devlink: Add dump support for device-level resources
https://git.kernel.org/netdev/net-next/c/11636b550eea
- [net-next,V5,06/12] devlink: Include port resources in resource dump dumpit
https://git.kernel.org/netdev/net-next/c/810b76394d69
- [net-next,V5,07/12] devlink: Add port-specific option to resource dump doit
https://git.kernel.org/netdev/net-next/c/7511ff14f30d
- [net-next,V5,08/12] selftest: netdevsim: Add devlink port resource doit test
https://git.kernel.org/netdev/net-next/c/396135377104
- [net-next,V5,09/12] devlink: Document port-level resources and full dump
https://git.kernel.org/netdev/net-next/c/170e160a0e7c
- [net-next,V5,10/12] devlink: Add resource scope filtering to resource dump
https://git.kernel.org/netdev/net-next/c/1bc45341a6ea
- [net-next,V5,11/12] selftest: netdevsim: Add resource dump and scope filter test
https://git.kernel.org/netdev/net-next/c/2a8e91235254
- [net-next,V5,12/12] devlink: Document resource scope filtering
https://git.kernel.org/netdev/net-next/c/78c327c1728d
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCH net 0/9][pull request] Intel Wired LAN Driver Updates 2026-04-06 (idpf, ice, ixgbe, ixgbevf, igb, e1000)
From: patchwork-bot+netdevbpf @ 2026-04-09 3:10 UTC (permalink / raw)
To: Tony Nguyen; +Cc: davem, kuba, pabeni, edumazet, andrew+netdev, netdev
In-Reply-To: <20260406213038.444732-1-anthony.l.nguyen@intel.com>
Hello:
This series was applied to netdev/net.git (main)
by Tony Nguyen <anthony.l.nguyen@intel.com>:
On Mon, 6 Apr 2026 14:30:27 -0700 you wrote:
> Emil converts to use spinlock_t for virtchnl transactions to make
> consistent use of the xn_bm_lock when accessing the free_xn_bm bitmap,
> while also avoiding nested raw/bh spinlock issue on PREEMPT_RT kernels.
> He also sets payload size before calling the async handler, to make sure
> it doesn't error out prematurely due to invalid size check for idpf.
>
> Kohei Enju changes WARN_ON for missing PTP control PF to a dev_info() on
> ice as there are cases where this is expected and acceptable.
>
> [...]
Here is the summary with links:
- [net,1/9] idpf: fix PREEMPT_RT raw/bh spinlock nesting for async VC handling
https://git.kernel.org/netdev/net/c/591478118293
- [net,2/9] idpf: improve locking around idpf_vc_xn_push_free()
https://git.kernel.org/netdev/net/c/d086fae65006
- [net,3/9] idpf: set the payload size before calling the async handler
https://git.kernel.org/netdev/net/c/8e2a2420e267
- [net,4/9] ice: ptp: don't WARN when controlling PF is unavailable
https://git.kernel.org/netdev/net/c/bb3f21edc705
- [net,5/9] ice: fix PTP timestamping broken by SyncE code on E825C
https://git.kernel.org/netdev/net/c/bf6dbadb72b9
- [net,6/9] ixgbe: stop re-reading flash on every get_drvinfo for e610
https://git.kernel.org/netdev/net/c/d8ae40dc20cb
- [net,7/9] ixgbevf: add missing negotiate_features op to Hyper-V ops table
https://git.kernel.org/netdev/net/c/4821d563cd7f
- [net,8/9] igb: remove napi_synchronize() in igb_down()
https://git.kernel.org/netdev/net/c/b1e067240379
- [net,9/9] e1000: check return value of e1000_read_eeprom
https://git.kernel.org/netdev/net/c/d3baa34a4707
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCH net-next v3] selftests/net: convert so_txtime to drv-net
From: Willem de Bruijn @ 2026-04-09 3:10 UTC (permalink / raw)
To: Jakub Kicinski, Willem de Bruijn
Cc: netdev, davem, edumazet, pabeni, horms, Willem de Bruijn
In-Reply-To: <20260407191543.0593b5aa@kernel.org>
Jakub Kicinski wrote:
> On Sun, 5 Apr 2026 22:49:22 -0400 Willem de Bruijn wrote:
> > +@ksft_variants(_test_variants_mono())
> > +def test_so_txtime_mono(cfg, ipver, args_tx, args_rx):
> > + """Run all variants of monotonic (fq) tests."""
> > + cmd(f"tc qdisc replace dev {cfg.ifname} root fq")
> > + test_so_txtime(cfg, "mono", ipver, args_tx, args_rx, False)
> > +
> > +
> > +def _test_variants_etf():
> > + for ipver in ["4", "6"]:
> > + for testcase in [
> > + ["no_delay", "a,-1", "a,-1", True],
> > + ["zero_delay", "a,0", "a,0", True],
> > + ["one_pkt", "a,10", "a,10", False],
> > + ["in_order", "a,10,b,20", "a,10,b,20", False],
> > + ["reverse_order", "a,20,b,10", "b,10,a,20", False],
> > + ]:
> > + name = f"_v{ipver}_{testcase[0]}"
>
> nit: looking at the results in NIPA:
> https://netdev-ctrl.bots.linux.dev/logs/vmksft/net-drv/results/593442/5-so-txtime-py/stdout
> the leading _ seems unnecessary?
>
> > + yield KsftNamedVariant(
> > + name, ipver, testcase[1], testcase[2], testcase[3]
> > + )
> > +
> > +
> > +@ksft_variants(_test_variants_etf())
> > +def test_so_txtime_etf(cfg, ipver, args_tx, args_rx, expect_fail):
> > + """Run all variants of etf tests."""
> > + try:
> > + # ETF does not support change, so remove and re-add it instead.
> > + cmd_prefix = f"tc qdisc replace dev {cfg.ifname} root"
> > + cmd(f"{cmd_prefix} pfifo_fast")
> > + cmd(f"{cmd_prefix} etf clockid CLOCK_TAI delta 400000")
> > + except Exception as e:
> > + raise KsftSkipEx("tc does not support qdisc etf. skipping") from e
> > +
> > + test_so_txtime(cfg, "tai", ipver, args_tx, args_rx, expect_fail)
>
> I _think_ we'll leave ETF installed on the device after the test?
> That seems not super great. As we discussed before rebuilding the
> whole hierarchy will be tedious but we could at least replace with
> mq on exit and let it put whatever the default qdisc is as its leaves?
Good point. We can not set mq on netkit. It fails netif_is_multiqueue
in mq_init_common. I'll do the following.
@@ -81,6 +81,8 @@ def main() -> None:
"""Boilerplate ksft main."""
with NetDrvEpEnv(__file__) as cfg:
ksft_run([test_so_txtime_mono, test_so_txtime_etf], args=(cfg,))
+ if not cfg._ns:
+ cmd(f"tc qdisc replace dev {cfg.ifname} root mq")
ksft_exit()
Alternatively could record the root qdisc at the start of the test and
restore that.
^ permalink raw reply
* Re: [PATCH net 0/9][pull request] Intel Wired LAN Driver Updates 2026-04-06 (idpf, ice, ixgbe, ixgbevf, igb, e1000)
From: Jakub Kicinski @ 2026-04-09 3:07 UTC (permalink / raw)
To: Tony Nguyen; +Cc: davem, pabeni, edumazet, andrew+netdev, netdev
In-Reply-To: <20260406213038.444732-1-anthony.l.nguyen@intel.com>
On Mon, 6 Apr 2026 14:30:27 -0700 Tony Nguyen wrote:
> Emil converts to use spinlock_t for virtchnl transactions to make
> consistent use of the xn_bm_lock when accessing the free_xn_bm bitmap,
> while also avoiding nested raw/bh spinlock issue on PREEMPT_RT kernels.
> He also sets payload size before calling the async handler, to make sure
> it doesn't error out prematurely due to invalid size check for idpf.
Sashiko has a bunch of comments but they all look orthogonal
^ permalink raw reply
* Re: clarification: PCI device not getting enumerated
From: Ratheesh Kannoth @ 2026-04-09 3:06 UTC (permalink / raw)
To: Bjorn Helgaas; +Cc: Vidya Sagar, bhelgaas, netdev, linux-kernel, linux-pci
In-Reply-To: <20260401193215.GA229749@bhelgaas>
On 2026-04-02 at 01:02:15, Bjorn Helgaas (helgaas@kernel.org) wrote:
> [+to Vidya, any thoughts on this?]
Bjorn, Vidya,
If I revert the 2 lines of code below from commit 7246a4520b4bf1494d7d030166a11b5226f6d508,
the PCI device gets enumerated.
diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c
index 45b71806182d..c96b2de163b5 100644
--- a/drivers/pci/controller/pci-host-common.c
+++ b/drivers/pci/controller/pci-host-common.c
@@ -73,10 +73,6 @@ int pci_host_common_probe(struct platform_device *pdev)
if (IS_ERR(cfg))
return PTR_ERR(cfg);
- /* Do not reassign resources if probe only */
- if (!pci_has_flag(PCI_PROBE_ONLY))
- pci_add_flags(PCI_REASSIGN_ALL_BUS);
-
bridge->sysdata = cfg;
bridge->ops = (struct pci_ops *)&ops->pci_ops;
bridge->msi_domain = true;
Commit message (7246a4520b4bf1494d7d030166a11b5226f6d508) says,
" This also obviates the use of adding PCI_REASSIGN_ALL_BUS flag if
!PCI_PROBE_ONLY, as pci_assign_unassigned_root_bus_resources() takes care
of reassigning the resources that are not already claimed."
The statement says that PCI_REASSIGN_ALL_BUS is essentially redundant (unnecessary).
Let me know if there are any other side effects; otherwise I can push a patch with these
two lines reverted.
^ permalink raw reply related
* Re: [net-next v1 v1 1/5] dt-bindings: net: starfive,jh7110-dwmac: Remove JH8100
From: Minda Chen @ 2026-04-09 2:44 UTC (permalink / raw)
To: Andrew Lunn
Cc: Alexandre Torgue, Andrew Lunn, David S . Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin,
Emil Renner Berthing, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org,
linux-stm32@st-md-mailman.stormreply.com,
devicetree@vger.kernel.org
In-Reply-To: <ad9ee916-6f8a-4cb9-8016-54a02b00c7ab@lunn.ch>
>
> On Wed, Apr 08, 2026 at 04:44:12PM +0800, Minda Chen wrote:
> > Remove JH8100 dt-bindings because do not support it now.
>
> Could you expand on that. If there are devices out in the field, we don't just drop
> support for it because the vendor has something newer.
>
> If the device never made it outside of the vendors lab, then we might consider
> dropping it.
>
> Please explain in detail why this is being dropped.
>
> Andrew
Yes.
We (StarFive) have stopped development on the JH8100, and the SoC has NOT been released outside the company.
Hi Krzysztof
Could you review patches 1-3 of this series, which are the dt-binding doc changes? I'm sorry,
I sent them to your old e-mail address.
^ permalink raw reply
* [PATCH net-next v7 10/10] selftests: net: Add tests for team driver decoupled tx and rx control
From: Marc Harvey @ 2026-04-09 2:59 UTC (permalink / raw)
To: Jiri Pirko, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Shuah Khan, Simon Horman
Cc: netdev, linux-kernel, linux-kselftest, Kuniyuki Iwashima,
Marc Harvey
In-Reply-To: <20260409-teaming-driver-internal-v7-0-f47e7589685d@google.com>
Use ping and tcpdump to verify that independent rx and tx enablement
of team driver member interfaces works as intended.
Signed-off-by: Marc Harvey <marcharvey@google.com>
---
Changes in v5:
- Minor typo fixes in both test files.
- Link to v4: https://lore.kernel.org/netdev/20260403-teaming-driver-internal-v4-10-d3032f33ca25@google.com/
Changes in v2:
- Fix shellcheck failures.
- Link to v1: https://lore.kernel.org/all/20260331053353.2504254-8-marcharvey@google.com/
---
tools/testing/selftests/drivers/net/team/Makefile | 1 +
.../drivers/net/team/decoupled_enablement.sh | 249 +++++++++++++++++++++
.../testing/selftests/drivers/net/team/options.sh | 99 +++++++-
3 files changed, 348 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index dab922d7f83d..7c58cf82121e 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -2,6 +2,7 @@
# Makefile for net selftests
TEST_PROGS := \
+ decoupled_enablement.sh \
dev_addr_lists.sh \
non_ether_header_ops.sh \
options.sh \
diff --git a/tools/testing/selftests/drivers/net/team/decoupled_enablement.sh b/tools/testing/selftests/drivers/net/team/decoupled_enablement.sh
new file mode 100755
index 000000000000..0d3d9c98e9f5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/decoupled_enablement.sh
@@ -0,0 +1,249 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# These tests verify the decoupled RX and TX enablement of team driver member
+# interfaces.
+#
+# Topology
+#
+# +---------------------+ NS1
+# | test_team1 |
+# | | |
+# | eth0 |
+# | | |
+# | | |
+# +---------------------+
+# |
+# +---------------------+ NS2
+# | | |
+# | | |
+# | eth0 |
+# | | |
+# | test_team2 |
+# +---------------------+
+
+export ALL_TESTS="
+ team_test_tx_enablement
+ team_test_rx_enablement
+"
+
+test_dir="$(dirname "$0")"
+# shellcheck disable=SC1091
+source "${test_dir}/../../../net/lib.sh"
+# shellcheck disable=SC1091
+source "${test_dir}/team_lib.sh"
+
+NS1=""
+NS2=""
+export NODAD="nodad"
+PREFIX_LENGTH="64"
+NS1_IP="fd00::1"
+NS2_IP="fd00::2"
+NS1_IP4="192.168.0.1"
+NS2_IP4="192.168.0.2"
+MEMBERS=("eth0")
+PING_COUNT=5
+PING_TIMEOUT_S=1
+PING_INTERVAL=0.1
+
+while getopts "4" opt; do
+ case $opt in
+ 4)
+ echo "IPv4 mode selected."
+ export NODAD=
+ PREFIX_LENGTH="24"
+ NS1_IP="${NS1_IP4}"
+ NS2_IP="${NS2_IP4}"
+ ;;
+ \?)
+ echo "Invalid option: -$OPTARG" >&2
+ exit 1
+ ;;
+ esac
+done
+
+# This has to be sourced after opts are gathered...
+export REQUIRE_MZ=no
+export NUM_NETIFS=0
+# shellcheck disable=SC1091
+source "${test_dir}/../../../net/forwarding/lib.sh"
+
+# Create the network namespaces, veth pair, and team devices in the specified
+# mode.
+# Globals:
+# RET - Used by test infra, set by `check_err` functions.
+# Arguments:
+# None - the receiving team (test_team2) is always set up in roundrobin mode.
+environment_create()
+{
+ trap cleanup_all_ns EXIT
+ setup_ns ns1 ns2
+ NS1="${NS_LIST[0]}"
+ NS2="${NS_LIST[1]}"
+
+ # Create the interfaces.
+ ip -n "${NS1}" link add eth0 type veth peer name eth0 netns "${NS2}"
+ ip -n "${NS1}" link add test_team1 type team
+ ip -n "${NS2}" link add test_team2 type team
+
+ # Set up the receiving network namespace's team interface.
+ setup_team "${NS2}" test_team2 roundrobin "${NS2_IP}" \
+ "${PREFIX_LENGTH}" "${MEMBERS[@]}"
+}
+
+# Set a particular option value for team or team port.
+# Arguments:
+# namespace - The namespace name that has the team.
+# option_name - The option name to set.
+# option_value - The value to set the option to.
+# team_name - The name of team to set the option for.
+# member_name - The (optional) name of the member port.
+set_option_value()
+{
+ local namespace="$1"
+ local option_name="$2"
+ local option_value="$3"
+ local team_name="$4"
+ local member_name="$5"
+ local port_flag="--port=${member_name}"
+
+ ip netns exec "${namespace}" teamnl "${team_name}" setoption \
+ "${option_name}" "${option_value}" "${port_flag}"
+ return $?
+}
+
+# Send some pings and return the ping command return value.
+try_ping()
+{
+ ip netns exec "${NS1}" ping -i "${PING_INTERVAL}" -c "${PING_COUNT}" \
+ "${NS2_IP}" -W "${PING_TIMEOUT_S}"
+}
+
+# Checks tcpdump output from net/forwarding lib, and checks if there are any
+# ICMP(4 or 6) packets.
+# Arguments:
+# interface - The interface name to search for.
+# ip_address - The destination IP address (4 or 6) to search for.
+did_interface_receive_icmp()
+{
+ local interface="$1"
+ local ip_address="$2"
+ local packet_count
+
+ packet_count=$(tcpdump_show "$interface" | grep -c \
+ "> ${ip_address}: ICMP")
+ echo "Packet count for ${interface} was ${packet_count}"
+
+ if [[ "$packet_count" -gt 0 ]]; then
+ true
+ else
+ false
+ fi
+}
+
+# Test JUST tx enablement with a given mode.
+# Globals:
+# RET - Used by test infra, set by `check_err` functions.
+# Arguments:
+# mode - The mode to set the team interfaces to.
+team_test_mode_tx_enablement()
+{
+ local mode="$1"
+ export RET=0
+
+ # Set up the sender team with the correct mode.
+ setup_team "${NS1}" test_team1 "${mode}" "${NS1_IP}" \
+ "${PREFIX_LENGTH}" "${MEMBERS[@]}"
+ check_err $? "Failed to set up sender team"
+
+ ### Scenario 1: Member interface initially enabled.
+ # Expect ping to pass
+ try_ping
+ check_err $? "Ping failed when TX enabled"
+
+ ### Scenario 2: One tx-side interface disabled.
+ # Expect ping to fail.
+ set_option_value "${NS1}" tx_enabled false test_team1 eth0
+ check_err $? "Failed to disable TX"
+ tcpdump_start eth0 "${NS2}"
+ try_ping
+ check_fail $? "Ping succeeded when TX disabled"
+ tcpdump_stop eth0
+ # Expect no packets to be transmitted, since TX is disabled.
+ did_interface_receive_icmp eth0 "${NS2_IP}"
+ check_fail $? "eth0 IS transmitting when TX disabled"
+ tcpdump_cleanup eth0
+
+ ### Scenario 3: The interface has tx re-enabled.
+ # Expect ping to pass.
+ set_option_value "${NS1}" tx_enabled true test_team1 eth0
+ check_err $? "Failed to reenable TX"
+ try_ping
+ check_err $? "Ping failed when TX reenabled"
+
+ log_test "TX failover of '${mode}' test"
+}
+
+# Test JUST rx enablement with a given mode.
+# Globals:
+# RET - Used by test infra, set by `check_err` functions.
+# Arguments:
+# mode - The mode to set the team interfaces to.
+team_test_mode_rx_enablement()
+{
+ local mode="$1"
+ export RET=0
+
+ # Set up the sender team with the correct mode.
+ setup_team "${NS1}" test_team1 "${mode}" "${NS1_IP}" \
+ "${PREFIX_LENGTH}" "${MEMBERS[@]}"
+ check_err $? "Failed to set up sender team"
+
+ ### Scenario 1: Member interface initially enabled.
+ # Expect ping to pass
+ try_ping
+ check_err $? "Ping failed when RX enabled"
+
+ ### Scenario 2: One rx-side interface disabled.
+ # Expect ping to fail.
+ set_option_value "${NS1}" rx_enabled false test_team1 eth0
+ check_err $? "Failed to disable RX"
+ tcpdump_start eth0 "${NS2}"
+ try_ping
+ check_fail $? "Ping succeeded when RX disabled"
+ tcpdump_stop eth0
+ # Expect packets to be transmitted, since only RX is disabled.
+ did_interface_receive_icmp eth0 "${NS2_IP}"
+ check_err $? "eth0 not transmitting when RX disabled"
+ tcpdump_cleanup eth0
+
+ ### Scenario 3: The interface has rx re-enabled.
+ # Expect ping to pass.
+ set_option_value "${NS1}" rx_enabled true test_team1 eth0
+ check_err $? "Failed to reenable RX"
+ try_ping
+ check_err $? "Ping failed when RX reenabled"
+
+ log_test "RX failover of '${mode}' test"
+}
+
+team_test_tx_enablement()
+{
+ team_test_mode_tx_enablement broadcast
+ team_test_mode_tx_enablement roundrobin
+ team_test_mode_tx_enablement random
+}
+
+team_test_rx_enablement()
+{
+ team_test_mode_rx_enablement broadcast
+ team_test_mode_rx_enablement roundrobin
+ team_test_mode_rx_enablement random
+}
+
+require_command teamnl
+require_command tcpdump
+require_command ping
+environment_create
+tests_run
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh
index 44888f32b513..66c0cb896dad 100755
--- a/tools/testing/selftests/drivers/net/team/options.sh
+++ b/tools/testing/selftests/drivers/net/team/options.sh
@@ -11,10 +11,14 @@ if [[ $# -eq 0 ]]; then
exit $?
fi
-ALL_TESTS="
+export ALL_TESTS="
team_test_options
+ team_test_enabled_implicit_changes
+ team_test_rx_enabled_implicit_changes
+ team_test_tx_enabled_implicit_changes
"
+# shellcheck disable=SC1091
source "${test_dir}/../../../net/lib.sh"
TEAM_PORT="team0"
@@ -176,12 +180,105 @@ team_test_options()
team_test_option mcast_rejoin_count 0 5
team_test_option mcast_rejoin_interval 0 5
team_test_option enabled true false "${MEMBER_PORT}"
+ team_test_option rx_enabled true false "${MEMBER_PORT}"
+ team_test_option tx_enabled true false "${MEMBER_PORT}"
team_test_option user_linkup true false "${MEMBER_PORT}"
team_test_option user_linkup_enabled true false "${MEMBER_PORT}"
team_test_option priority 10 20 "${MEMBER_PORT}"
team_test_option queue_id 0 1 "${MEMBER_PORT}"
}
+team_test_enabled_implicit_changes()
+{
+ export RET=0
+
+ attach_port_if_specified "${MEMBER_PORT}"
+ check_err $? "Couldn't attach ${MEMBER_PORT} to master"
+
+ # Set enabled to true.
+ set_and_check_get enabled true "--port=${MEMBER_PORT}"
+ check_err $? "Failed to set 'enabled' to true"
+
+ # Show that both rx enabled and tx enabled are true.
+ get_and_check_value rx_enabled true "--port=${MEMBER_PORT}"
+ check_err $? "'Rx_enabled' wasn't implicitly set to true"
+ get_and_check_value tx_enabled true "--port=${MEMBER_PORT}"
+ check_err $? "'Tx_enabled' wasn't implicitly set to true"
+
+ # Set enabled to false.
+ set_and_check_get enabled false "--port=${MEMBER_PORT}"
+ check_err $? "Failed to set 'enabled' to false"
+
+ # Show that both rx enabled and tx enabled are false.
+ get_and_check_value rx_enabled false "--port=${MEMBER_PORT}"
+ check_err $? "'Rx_enabled' wasn't implicitly set to false"
+ get_and_check_value tx_enabled false "--port=${MEMBER_PORT}"
+ check_err $? "'Tx_enabled' wasn't implicitly set to false"
+
+ log_test "'Enabled' implicit changes"
+}
+
+team_test_rx_enabled_implicit_changes()
+{
+ export RET=0
+
+ attach_port_if_specified "${MEMBER_PORT}"
+ check_err $? "Couldn't attach ${MEMBER_PORT} to master"
+
+ # Set enabled to true.
+ set_and_check_get enabled true "--port=${MEMBER_PORT}"
+ check_err $? "Failed to set 'enabled' to true"
+
+ # Set rx_enabled to false.
+ set_and_check_get rx_enabled false "--port=${MEMBER_PORT}"
+ check_err $? "Failed to set 'rx_enabled' to false"
+
+ # Show that enabled is false.
+ get_and_check_value enabled false "--port=${MEMBER_PORT}"
+ check_err $? "'enabled' wasn't implicitly set to false"
+
+ # Set rx_enabled to true.
+ set_and_check_get rx_enabled true "--port=${MEMBER_PORT}"
+ check_err $? "Failed to set 'rx_enabled' to true"
+
+ # Show that enabled is true.
+ get_and_check_value enabled true "--port=${MEMBER_PORT}"
+ check_err $? "'enabled' wasn't implicitly set to true"
+
+ log_test "'Rx_enabled' implicit changes"
+}
+
+team_test_tx_enabled_implicit_changes()
+{
+ export RET=0
+
+ attach_port_if_specified "${MEMBER_PORT}"
+ check_err $? "Couldn't attach ${MEMBER_PORT} to master"
+
+ # Set enabled to true.
+ set_and_check_get enabled true "--port=${MEMBER_PORT}"
+ check_err $? "Failed to set 'enabled' to true"
+
+ # Set tx_enabled to false.
+ set_and_check_get tx_enabled false "--port=${MEMBER_PORT}"
+ check_err $? "Failed to set 'tx_enabled' to false"
+
+ # Show that enabled is false.
+ get_and_check_value enabled false "--port=${MEMBER_PORT}"
+ check_err $? "'enabled' wasn't implicitly set to false"
+
+ # Set tx_enabled to true.
+ set_and_check_get tx_enabled true "--port=${MEMBER_PORT}"
+ check_err $? "Failed to set 'tx_enabled' to true"
+
+ # Show that enabled is true.
+ get_and_check_value enabled true "--port=${MEMBER_PORT}"
+ check_err $? "'enabled' wasn't implicitly set to true"
+
+ log_test "'Tx_enabled' implicit changes"
+}
+
+
require_command teamnl
setup
tests_run
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply related
* [PATCH net-next v7 09/10] net: team: Add new tx_enabled team port option
From: Marc Harvey @ 2026-04-09 2:59 UTC (permalink / raw)
To: Jiri Pirko, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Shuah Khan, Simon Horman
Cc: netdev, linux-kernel, linux-kselftest, Kuniyuki Iwashima,
Marc Harvey, Jiri Pirko
In-Reply-To: <20260409-teaming-driver-internal-v7-0-f47e7589685d@google.com>
This option allows independent control over tx enablement without
affecting rx enablement. Like the rx_enabled option, this also
implicitly affects the enabled option.
If this option is not used, then the enabled option will continue to
behave as it did before.
Tested in a follow-up patch with a new selftest.
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Marc Harvey <marcharvey@google.com>
---
Changes in v4:
- New patch: split from the original monolithic v3 patch "net: team:
Decouple rx and tx enablement in the team driver".
- Link to v3: https://lore.kernel.org/netdev/20260402-teaming-driver-internal-v3-6-e8cfdec3b5c2@google.com/
---
drivers/net/team/team_core.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index 67f77de4cf10..0c87f9972457 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -978,6 +978,21 @@ static void __team_port_enable_tx(struct team *team,
team_tx_port_index_hash(team, port->tx_index));
}
+static void team_port_enable_tx(struct team *team,
+ struct team_port *port)
+{
+ if (team_port_tx_enabled(port))
+ return;
+
+ __team_port_enable_tx(team, port);
+ team_adjust_ops(team);
+ team_queue_override_port_add(team, port);
+
+ /* Don't rejoin multicast, since this port might not be receiving. */
+ team_notify_peers(team);
+ team_lower_state_changed(port);
+}
+
static void __reconstruct_port_hlist(struct team *team, int rm_index)
{
struct hlist_head *tx_port_index_hash;
@@ -1007,6 +1022,19 @@ static void __team_port_disable_tx(struct team *team,
WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count - 1);
}
+static void team_port_disable_tx(struct team *team,
+ struct team_port *port)
+{
+ if (!team_port_tx_enabled(port))
+ return;
+
+ __team_port_disable_tx(team, port);
+
+ team_queue_override_port_del(team, port);
+ team_adjust_ops(team);
+ team_lower_state_changed(port);
+}
+
/*
* Enable TX AND RX on the port.
*/
@@ -1529,6 +1557,26 @@ static int team_port_rx_en_option_set(struct team *team,
return 0;
}
+static void team_port_tx_en_option_get(struct team *team,
+ struct team_gsetter_ctx *ctx)
+{
+ struct team_port *port = ctx->info->port;
+
+ ctx->data.bool_val = team_port_tx_enabled(port);
+}
+
+static int team_port_tx_en_option_set(struct team *team,
+ struct team_gsetter_ctx *ctx)
+{
+ struct team_port *port = ctx->info->port;
+
+ if (ctx->data.bool_val)
+ team_port_enable_tx(team, port);
+ else
+ team_port_disable_tx(team, port);
+ return 0;
+}
+
static void team_user_linkup_option_get(struct team *team,
struct team_gsetter_ctx *ctx)
{
@@ -1657,6 +1705,13 @@ static const struct team_option team_options[] = {
.getter = team_port_rx_en_option_get,
.setter = team_port_rx_en_option_set,
},
+ {
+ .name = "tx_enabled",
+ .type = TEAM_OPTION_TYPE_BOOL,
+ .per_port = true,
+ .getter = team_port_tx_en_option_get,
+ .setter = team_port_tx_en_option_set,
+ },
{
.name = "user_linkup",
.type = TEAM_OPTION_TYPE_BOOL,
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply related
* [PATCH net-next v7 08/10] net: team: Add new rx_enabled team port option
From: Marc Harvey @ 2026-04-09 2:59 UTC (permalink / raw)
To: Jiri Pirko, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Shuah Khan, Simon Horman
Cc: netdev, linux-kernel, linux-kselftest, Kuniyuki Iwashima,
Marc Harvey, Jiri Pirko
In-Reply-To: <20260409-teaming-driver-internal-v7-0-f47e7589685d@google.com>
Allow independent control over rx enablement via the rx_enabled option
without affecting tx enablement. This affects the normal enabled
option since a port is only considered enabled if both tx and rx are
enabled.
If this option is not used, then the enabled option will continue to
behave exactly as it did before.
Tested in a follow-up patch with a new selftest.
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Marc Harvey <marcharvey@google.com>
---
Changes in v4:
- New patch: split from the original monolithic v3 patch "net: team:
Decouple rx and tx enablement in the team driver".
- Link to v3: https://lore.kernel.org/netdev/20260402-teaming-driver-internal-v3-6-e8cfdec3b5c2@google.com/
---
drivers/net/team/team_core.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index e437099a5a17..67f77de4cf10 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -941,6 +941,28 @@ static void __team_port_disable_rx(struct team *team,
WRITE_ONCE(port->rx_enabled, false);
}
+static void team_port_enable_rx(struct team *team,
+ struct team_port *port)
+{
+ if (team_port_rx_enabled(port))
+ return;
+
+ __team_port_enable_rx(team, port);
+ team_adjust_ops(team);
+ team_notify_peers(team);
+ team_mcast_rejoin(team);
+}
+
+static void team_port_disable_rx(struct team *team,
+ struct team_port *port)
+{
+ if (!team_port_rx_enabled(port))
+ return;
+
+ __team_port_disable_rx(team, port);
+ team_adjust_ops(team);
+}
+
/*
* Enable just TX on the port by adding to tx-enabled port hashlist and
* setting port->tx_index (Might be racy so reader could see incorrect
@@ -1487,6 +1509,26 @@ static int team_port_en_option_set(struct team *team,
return 0;
}
+static void team_port_rx_en_option_get(struct team *team,
+ struct team_gsetter_ctx *ctx)
+{
+ struct team_port *port = ctx->info->port;
+
+ ctx->data.bool_val = team_port_rx_enabled(port);
+}
+
+static int team_port_rx_en_option_set(struct team *team,
+ struct team_gsetter_ctx *ctx)
+{
+ struct team_port *port = ctx->info->port;
+
+ if (ctx->data.bool_val)
+ team_port_enable_rx(team, port);
+ else
+ team_port_disable_rx(team, port);
+ return 0;
+}
+
static void team_user_linkup_option_get(struct team *team,
struct team_gsetter_ctx *ctx)
{
@@ -1608,6 +1650,13 @@ static const struct team_option team_options[] = {
.getter = team_port_en_option_get,
.setter = team_port_en_option_set,
},
+ {
+ .name = "rx_enabled",
+ .type = TEAM_OPTION_TYPE_BOOL,
+ .per_port = true,
+ .getter = team_port_rx_en_option_get,
+ .setter = team_port_rx_en_option_set,
+ },
{
.name = "user_linkup",
.type = TEAM_OPTION_TYPE_BOOL,
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox