* [PATCH net-next v2 7/8] net: mdio: realtek-rtl9300: Add support for RTL838x
From: Markus Stockhausen @ 2026-06-29 15:23 UTC (permalink / raw)
To: andrew, hkallweit1, linux, davem, edumazet, kuba, pabeni, netdev,
chris.packham, daniel, robh, krzk+dt, conor+dt, devicetree
Cc: Markus Stockhausen
In-Reply-To: <20260629152336.2239826-1-markus.stockhausen@gmx.de>
The MDIO driver has been prepared for multiple device support. Add all
required bits for the RTL838x (aka maple) series. This is straightforward
but some things are worth mentioning.
- The device has a lot in common with the RTL930x series: 28 ports, 4096
(Realtek) pages, 4 MMIO registers.
- The MDIO engine has no fail bit. Thus the fail mask is set to zero.
- There is only one SMI bus for 1G PHYs. No bus_map_base register exists.
- The setup_controller() function needs no c45 setup but must enable
PHY access.
Signed-off-by: Markus Stockhausen <markus.stockhausen@gmx.de>
---
drivers/net/mdio/mdio-realtek-rtl9300.c | 108 ++++++++++++++++++++++++
1 file changed, 108 insertions(+)
diff --git a/drivers/net/mdio/mdio-realtek-rtl9300.c b/drivers/net/mdio/mdio-realtek-rtl9300.c
index 206f4e85b82d..24a281b46526 100644
--- a/drivers/net/mdio/mdio-realtek-rtl9300.c
+++ b/drivers/net/mdio/mdio-realtek-rtl9300.c
@@ -117,6 +117,28 @@
#include <linux/property.h>
#include <linux/regmap.h>
+#define RTL8380_NUM_BUSES 1
+#define RTL8380_NUM_PAGES 4096
+#define RTL8380_NUM_PORTS 28
+#define RTL8380_SMI_GLB_CTRL 0xa100
+#define RTL8380_SMI_PHY_PATCH_DONE BIT(15)
+#define RTL8380_SMI_ACCESS_PHY_CTRL_0 0xa1b8
+#define RTL8380_SMI_ACCESS_PHY_CTRL_1 0xa1bc
+#define RTL8380_PHY_CTRL_REG_ADDR GENMASK(24, 20)
+#define RTL8380_PHY_CTRL_PARK_PAGE GENMASK(19, 15)
+#define RTL8380_PHY_CTRL_MAIN_PAGE GENMASK(14, 3)
+#define RTL8380_PHY_CTRL_WRITE BIT(2)
+#define RTL8380_PHY_CTRL_READ 0
+#define RTL8380_PHY_CTRL_TYPE_C45 BIT(1)
+#define RTL8380_PHY_CTRL_TYPE_C22 0
+#define RTL8380_PHY_CTRL_FAIL 0 /* no fail indicator */
+#define RTL8380_SMI_ACCESS_PHY_CTRL_2 0xa1c0
+#define RTL8380_PHY_CTRL_INDATA GENMASK(31, 16)
+#define RTL8380_PHY_CTRL_DATA GENMASK(15, 0)
+#define RTL8380_SMI_ACCESS_PHY_CTRL_3 0xa1c4
+#define RTL8380_SMI_POLL_CTRL 0xa17c
+#define RTL8380_SMI_PORT0_5_ADDR_CTRL 0xa1c8
+
#define RTL9300_NUM_BUSES 4
#define RTL9300_NUM_PAGES 4096
#define RTL9300_NUM_PORTS 28
@@ -389,6 +411,60 @@ static int otto_emdio_write_cmd(struct mii_bus *bus, u32 cmd,
return otto_emdio_run_cmd(bus, cmd | priv->info->cmd_write, cmd_data);
}
+static int otto_emdio_8380_read_c22(struct mii_bus *bus, int port, int regnum, u32 *value)
+{
+ struct otto_emdio_priv *priv = otto_emdio_bus_to_priv(bus);
+ struct otto_emdio_cmd_regs cmd_data = {
+ .c22_data = FIELD_PREP(RTL8380_PHY_CTRL_REG_ADDR, regnum) |
+ FIELD_PREP(RTL8380_PHY_CTRL_PARK_PAGE, 0x1f) |
+ FIELD_PREP(RTL8380_PHY_CTRL_MAIN_PAGE, priv->page[port]),
+ .io_data = FIELD_PREP(RTL8380_PHY_CTRL_INDATA, port),
+ };
+
+ return otto_emdio_read_cmd(bus, RTL8380_PHY_CTRL_TYPE_C22, &cmd_data,
+ RTL8380_PHY_CTRL_DATA, value);
+}
+
+static int otto_emdio_8380_write_c22(struct mii_bus *bus, int port, int regnum, u16 value)
+{
+ struct otto_emdio_priv *priv = otto_emdio_bus_to_priv(bus);
+ struct otto_emdio_cmd_regs cmd_data = {
+ .c22_data = FIELD_PREP(RTL8380_PHY_CTRL_REG_ADDR, regnum) |
+ FIELD_PREP(RTL8380_PHY_CTRL_PARK_PAGE, 0x1f) |
+ FIELD_PREP(RTL8380_PHY_CTRL_MAIN_PAGE, priv->page[port]),
+ .io_data = FIELD_PREP(RTL8380_PHY_CTRL_INDATA, value),
+ .port_mask_low = BIT(port),
+ };
+
+ return otto_emdio_write_cmd(bus, RTL8380_PHY_CTRL_TYPE_C22, &cmd_data);
+}
+
+static int otto_emdio_8380_read_c45(struct mii_bus *bus, int port,
+ int dev_addr, int regnum, u32 *value)
+{
+ struct otto_emdio_cmd_regs cmd_data = {
+ .c45_data = FIELD_PREP(PHY_CTRL_MMD_DEVAD, dev_addr) |
+ FIELD_PREP(PHY_CTRL_MMD_REG, regnum),
+ .io_data = FIELD_PREP(RTL8380_PHY_CTRL_INDATA, port),
+ };
+
+ return otto_emdio_read_cmd(bus, RTL8380_PHY_CTRL_TYPE_C45, &cmd_data,
+ RTL8380_PHY_CTRL_DATA, value);
+}
+
+static int otto_emdio_8380_write_c45(struct mii_bus *bus, int port,
+ int dev_addr, int regnum, u16 value)
+{
+ struct otto_emdio_cmd_regs cmd_data = {
+ .c45_data = FIELD_PREP(PHY_CTRL_MMD_DEVAD, dev_addr) |
+ FIELD_PREP(PHY_CTRL_MMD_REG, regnum),
+ .io_data = FIELD_PREP(RTL8380_PHY_CTRL_INDATA, value),
+ .port_mask_low = BIT(port),
+ };
+
+ return otto_emdio_write_cmd(bus, RTL8380_PHY_CTRL_TYPE_C45, &cmd_data);
+}
+
static int otto_emdio_9300_read_c22(struct mii_bus *bus, int port, int regnum, u32 *value)
{
struct otto_emdio_priv *priv = otto_emdio_bus_to_priv(bus);
@@ -619,6 +695,15 @@ static int otto_emdio_setup_topology(struct otto_emdio_priv *priv)
return 0;
}
+static int otto_emdio_8380_setup_controller(struct otto_emdio_priv *priv)
+{
+ /*
+ * PHY_PATCH_DONE enables PHY control via SoC. This is required for PHY access, including
+ * patching and must be set before the PHYs are probed.
+ */
+ return regmap_set_bits(priv->regmap, RTL8380_SMI_GLB_CTRL, RTL8380_SMI_PHY_PATCH_DONE);
+}
+
static int otto_emdio_9300_setup_controller(struct otto_emdio_priv *priv)
{
u32 glb_ctrl_mask = 0, glb_ctrl_val = 0;
@@ -916,6 +1001,28 @@ static int otto_emdio_probe(struct platform_device *pdev)
return 0;
}
+static const struct otto_emdio_info otto_emdio_8380_info = {
+ .addr_map_base = RTL8380_SMI_PORT0_5_ADDR_CTRL,
+ .cmd_fail = RTL8380_PHY_CTRL_FAIL,
+ .cmd_read = RTL8380_PHY_CTRL_READ,
+ .cmd_write = RTL8380_PHY_CTRL_WRITE,
+ .cmd_regs = {
+ .c22_data = RTL8380_SMI_ACCESS_PHY_CTRL_1,
+ .c45_data = RTL8380_SMI_ACCESS_PHY_CTRL_3,
+ .io_data = RTL8380_SMI_ACCESS_PHY_CTRL_2,
+ .port_mask_low = RTL8380_SMI_ACCESS_PHY_CTRL_0,
+ },
+ .num_buses = RTL8380_NUM_BUSES,
+ .num_pages = RTL8380_NUM_PAGES,
+ .num_ports = RTL8380_NUM_PORTS,
+ .poll_ctrl = RTL8380_SMI_POLL_CTRL,
+ .setup_controller = otto_emdio_8380_setup_controller,
+ .read_c22 = otto_emdio_8380_read_c22,
+ .read_c45 = otto_emdio_8380_read_c45,
+ .write_c22 = otto_emdio_8380_write_c22,
+ .write_c45 = otto_emdio_8380_write_c45,
+};
+
static const struct otto_emdio_info otto_emdio_9300_info = {
.addr_map_base = RTL9300_SMI_PORT0_5_ADDR_CTRL,
.bus_map_base = RTL9300_SMI_PORT0_15_POLLING_SEL,
@@ -966,6 +1073,7 @@ static const struct otto_emdio_info otto_emdio_9310_info = {
};
static const struct of_device_id otto_emdio_ids[] = {
+ { .compatible = "realtek,rtl8380-mdio", .data = &otto_emdio_8380_info },
{ .compatible = "realtek,rtl9301-mdio", .data = &otto_emdio_9300_info },
{ .compatible = "realtek,rtl9311-mdio", .data = &otto_emdio_9310_info },
{}
--
2.54.0
^ permalink raw reply related
* [PATCH net-next v2 4/8] net: mdio: realtek-rtl9300: Configure hardware polling during probing
From: Markus Stockhausen @ 2026-06-29 15:23 UTC (permalink / raw)
To: andrew, hkallweit1, linux, davem, edumazet, kuba, pabeni, netdev,
chris.packham, daniel, robh, krzk+dt, conor+dt, devicetree
Cc: Markus Stockhausen
In-Reply-To: <20260629152336.2239826-1-markus.stockhausen@gmx.de>
During PHY probing and configuration, complex configuration sequences
might be issued and firmware might be loaded. Hardware polling can
interfere badly with that. E.g. an MMD (c45 over c22) request issued by
hardware polling might break an ongoing firmware loading sequence.
To avoid such issues the polling of the Realtek Otto switches can be
(de)activated with one or two 32 bit mask registers. Each bit enables
(=1) or disables (=0) the polling of the corresponding port. Make use
of this as follows:
- Disable polling for all ports when the MDIO driver starts.
- Reenable polling just after the PHY has been attached.
- Disable polling just before the PHY is being detached.
The different devices will need an individual polling setup. For
this, provide two callbacks that will be used later for code
similar to [1] or [2].
- init_polling(): After polling has been disabled during probing.
- tune_polling(): Before polling gets reactivated for one PHY.
This synchronizes the kernel and hardware polling to some extent.
It gracefully handles deferred probing of PHYs in case the driver
is loaded asynchronously during boot. Additionally, it brings the
hardware polling into a consistent operation mode for devices
where U-Boot does not take care of it.
[1] https://github.com/openwrt/openwrt/blob/main/target/linux/realtek/files-6.18/drivers/net/mdio/mdio-realtek-otto.c#L818
[2] https://lore.kernel.org/netdev/680696024a8648535ce6dee771fe4de67802e0e8.1769053496.git.daniel@makrotopia.org/
Signed-off-by: Markus Stockhausen <markus.stockhausen@gmx.de>
---
drivers/net/mdio/mdio-realtek-rtl9300.c | 87 +++++++++++++++++++++++++
1 file changed, 87 insertions(+)
diff --git a/drivers/net/mdio/mdio-realtek-rtl9300.c b/drivers/net/mdio/mdio-realtek-rtl9300.c
index 616edcde15d9..a8e9a497a0dc 100644
--- a/drivers/net/mdio/mdio-realtek-rtl9300.c
+++ b/drivers/net/mdio/mdio-realtek-rtl9300.c
@@ -137,6 +137,7 @@
#define RTL9300_PHY_CTRL_INDATA GENMASK(31, 16)
#define RTL9300_PHY_CTRL_DATA GENMASK(15, 0)
#define RTL9300_SMI_ACCESS_PHY_CTRL_3 0xcb7c
+#define RTL9300_SMI_POLL_CTRL 0xca90
#define RTL9300_SMI_PORT0_5_ADDR_CTRL 0xcb80
#define RTL9310_NUM_BUSES 4
@@ -162,6 +163,7 @@
#define RTL9310_PHY_CTRL_INDATA GENMASK(15, 0)
#define RTL9310_SMI_INDRT_ACCESS_MMD_CTRL 0x0c18
#define RTL9310_SMI_PORT_ADDR_CTRL 0x0c74
+#define RTL9310_SMI_PORT_POLLING_CTRL 0x0ccc
#define RTL9310_SMI_PORT_POLLING_SEL 0x0c9c
#define PHY_CTRL_CMD BIT(0)
@@ -192,6 +194,7 @@ struct otto_emdio_priv {
const struct otto_emdio_info *info;
struct regmap *regmap;
struct mutex lock; /* protect HW access */
+ DECLARE_BITMAP(phy_poll, MAX_PORTS);
DECLARE_BITMAP(valid_ports, MAX_PORTS);
u16 page[MAX_PORTS];
u8 smi_bus[MAX_PORTS];
@@ -210,6 +213,9 @@ struct otto_emdio_info {
u8 num_buses;
u8 num_ports;
u16 num_pages;
+ u32 poll_ctrl;
+ int (*init_polling)(int port);
+ int (*tune_polling)(struct phy_device *phydev);
int (*setup_controller)(struct otto_emdio_priv *priv);
int (*read_c22)(struct mii_bus *bus, int port, int regnum, u32 *value);
int (*read_c45)(struct mii_bus *bus, int port, int dev_addr, int regnum, u32 *value);
@@ -245,6 +251,14 @@ static struct otto_emdio_priv *otto_emdio_bus_to_priv(struct mii_bus *bus)
return chan->priv;
}
+static int otto_emdio_set_port_polling(struct otto_emdio_priv *priv, int port, bool active)
+{
+ lockdep_assert_held(&priv->lock);
+
+ return regmap_assign_bits(priv->regmap, priv->info->poll_ctrl + (port / 32) * 4,
+ BIT(port % 32), active);
+}
+
static int otto_emdio_run_cmd(struct mii_bus *bus, u32 cmd,
struct otto_emdio_cmd_regs *cmd_data)
{
@@ -588,6 +602,49 @@ static int otto_emdio_9310_setup_controller(struct otto_emdio_priv *priv)
return 0;
}
+static int otto_emdio_notify_phy_attach(struct phy_device *phydev)
+{
+ struct otto_emdio_priv *priv = otto_emdio_bus_to_priv(phydev->mdio.bus);
+ int port = otto_emdio_phy_to_port(phydev->mdio.bus, phydev->mdio.addr);
+ int ret;
+
+ if (port < 0)
+ return port;
+
+ if (test_bit(port, priv->phy_poll))
+ return 0;
+
+ scoped_guard(mutex, &priv->lock) {
+ if (priv->info->tune_polling) {
+ ret = priv->info->tune_polling(phydev);
+ if (ret)
+ return ret;
+ }
+
+ ret = otto_emdio_set_port_polling(priv, port, true);
+ if (!ret)
+ __set_bit(port, priv->phy_poll);
+ }
+
+ return ret;
+}
+
+static void otto_emdio_notify_phy_detach(struct phy_device *phydev)
+{
+ struct otto_emdio_priv *priv = otto_emdio_bus_to_priv(phydev->mdio.bus);
+ int port = otto_emdio_phy_to_port(phydev->mdio.bus, phydev->mdio.addr);
+ struct mii_bus *bus = phydev->mdio.bus;
+
+ if (port < 0)
+ return;
+
+ scoped_guard(mutex, &priv->lock) {
+ __clear_bit(port, priv->phy_poll);
+ if (otto_emdio_set_port_polling(priv, port, false))
+ dev_err(bus->parent, "failed to disable polling for port %d\n", port);
+ }
+}
+
static int otto_emdio_probe_one(struct device *dev, struct otto_emdio_priv *priv,
struct fwnode_handle *node)
{
@@ -617,6 +674,9 @@ static int otto_emdio_probe_one(struct device *dev, struct otto_emdio_priv *priv
bus->write = otto_emdio_write_c22;
}
bus->parent = dev;
+ bus->notify_phy_attach = otto_emdio_notify_phy_attach;
+ bus->notify_phy_detach = otto_emdio_notify_phy_detach;
+
chan = bus->priv;
chan->mdio_bus = mdio_bus;
chan->priv = priv;
@@ -733,6 +793,27 @@ static int otto_emdio_map_ports(struct device *dev)
return err;
}
+static int otto_emdio_init_polling(struct otto_emdio_priv *priv)
+{
+ int err;
+
+ scoped_guard(mutex, &priv->lock) {
+ for (int port = 0; port < priv->info->num_ports; port++) {
+ err = otto_emdio_set_port_polling(priv, port, false);
+ if (err)
+ return err;
+
+ if (priv->info->init_polling) {
+ err = priv->info->init_polling(port);
+ if (err)
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
static int otto_emdio_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -752,6 +833,10 @@ static int otto_emdio_probe(struct platform_device *pdev)
if (IS_ERR(priv->regmap))
return PTR_ERR(priv->regmap);
+ err = otto_emdio_init_polling(priv);
+ if (err)
+ return err;
+
platform_set_drvdata(pdev, priv);
err = otto_emdio_map_ports(dev);
@@ -792,6 +877,7 @@ static const struct otto_emdio_info otto_emdio_9300_info = {
.num_buses = RTL9300_NUM_BUSES,
.num_ports = RTL9300_NUM_PORTS,
.num_pages = RTL9300_NUM_PAGES,
+ .poll_ctrl = RTL9300_SMI_POLL_CTRL,
.setup_controller = otto_emdio_9300_setup_controller,
.read_c22 = otto_emdio_9300_read_c22,
.read_c45 = otto_emdio_9300_read_c45,
@@ -817,6 +903,7 @@ static const struct otto_emdio_info otto_emdio_9310_info = {
.num_buses = RTL9310_NUM_BUSES,
.num_pages = RTL9310_NUM_PAGES,
.num_ports = RTL9310_NUM_PORTS,
+ .poll_ctrl = RTL9310_SMI_PORT_POLLING_CTRL,
.setup_controller = otto_emdio_9310_setup_controller,
.read_c22 = otto_emdio_9310_read_c22,
.read_c45 = otto_emdio_9310_read_c45,
--
2.54.0
^ permalink raw reply related
* [PATCH net-next v2 3/8] net: phy: add (*notify_phy_attach/detach)() hooks to struct mii_bus
From: Markus Stockhausen @ 2026-06-29 15:23 UTC (permalink / raw)
To: andrew, hkallweit1, linux, davem, edumazet, kuba, pabeni, netdev,
chris.packham, daniel, robh, krzk+dt, conor+dt, devicetree
Cc: Markus Stockhausen
In-Reply-To: <20260629152336.2239826-1-markus.stockhausen@gmx.de>
From: Daniel Golle <daniel@makrotopia.org>
Some MDIO buses require PHY polling registers to be programmed depending
on the PHY type. Realtek switch SoCs are the most prominent example of a
DSA switch which does not allow programming MAC speed, duplex and
flow-control settings without using PHY polling to do so [1].
Avoid a half-baked solution in the MDIO bus driver because
- it must reinvent the bus scanning to determine the PHYs and
- it must anticipate the right point in time (e.g. deferred PHYs).
Hence there is a need to inform the MDIO bus driver that a PHY is
being attached or detached. Provide two simple hooks in struct
mii_bus which are called
- right after a PHY has been attached
- just before the PHY is going to be detached
Remark! A slightly different version of this patch was part of a
former series [2]. The discussion already showed that an initialization
hook should be placed somewhere late during the whole setup. This
commit implants it right after phy_init_hw() as suggested. On top of
this it adds the detach hook.
[1] https://github.com/openwrt/openwrt/pull/21515#discussion_r2714069716
[2] https://lore.kernel.org/netdev/cover.1769053496.git.daniel@makrotopia.org/
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: Markus Stockhausen <markus.stockhausen@gmx.de>
---
drivers/net/phy/phy_device.c | 9 +++++++++
include/linux/phy.h | 4 ++++
2 files changed, 13 insertions(+)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0615228459ef..676cbf183350 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1876,6 +1876,12 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
if (err)
goto error;
+ if (phydev->mdio.bus->notify_phy_attach) {
+ err = phydev->mdio.bus->notify_phy_attach(phydev);
+ if (err)
+ goto error;
+ }
+
phy_resume(phydev);
/**
@@ -1919,6 +1925,9 @@ void phy_detach(struct phy_device *phydev)
struct module *ndev_owner = NULL;
struct mii_bus *bus;
+ if (phydev->mdio.bus->notify_phy_detach)
+ phydev->mdio.bus->notify_phy_detach(phydev);
+
if (phydev->devlink) {
device_link_del(phydev->devlink);
phydev->devlink = NULL;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 199a7aaa341b..3160ca99deab 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -376,6 +376,10 @@ struct mii_bus {
int regnum, u16 val);
/** @reset: Perform a reset of the bus */
int (*reset)(struct mii_bus *bus);
+ /** @notify_phy_attach: Perform post-attach handling */
+ int (*notify_phy_attach)(struct phy_device *phydev);
+ /** @notify_phy_detach: Perform pre-detach handling */
+ void (*notify_phy_detach)(struct phy_device *phydev);
/** @stats: Statistic counters per device on the bus */
struct mdio_bus_stats stats[PHY_MAX_ADDR];
--
2.54.0
^ permalink raw reply related
* Re: [PATCH net-next] Documentation: networking: Add a test plan for ethtool pause validation
From: Maxime Chevallier @ 2026-06-29 15:24 UTC (permalink / raw)
To: Andrew Lunn, Jakub Kicinski
Cc: davem, Eric Dumazet, Paolo Abeni, Simon Horman, Russell King,
Heiner Kallweit, Jonathan Corbet, Shuah Khan, Oleksij Rempel,
Vladimir Oltean, Florian Fainelli, thomas.petazzoni, netdev,
linux-kernel, linux-doc
In-Reply-To: <3b8abe17-5da7-4a7e-a42c-eb39a631843e@lunn.ch>
Hi Jakub, Andrew,
On 6/28/26 01:46, Andrew Lunn wrote:
> On Sat, Jun 27, 2026 at 02:30:28PM -0700, Jakub Kicinski wrote:
>> On Sat, 27 Jun 2026 07:34:31 +0200 Maxime Chevallier wrote:
>>>> This is very far from what existing python tests do in netdev.
>>>
>>> We can probably drop the class, as it is with this discussion, it's merely a way
>>> to regroup doc common to similar tests. The rest really is the usual set of
>>> ksft funcs you can feed to the run function, with a set of ksft_ethtool_*
>>> annotators for generic checks.
>>
>> The common way of checking prereqs in the tests is to call a function
>> called require_xyz() which then raises a skip. At a quick glance - the
>> rss_api and xdp_metadata are good tests to get a sense of the usual format.
>
> The counter example is the ksft_disruptive() decorator.
>
> Pythons own unittest framework makes use of decorators to skip
> tests. Its the Pythonic way.
So maybe in the end, we can try to have something a bit less python-y, while still
using extensive documentation in the Sphinx doc format?
Let me send a V2 with the full test list, we'll see how much scaffolding
we can build for ethtool testing, and how. I suspect that running/skipping based on
the device's capabilities is going to be used throughout lots of tests
beyond pause.
For now the important part is to get that test list right, and iterate on the
test implementation once we agree on what to test, why and how.
Maxime
^ permalink raw reply
* Re: [PATCH net 1/1] sctp: avoid auth_enable sysctl UAF during netns teardown
From: tt roxy @ 2026-06-29 15:31 UTC (permalink / raw)
To: Xin Long
Cc: Ren Wei, linux-sctp, netdev, marcelo.leitner, davem, edumazet,
pabeni, horms, matttbe, yuantan098, yifanwucs, tomapufckgml, bird
In-Reply-To: <CADvbK_dn+1qsxgF_LXyBFC+Lep91bCgBDdynx_8c5QnQHp85eA@mail.gmail.com>
On Mon, Jun 29, 2026 at 10:23 PM Xin Long <lucien.xin@gmail.com> wrote:
>
> On Mon, Jun 29, 2026 at 10:04 AM Xin Long <lucien.xin@gmail.com> wrote:
> >
> > On Sun, Jun 28, 2026 at 4:40 AM Ren Wei <n05ec@lzu.edu.cn> wrote:
> > >
> > > From: Zhiling Zou <roxy520tt@gmail.com>
> > >
> > > proc_sctp_do_auth() updates the SCTP control socket after changing
> > > net.sctp.auth_enable. The handler gets the per-net SCTP state from
> > > ctl->data, so an already opened sysctl file can still target a network
> > > namespace while that namespace is being torn down.
> > >
> > > SCTP unregisters its per-net sysctls from sctp_defaults_exit(), but
> > > sctp_ctrlsock_exit() runs earlier because the control-socket pernet ops
> > > are registered after the defaults ops. This leaves a teardown window
> > > where auth_enable is still writable after inet_ctl_sock_destroy() has
> > > released net->sctp.ctl_sock, leading to a use-after-free when the sysctl
> > > handler locks and dereferences the stale socket.
> > >
> > > Unregister the per-net SCTP sysctl table before destroying the control
> > > socket. Make sctp_sysctl_net_unregister() tolerate a missing header and
> > > clear the saved pointer so the later defaults exit path and init-error
> > > path can safely share the same unregister helper.
> > >
> > > Fixes: 15649fd5415e ("sctp: sysctl: auth_enable: avoid using current->nsproxy")
> > > Cc: stable@vger.kernel.org
> > > Reported-by: Yuan Tan <yuantan098@gmail.com>
> > > Reported-by: Yifan Wu <yifanwucs@gmail.com>
> > > Reported-by: Juefei Pu <tomapufckgml@gmail.com>
> > > Reported-by: Xin Liu <bird@lzu.edu.cn>
> > > Assisted-by: Codex:gpt-5.4
> > > Signed-off-by: Zhiling Zou <roxy520tt@gmail.com>
> > > Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
> > > ---
> > > net/sctp/protocol.c | 3 +++
> > > net/sctp/sysctl.c | 9 +++++++--
> > > 2 files changed, 10 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> > > index 587b0017a67d..ae381d304bd5 100644
> > > --- a/net/sctp/protocol.c
> > > +++ b/net/sctp/protocol.c
> > > @@ -1457,8 +1457,11 @@ static int __net_init sctp_ctrlsock_init(struct net *net)
> > >
> > > static void __net_exit sctp_ctrlsock_exit(struct net *net)
> > > {
> > > + sctp_sysctl_net_unregister(net);
> > > +
> > > /* Free the control endpoint. */
> > > inet_ctl_sock_destroy(net->sctp.ctl_sock);
> > > + net->sctp.ctl_sock = NULL;
> > > }
> > >
> > > static struct pernet_operations sctp_ctrlsock_ops = {
> > > diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
> > > index 15e7db9a3ab2..fca840484ebf 100644
> > > --- a/net/sctp/sysctl.c
> > > +++ b/net/sctp/sysctl.c
> > > @@ -615,11 +615,16 @@ int sctp_sysctl_net_register(struct net *net)
> > >
> > > void sctp_sysctl_net_unregister(struct net *net)
> > > {
> > > + struct ctl_table_header *header = net->sctp.sysctl_header;
> > > const struct ctl_table *table;
> > >
> > > - table = net->sctp.sysctl_header->ctl_table_arg;
> > > - unregister_net_sysctl_table(net->sctp.sysctl_header);
> > > + if (!header)
> > > + return;
> > > +
> > > + table = header->ctl_table_arg;
> > > + unregister_net_sysctl_table(header);
> > > kfree(table);
> > > + net->sctp.sysctl_header = NULL;
> > > }
> > >
> > > static struct ctl_table_header *sctp_sysctl_header;
> > > --
> > > 2.43.0
> > >
> >
> > Please also move sctp_sysctl_net_register() to sctp_ctrlsock_init(), and call
> > it AFTER sctp_ctl_sock_init().
> >
> > This is not just for being symmetric, but also fixes two problems:
> >
> > 1. A regression caused by this patch:
> >
> > If sctp_v4_protosw_init() or sctp_v6_protosw_init() fails in sctp_init(),
> > there's no place to call sctp_sysctl_net_unregister() on the err path.
> >
> > 2. A pre-existing issue reported by sashiko-gemini:
> >
> > > diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
> > > index 15e7db9a3ab2e..fca840484ebf7 100644
> > > --- a/net/sctp/sysctl.c
> > > +++ b/net/sctp/sysctl.c
> > > @@ -615,11 +615,16 @@ int sctp_sysctl_net_register(struct net *net)
> > >
> > > void sctp_sysctl_net_unregister(struct net *net)
> > > {
> > > + struct ctl_table_header *header = net->sctp.sysctl_header;
> > > const struct ctl_table *table;
> > This is a pre-existing issue, but I noticed a potential race condition
> > during SCTP module initialization related to the sysctls modified here.
> > During sctp_init(), sctp_defaults_ops registers the sysctls globally before
> > sctp_ctrlsock_ops allocates net->sctp.ctl_sock:
> > sctp_init() {
> > ...
> > status = register_pernet_subsys(&sctp_defaults_ops);
> > if (status)
> > goto err_register_defaults;
> > ...
> > status = register_pernet_subsys(&sctp_ctrlsock_ops);
> > ...
> > }
> > If userspace accesses the sysctls in this window, proc_sctp_do_auth() could
> > dereference a NULL pointer since it assumes ctl_sock is ready:
> > proc_sctp_do_auth() {
> > ...
> > struct sock *sk = net->sctp.ctl_sock;
> > net->sctp.auth_enable = new_value;
> > /* Update the value in the control socket */
> > lock_sock(sk);
> > ...
> > }
> > Can we hit a kernel panic here if the sysctl is modified during automatic
> > module loading?
> > [...]
> >
>
> Also, if you don't mind, please try to address another issue reported
> in sashiko-gemini:
>
> > diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> > index 587b0017a67d5..ae381d304bd53 100644
> > --- a/net/sctp/protocol.c
> > +++ b/net/sctp/protocol.c
> > @@ -1457,8 +1457,11 @@ static int __net_init sctp_ctrlsock_init(struct net *net)
> >
> > static void __net_exit sctp_ctrlsock_exit(struct net *net)
> > {
> This isn't a bug introduced by this patch, but while reviewing the netns
> teardown sequence, I noticed the SCTP UDP tunnel sockets appear to leak.
> In sctp_defaults_exit():
> sctp_defaults_exit() {
> /* Free the local address list */
> sctp_free_addr_wq(net);
> sctp_free_local_addr_list(net);
> ...
> }
> Should sctp_defaults_exit() call sctp_udp_sock_stop(net) to ensure the
> UDP tunnel sockets are closed?
> If a user creates a network namespace, writes to the net.sctp.udp_port sysctl
> to allocate the sockets, and then destroys the namespace, could these sockets
> remain active and cause a use-after-free of struct net when packets arrive?
> [...]
>
> maybe by adding sctp_udp_sock_stop() in sctp_ctrlsock_exit(), and call it AFTER
> sctp_sysctl_net_unregister() in a separate patch.
>
> Thanks.
Thanks for the review.
I addressed both comments in v2. Patch 1 moves the per-net SCTP sysctl
registration after sctp_ctl_sock_init() and keeps the unregister before
destroying the control socket. Patch 2 separately stops the SCTP UDP tunnel
sockets after sysctl unregistration during netns teardown.
I will send the v2 series as a new threaded 0/2, 1/2, 2/2 patch series.
Thanks,
Zhiling
^ permalink raw reply
* [PATCH net] net/sched: act_bpf: use rcu_dereference_bh() to read the filter
From: Sechang Lim @ 2026-06-29 15:41 UTC (permalink / raw)
To: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Jamal Hadi Salim, Jiri Pirko
Cc: Daniel Borkmann, John Fastabend, Stanislav Fomichev,
Alexei Starovoitov, Andrii Nakryiko, Martin KaFai Lau,
Simon Horman, bpf, netdev, linux-kernel
tcf_bpf_act() can run from the tc egress path, which holds only
rcu_read_lock_bh(), but reads prog->filter with rcu_dereference() and
trips lockdep:
WARNING: suspicious RCU usage
net/sched/act_bpf.c:47 suspicious rcu_dereference_check() usage!
1 lock held by syz.2.1588/12756:
#0: (rcu_read_lock_bh){....}-{1:3}, at: __dev_queue_xmit net/core/dev.c:4792
tcf_bpf_act+0x6ae/0x940 net/sched/act_bpf.c:47
tcf_classify+0x6e4/0x1080 net/sched/cls_api.c:1860
sch_handle_egress net/core/dev.c:4545 [inline]
__dev_queue_xmit+0x2185/0x2c00 net/core/dev.c:4808
packet_sendmsg+0x3dfa/0x5120 net/packet/af_packet.c:3114
The other tc actions and cls_bpf already use rcu_dereference_bh() here.
Do the same.
Fixes: 1f211a1b929c ("net, sched: add clsact qdisc")
Signed-off-by: Sechang Lim <rhkrqnwk98@gmail.com>
---
net/sched/act_bpf.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 58a074651176..09d46e195e33 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -44,7 +44,7 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb,
tcf_lastuse_update(&prog->tcf_tm);
bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
- filter = rcu_dereference(prog->filter);
+ filter = rcu_dereference_bh(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
filter_res = bpf_prog_run_data_pointers(filter, skb);
--
2.43.0
^ permalink raw reply related
* Re: [PATCH bpf-next v5 1/3] bpf: Add BPF_FIB_LOOKUP_VLAN flag to bpf_fib_lookup() helper
From: David Ahern @ 2026-06-29 15:49 UTC (permalink / raw)
To: Toke Høiland-Jørgensen, Avinash Duduskar, ast, daniel,
andrii
Cc: eddyz87, memxor, martin.lau, song, yonghong.song, jolsa, emil,
john.fastabend, sdf, davem, edumazet, kuba, pabeni, horms, shuah,
hawk, yatsenko, leon.hwang, kpsingh, a.s.protopopov, ameryhung,
rongtao, eyal.birger, bpf, netdev, linux-kernel, linux-kselftest
In-Reply-To: <87se65bd04.fsf@toke.dk>
On 6/29/26 9:08 AM, Toke Høiland-Jørgensen wrote:
> David Ahern <dsahern@kernel.org> writes:
>
>> On 6/23/26 9:05 PM, Avinash Duduskar wrote:
>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>>> index 89b36de5fdbb..e00f0392e728 100644
>>> --- a/include/uapi/linux/bpf.h
>>> +++ b/include/uapi/linux/bpf.h
>>> @@ -3532,6 +3532,29 @@ union bpf_attr {
>>> * Use the mark present in *params*->mark for the fib lookup.
>>> * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
>>> * as it only has meaning for full lookups.
>>> + * **BPF_FIB_LOOKUP_VLAN**
>>
>> This flag should not be needed. Patches for vlan support were never
>> submitted (I have them in some old branch). Since the vlan params are
>> initialized to 0, no new flag should be needed. Besides, these are
>> output parameters.
>
> There's no enforcement from the kernel side of the parameters being
> zero, though? So we do need the flag for feature detection; unless we
> expect applications to do that out of band? But then we'd need a
> mechanism to do that which could be... the presence of the flag in the
> ENUM (and thus in BTF)? :)
>
This is the output direction - data returned from the fib lookup. It does
not make sense to require a flag to get lookup output. A vlan proto of 0 is
not valid, so it is a clear indication that the vlan output parameters were
not set during the lookup.
^ permalink raw reply
* [PATCH] net: neighbour: add neigh_parms_lookup_dev() helper
From: Paritosh Potukuchi @ 2026-06-29 15:57 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, Paritosh Potukuchi, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Simon Horman, Kuniyuki Iwashima,
Ido Schimmel, Petr Machata
Provide a helper to look up the neigh_parms associated
with a given (neigh_table, net_device) pair.
The existing lookup_neigh_parms() helper is internal to the
neighbour subsystem and cannot be used by other subsystems.
Some stacked/virtual devices like bond require access to the
underlying device's neigh_parms.
neigh_parms_lookup_dev() is a thin wrapper around
lookup_neigh_parms(). The function provides controlled access
to per-device neigh_parms.
The caller is expected to hold rcu_read_lock().
This does not break any existing functionality.
Signed-off-by: Paritosh Potukuchi <paritosh.potukuchi@amd.com>
---
include/net/neighbour.h | 2 ++
net/core/neighbour.c | 8 ++++++++
2 files changed, 10 insertions(+)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 8860cc2175fc..1b3b06eda886 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -438,6 +438,8 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
proc_handler *proc_handler);
void neigh_sysctl_unregister(struct neigh_parms *p);
+struct neigh_parms *neigh_parms_lookup_dev(struct neigh_table *tbl, struct net_device *dev);
+
static inline void __neigh_parms_put(struct neigh_parms *parms)
{
refcount_dec(&parms->refcnt);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1349c0eedb64..6d32c2668af3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1757,6 +1757,14 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
return NULL;
}
+/* Caller must hold rcu_read_lock()*/
+
+struct neigh_parms *neigh_parms_lookup_dev(struct neigh_table *tbl, struct net_device *dev)
+{
+ return lookup_neigh_parms(tbl, dev_net(dev), dev->ifindex);
+}
+EXPORT_SYMBOL(neigh_parms_lookup_dev);
+
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
struct neigh_table *tbl)
{
--
2.43.0
^ permalink raw reply related
* RE: Ethtool : PRBS feature
From: Das, Shubham @ 2026-06-29 16:15 UTC (permalink / raw)
To: Alexander Duyck, Andrew Lunn
Cc: Lee Trager, Maxime Chevallier, netdev@vger.kernel.org,
mkubecek@suse.cz, D H, Siddaraju, Chintalapalle, Balaji,
Lindberg, Magnus, niklas.damberg@ericsson.com
In-Reply-To: <CAKgT0Ufpp+AVrW4raMw=_PEWQdu+dkp+9xhrt1se2G7=CS83iA@mail.gmail.com>
Hi All,
Below are the proposed modifications to the UAPI, data structures, and Netlink messages to support PRBS/BERT and test pattern configuration.
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 5e9135e3774f..cb11e139dd81 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -30,6 +30,36 @@ definitions:
+ name: phy-test-pattern
+ enum-name: phy-test-pattern
+ type: enum
+ name-prefix: phy-test-pattern-
+ doc: PRBS and other PHY test patterns
+ entries:
+ - off
+ - prbs7
+ - prbs9
+ - prbs11
+ - prbs13
+ - prbs15
+ - prbs23
+ - prbs31
+ - ssprq
+ - prbs13q
+ - prbs31q
+ - square
+ -
+ name: phy-test-action
+ enum-name: phy-test-action
+ type: enum
+ name-prefix: phy-test-action-
+ doc: Actions for PHY BERT test control
+ entries:
+ - none
+ - start
+ - stop
+ - stats
-
name: header-flags
type: flags
@@ -1818,6 +1848,58 @@ attribute-sets:
type: u32
enum: loopback-type
+ -
+ name: phy-test
+ attr-cnt-name: __ethtool-a-phy-test-cnt
+ doc: |
+ PHY test configuration for pattern generation/checking,
+ BERT (Bit Error Rate Test), and statistics.
+ attributes:
+ -
+ name: unspec
+ type: unused
+ value: 0
+ -
+ name: header
+ type: nest
+ nested-attributes: header
+ -
+ name: tx-pattern
+ type: u32
+ doc: TX test pattern type (PRBS or square wave)
+ enum: phy-test-pattern
+ -
+ name: rx-pattern
+ type: u32
+ doc: RX checker pattern type (PRBS or square wave)
+ enum: phy-test-pattern
+ -
+ name: bert-action
+ type: u32
+ doc: BERT test start/stop/stats
+ enum: phy-test-action
+ -
+ name: inject-error-count
+ type: u32
+ doc: |
+ Number of errors to inject. Each invocation injects the specified
+ number of bit errors into the data stream.
+ -
+ name: ber-lock-status
+ type: u8
+ doc: PRBS lock status (1=locked, 0=not locked)
+ -
+ name: ber-error-count
+ type: u64
+ doc: BERT bit error count
+ -
+ name: ber-total-bits-sent
+ type: u64
+ doc: BERT total bits tested
+ -
+ name: supported-test-patterns
+ type: u32
+ doc: Bitmask of supported test patterns
-
name: phy-tunable
@@ -2924,6 +3006,53 @@ operations:
- header
- enabled
- type
+ -
+ name: phy-test-act
+ doc: |
+ Configure PHY test parameters. Each attribute is optional and only
+ specified attributes are applied. TX/RX patterns are set on the
+ local port. BERT and error injection operate on the receiver port.
+ When bert-action is stats, a reply with BERT counters is returned.
+ Typical workflow:
+ ethtool --phy-test eth1 tx-pattern prbs7 (TX side)
+ ethtool --phy-test eth2 rx-pattern prbs7 (RX side)
+ ethtool --phy-test eth2 bert start (start BERT on RX)
+ ethtool --phy-test eth2 bert stats (read counters and lock status)
+ ethtool --phy-test eth2 bert stop (stop BERT)
+
+ attribute-set: phy-test
+
+ do:
+ request:
+ attributes:
+ - header
+ - tx-pattern
+ - rx-pattern
+ - bert-action
+ - inject-error-count
+ reply:
+ attributes:
+ - header
+ - ber-lock-status
+ - ber-error-count
+ - ber-total-bits-sent
+ -
+ name: phy-test-get
+ doc: |
+ Get PHY test configuration status and supported patterns.
+
+ attribute-set: phy-test
+
+ do:
+ request:
+ attributes:
+ - header
+ reply:
+ attributes:
+ - header
+ - tx-pattern
+ - rx-pattern
+ - supported-test-patterns
mcast-groups:
list:
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 1ac85b8aebd7..3bcca506cf7b 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
+/* Bitmask of which ethtool_phy_test fields were explicitly specified */
+#define PHY_TEST_CMD_TX_PATTERN BIT(0)
+#define PHY_TEST_CMD_RX_PATTERN BIT(1)
+#define PHY_TEST_CMD_BERT_ACTION BIT(2)
+#define PHY_TEST_CMD_INJECT_COUNT BIT(3)
+
+/**
+ * struct ethtool_phy_test - PHY test configuration and status
+ * @cmd: Bitmask of PHY_TEST_CMD_* indicating which fields to apply (SET)
+ * @tx_pattern: TX test pattern
+ * @rx_pattern: RX checker pattern
+ * @bert_action: BERT start/stop/stats action
+ * @inject_error_count: Number of bit errors to inject (SET only)
+ * @supported_test_patterns: Bitmask of supported patterns (GET only)
+ * @ber_lock_status: BER lock status 1=locked, 0=not locked (GET only)
+ * @ber_error_count: BERT bit error count (GET only)
+ * @ber_total_bits_sent: BERT total bits tested (GET only)
+ */
+struct ethtool_phy_test {
+ u32 cmd;
+ enum phy_test_pattern tx_pattern;
+ enum phy_test_pattern rx_pattern;
+ enum phy_test_action bert_action;
+ u32 inject_error_count;
+ u32 supported_test_patterns;
+ u8 ber_lock_status;
+ u64 ber_error_count;
+ u64 ber_total_bits_sent;
+};
+
/**
* struct ethtool_ops - optional netdev operations
* @supported_input_xfrm: supported types of input xfrm from %RXH_XFRM_*.
@@ -1091,7 +1121,8 @@ struct ethtool_loopback {
* @get_mm: Query the 802.3 MAC Merge layer state.
* @set_mm: Set the 802.3 MAC Merge layer parameters.
* @get_mm_stats: Query the 802.3 MAC Merge layer statistics.
- *
+ * @get_phy_test: Get PHY test status, patterns, and BERT counters.
+ * @set_phy_test: Configure PHY test (pattern, BERT, error injection). *
* All operations are optional (i.e. the function pointer may be set
* to %NULL) and callers must take this into account. Callers must
* hold the RTNL lock.
@@ -1260,6 +1291,10 @@ struct ethtool_ops {
void (*get_mm_stats)(struct net_device *dev, struct ethtool_mm_stats *stats);
+ int (*get_phy_test)(struct net_device *dev,
+ struct ethtool_phy_test *test);
+ int (*set_phy_test)(struct net_device *dev,
+ struct ethtool_phy_test *test);
};
The 'tx_prbs' and 'rx_prbs' command parameters have been renamed to 'tx_pattern' and 'rx_pattern' to allow support
for additional test patterns defined in the RFC, such as square patterns, in addition to PRBS.
The statistics have been moved to the 'ber' test command.
I also think it would be better to expose 'tx_pattern' and 'rx_pattern' as separate commands,
since the TX and RX ports can be different. They are only the same when operating in loopback mode.
> You need to think about the units for inject errors. There is no floating point support. Also, is this corrupt packets?
> Or single bit flips in the stream? It needs to be well defined what it actually means. The driver can then convert it to whatever the hardware supports. How does 802.3 specify this?
I believe this is not mentioned in the IEEE specs, but it will be helpful for debugging in both data and PRBS mode.
Maybe we can specify the number of errors injected into the stream each time the command is issued, rather than an error rate?
> Traditionally, Unix does not offer a way to clear statistic counters back to zero. So i'm not sure about clear-stats.
> We also need to think about hardware which does not support that. And there is locking issues, can the stats be cleared while a test is active?
I think the counters can be auto-cleared, either in the PHY FW or in the implementation, when we start the test.
Also, as previously suggested, we need a new status to indicate that the net device is under test.
- Shubham D
> -----Original Message-----
> From: Alexander Duyck <alexander.duyck@gmail.com>
> Sent: 24 June 2026 21:06
> To: Andrew Lunn <andrew@lunn.ch>
> Cc: Lee Trager <lee@trager.us>; Das, Shubham <shubham.das@intel.com>;
> Maxime Chevallier <maxime.chevallier@bootlin.com>; netdev@vger.kernel.org;
> mkubecek@suse.cz; D H, Siddaraju <siddaraju.dh@intel.com>; Chintalapalle,
> Balaji <balaji.chintalapalle@intel.com>; Lindberg, Magnus
> <magnus.k.lindberg@ericsson.com>; niklas.damberg@ericsson.com
> Subject: Re: Ethtool : PRBS feature
>
> On Tue, Jun 23, 2026 at 7:30 PM Andrew Lunn <andrew@lunn.ch> wrote:
> >
> > > To avoid race conditions, maybe some of these commands need combining.
> > > ethtool --phy-test eth1 tx-prbs prbs7 rx-prbs prbs7 bert start
> > >
> > > The configuration is then atomic, with respect to the uAPI, so we
> > > don't get two users configuring it at the same time, ending up with a
> > > messed up configuration.
> > >
> > > Testing consumes the link so you really don't want anything done to
> > > the netdev while testing is running. fbnic does the following.
> > >
> > > 1. Testing cannot start when the link is up
> >
> > That is not going to work in the generic case. Many MAC drivers don't
> > bind to there PCS or PHY until open() is called. So there is no way to
> > pass the uAPI calls onto the PCS or PHY if the interface is down.
> > There are also some MACs which connect to multiple PCSs, and there can
> > be multiple PHYs. So you need to somehow indicate which PCS/PHY should
> > perform the PRBS. There was a discussion about loopback recently,
> > which has the same issue, you can perform loopback testing in multiple
> > places. So i expect the same concept will be used for this.
>
> I would think something like this would still be usable. You would just need to
> specify the phy address and possibly device address in the case that you support
> doing such testing at multiple layers.
> Basically it would be up to the driver to provide a way to connect the request with
> the desired interface. I would imagine something similar is the case for the
> loopback handling since there are so many layers where you can hairpin things
> back to the port it came in on.
>
> > > 2. Once testing starts the driver removes the netdev to prevent use.
> > > The netdev is only added back when testing stops. The upstream
> > > solution will need something that can keep the netdev but lock
> > > everything down while testing is running.
> >
> > Probably IF_OPER_TESTING would be part of this. If the interface is in
> > this state, you want many other things blocked. However, probably
> > ksettings get/set need to work, so you can force the link into a
> > specific mode.
>
> I would imagine it depends on if you want to enforce ordering on this or not. I
> would say the set would probably need to be blocked as you wouldn't normally
> want to be changing the setting in the middle of a test as it would cause the error
> stats to climb quickly.
>
> > > 3. Once testing starts you cannot change the test, even on an
> > > individual lane basis. You must stop testing first.
> > >
> > >
> > > Traditionally, Unix does not offer a way to clear statistic counters
> > > back to zero. So i'm not sure about clear-stats. We also need to think
> > > about hardware which does not support that. And there is locking
> > > issues, can the stats be cleared while a test is active?
> > >
> > > fbnic actually has separate registers for PRBS test results. Results
> > > do need to be clean between runs but I never created an explicit
> > > clear interface. Firmware automatically reset the registers when a
> > > new test was started. This also allows results to be viewed after testing has
> stopped.
> >
> > We should really take 802.3 as the model, but i've not had time yet to
> > read what it says about the statistics.
>
> I think most of this is all called out in the IEEE 802.3-2022 spec under section
> 45.2.1.169 - 45.2.1.174. Basically the ability and controls live in the 1500 range,
> Tx error statistics in the 1600, and Rx statistics in the 1700 range.
>
> > > Reading results was a little tricky due to roll over between two
> > > 32bit registers.
> >
> > 802.3 is make this even more interesting, since those registers are 16
> > bits.
>
> Yeah, normally to deal with something like that we would likely be looking at
> having to maintain a fairly high read frequency. Although in theory the error
> counts shouldn't be climbing that fast anyway. The spec calls out that the registers
> are clear on read and held at ~0 in the event of overflow which would be a failing
> case for any reasonable test anyway.
>
> > > When I spoke to hardware engineers at Meta they did not want a
> > > timeout. Testing often occurred over days, so they wanted to be able
> > > to start it and explicitly stop it. I'm not against a time out but I do think it
> should be optional.
> > >
> > > Since PRBS testing is handled by firmware one safety measure I added
> > > is if firmware lost contact with the host testing was automatically
> > > stopped and TX FIR values were reset to factory. This ensured that
> > > the NIC won't get stuck in testing and on initialization the driver
> > > doesn't have to worry about testing state.
> >
> > That will work for firmware, but not when Linux is driving the
> > hardware. I don't know if netlink will allow it, or if RTNL will get
> > in the way etc, but it could be we actually don't want a start and
> > stop commands at all, it is a blocking netlink call, and the test runs
> > until the user space process closes the socket?
>
> What we would probably need to do is look at testing as a state rather than an
> operation. Basically the NIC would be put into the testing state and as a result it
> would just be sitting there emitting whatever test pattern it is supposed to emit,
> and validating it is receiving the pattern it expects to receive.
>
> The statistics could probably just be a subset of the PHY statistics that could be
> collected separately. Actually now that I think about it I wonder if we couldn't
> look at putting together the interface similar to how we currently handle FEC
> where you have the --set-fec interface to configure things and the --show-fec
> interface with the -I option to show the current state and also dump the
> statistics.
^ permalink raw reply related
* [PATCH net-next v6 0/2] airoha: add the capability to configure GDM3/GDM4 as WAN/LAN on demand
From: Lorenzo Bianconi @ 2026-06-29 16:17 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Lorenzo Bianconi
Cc: Simon Horman, Alexander Lobakin, linux-arm-kernel, linux-mediatek,
netdev, Madhur Agrawal
Add the capability to configure GDM3/GDM4 as WAN/LAN on demand when QoS
offload is created or destroyed.
Make dev->qdma an RCU pointer so the TX path can safely dereference it
without holding RTNL.
Introduce airoha_qdma_start() and airoha_qdma_stop() helpers.
---
Changes in v6:
- Rebase on top of net-next
- Add patch 1/3: "rename airoha_priv_flags to airoha_dev_flags"
- Drop patch 2/3: "refactor QDMA start/stop into reusable helpers"
- Link to v5: https://lore.kernel.org/r/20260611-airoha-ethtool-priv_flags-v5-0-c11de08486d1@kernel.org
Changes in v5:
- Add patch 1/3: use int instead of atomic_t for qdma users counter
- Protect dev->flags with flow_offload_mutex mutex.
- Introduce AIROHA_PRIV_F_QOS in order to handle better WAN/LAN
switching.
- Link to v4: https://lore.kernel.org/r/20260610-airoha-ethtool-priv_flags-v4-0-60e89cf28fea@kernel.org
Changes in v4:
- Move back QDMA TX/RX DMA enable to airoha_dev_open()/airoha_dev_stop().
- Configure GDM3/4 as WAN if GDM2 is not available in ndo_init()
callback.
- Protect qdma pointer in airoha_gdm_dev struct using RCU.
- Rely on rtnl_dereference() to access qdma pointer in the control path.
- Add airoha_qdma_start() and airoha_qdma_stop() utility routines in
patch 1/2
- Link to v3: https://lore.kernel.org/r/20260608-airoha-ethtool-priv_flags-v3-1-3e8e3dc3f715@kernel.org
Changes in v3:
- Do not introduce ethtool private flags support to configure LAN/WAN
for GDM3/4 and rely on tc qdisc offload for it instead.
- Set GDM3/4 ports as LAN by default.
- Move QDMA TX/RX DMA enable from airoha_dev_open() to airoha_probe()
and the corresponding disable from airoha_dev_stop() to airoha_qdma_cleanup().
- Link to v2: https://lore.kernel.org/r/20260607-airoha-ethtool-priv_flags-v2-1-742c7aa1e182@kernel.org
Changes in v2:
- Rework airoha_dev_set_wan_flag routine
- Enable GDM_STRIP_CRC_MASK in airoha_disable_gdm2_loopback()
- Do not always reset REG_SRC_PORT_FC_MAP6 in
airoha_disable_gdm2_loopback() but use the same condition used in
airoha_enable_gdm2_loopback().
- Link to v1: https://lore.kernel.org/r/20260606-airoha-ethtool-priv_flags-v1-1-401b2c9fe9f1@kernel.org
---
Lorenzo Bianconi (2):
net: airoha: rename airoha_priv_flags to airoha_dev_flags
net: airoha: defer GDM3/GDM4 WAN mode and GDM2 loopback to QoS offload
drivers/net/ethernet/airoha/airoha_eth.c | 231 ++++++++++++++++++++++++++----
drivers/net/ethernet/airoha/airoha_eth.h | 19 ++-
drivers/net/ethernet/airoha/airoha_ppe.c | 9 +-
drivers/net/ethernet/airoha/airoha_regs.h | 1 +
4 files changed, 223 insertions(+), 37 deletions(-)
---
base-commit: 805185b7c7a1069e407b6f7b3bc98e44d415f484
change-id: 20260606-airoha-ethtool-priv_flags-b6aa70caa780
Best regards,
--
Lorenzo Bianconi <lorenzo@kernel.org>
^ permalink raw reply
* [PATCH net-next v6 1/2] net: airoha: rename airoha_priv_flags to airoha_dev_flags
From: Lorenzo Bianconi @ 2026-06-29 16:17 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Lorenzo Bianconi
Cc: Simon Horman, Alexander Lobakin, linux-arm-kernel, linux-mediatek,
netdev
In-Reply-To: <20260629-airoha-ethtool-priv_flags-v6-0-86bc600d31bc@kernel.org>
Rename the airoha_priv_flags enum to airoha_dev_flags and the
AIROHA_PRIV_F_WAN flag to AIROHA_DEV_F_WAN. The "priv_flags" naming
dates back to an earlier design that used ethtool private flags; since
this series switched to tc qdisc offload for LAN/WAN configuration,
align the naming to reflect that these are per-device flags rather than
ethtool private flags. No functional change.
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
drivers/net/ethernet/airoha/airoha_eth.c | 2 +-
drivers/net/ethernet/airoha/airoha_eth.h | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index 932b3a3df2e5..8bba54ebcf07 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -2048,7 +2048,7 @@ static int airoha_dev_init(struct net_device *netdev)
fallthrough;
case AIROHA_GDM2_IDX:
/* GDM2 is always used as wan */
- dev->flags |= AIROHA_PRIV_F_WAN;
+ dev->flags |= AIROHA_DEV_F_WAN;
break;
default:
break;
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index d7ff8c5200e2..87ab3ea10664 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -535,8 +535,8 @@ struct airoha_qdma {
DECLARE_BITMAP(qos_channel_map, AIROHA_NUM_QOS_CHANNELS);
};
-enum airoha_priv_flags {
- AIROHA_PRIV_F_WAN = BIT(0),
+enum airoha_dev_flags {
+ AIROHA_DEV_F_WAN = BIT(0),
};
struct airoha_gdm_dev {
@@ -659,7 +659,7 @@ static inline u16 airoha_qdma_get_txq(struct airoha_qdma *qdma, u16 qid)
static inline bool airoha_is_lan_gdm_dev(struct airoha_gdm_dev *dev)
{
- return !(dev->flags & AIROHA_PRIV_F_WAN);
+ return !(dev->flags & AIROHA_DEV_F_WAN);
}
static inline bool airoha_is_7581(struct airoha_eth *eth)
--
2.54.0
^ permalink raw reply related
* [PATCH net-next v6 2/2] net: airoha: defer GDM3/GDM4 WAN mode and GDM2 loopback to QoS offload
From: Lorenzo Bianconi @ 2026-06-29 16:17 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Lorenzo Bianconi
Cc: Simon Horman, Alexander Lobakin, linux-arm-kernel, linux-mediatek,
netdev, Madhur Agrawal
In-Reply-To: <20260629-airoha-ethtool-priv_flags-v6-0-86bc600d31bc@kernel.org>
GDM3 and GDM4 ports require GDM2 loopback to be enabled for hardware
QoS offload to function. Without it, HTB and ETS offload on these ports
do not work.
Previously, GDM3/GDM4 ports were automatically configured as WAN with
GDM2 loopback enabled during ndo_init(). Add the capability to configure
GDM3/GDM4 as WAN/LAN on demand when QoS offload is created or destroyed.
Hook airoha_enable_qos_for_gdm34() into TC_HTB_CREATE so that requesting
HTB offload on a GDM3/GDM4 LAN port switches it to WAN mode and enables
GDM2 loopback, with proper rollback on failure. Introduce the
AIROHA_DEV_F_QOS flag to track whether a device has an active HTB
qdisc; clear it on TC_HTB_DESTROY. The device keeps its WAN role after
qdisc teardown so that its configuration is preserved until another
device explicitly needs the WAN role for QoS offload.
If another GDM3/GDM4 device already holds the WAN role without an active
QoS qdisc, demote it to LAN before promoting the requesting device. Skip
the demotion when the requesting device is itself already the WAN device.
Since airoha_dev_set_qdma() can now be called on a running device to
migrate between QDMA blocks, make dev->qdma an RCU pointer so the TX
path can safely dereference it without holding RTNL.
Hold flow_offload_mutex in airoha_enable_qos_for_gdm34() and
airoha_disable_qos_for_gdm34() around the dev->flags update,
airoha_dev_set_qdma() and GDM2 loopback configuration, serializing
against concurrent airoha_ppe_hw_init() in the TC_SETUP_CLSFLOWER
offload path.
Introduce airoha_qdma_deref() helper that wraps rcu_dereference_protected()
with a lockdep condition accepting either rtnl_lock or flow_offload_mutex,
and use it across all control-path dereferences of the RCU-protected
dev->qdma pointer.
Add airoha_disable_gdm2_loopback() to disable GDM2 hw loopback.
Tested-by: Madhur Agrawal <madhur.agrawal@airoha.com>
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
drivers/net/ethernet/airoha/airoha_eth.c | 229 ++++++++++++++++++++++++++----
drivers/net/ethernet/airoha/airoha_eth.h | 13 +-
drivers/net/ethernet/airoha/airoha_ppe.c | 9 +-
drivers/net/ethernet/airoha/airoha_regs.h | 1 +
4 files changed, 219 insertions(+), 33 deletions(-)
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index 8bba54ebcf07..231c8f2f20dd 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -929,7 +929,7 @@ static void airoha_qdma_wake_netdev_txqs(struct airoha_queue *q)
if (!dev)
continue;
- if (dev->qdma != qdma)
+ if (rcu_access_pointer(dev->qdma) != qdma)
continue;
netdev = netdev_from_priv(dev);
@@ -1837,13 +1837,14 @@ static int airoha_dev_open(struct net_device *netdev)
struct airoha_gdm_dev *dev = netdev_priv(netdev);
struct airoha_gdm_port *port = dev->port;
u32 cur_len, pse_port = FE_PSE_PORT_PPE1;
- struct airoha_qdma *qdma = dev->qdma;
+ struct airoha_qdma *qdma;
netif_tx_start_all_queues(netdev);
err = airoha_set_vip_for_gdm_port(dev, true);
if (err)
return err;
+ qdma = airoha_qdma_deref(dev);
if (netdev_uses_dsa(netdev))
airoha_fe_set(qdma->eth, REG_GDM_INGRESS_CFG(port->id),
GDM_STAG_EN_MASK);
@@ -1903,7 +1904,6 @@ static int airoha_dev_stop(struct net_device *netdev)
{
struct airoha_gdm_dev *dev = netdev_priv(netdev);
struct airoha_gdm_port *port = dev->port;
- struct airoha_qdma *qdma = dev->qdma;
netif_tx_disable(netdev);
airoha_set_vip_for_gdm_port(dev, false);
@@ -1911,7 +1911,7 @@ static int airoha_dev_stop(struct net_device *netdev)
if (--port->users)
airoha_set_port_mtu(dev->eth, port);
else
- airoha_set_gdm_port_fwd_cfg(qdma->eth,
+ airoha_set_gdm_port_fwd_cfg(dev->eth,
REG_GDM_FWD_CFG(port->id),
FE_PSE_PORT_DROP);
return 0;
@@ -1998,6 +1998,53 @@ static int airoha_enable_gdm2_loopback(struct airoha_gdm_dev *dev)
return 0;
}
+static int airoha_disable_gdm2_loopback(struct airoha_gdm_dev *dev)
+{
+ struct airoha_gdm_port *port = dev->port;
+ struct airoha_eth *eth = dev->eth;
+ int i, src_port;
+ u32 pse_port;
+
+ src_port = eth->soc->ops.get_sport(dev->port, dev->nbq);
+ if (src_port < 0)
+ return src_port;
+
+ airoha_fe_clear(eth,
+ REG_SP_DFT_CPORT(src_port >> fls(SP_CPORT_DFT_MASK)),
+ SP_CPORT_MASK(src_port & SP_CPORT_DFT_MASK));
+
+ airoha_fe_set(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX),
+ GDM_STRIP_CRC_MASK);
+ airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX),
+ FE_PSE_PORT_DROP);
+ airoha_fe_clear(eth, REG_GDM_LPBK_CFG(AIROHA_GDM2_IDX),
+ LPBK_CHAN_MASK | LPBK_MODE_MASK | LPBK_EN_MASK);
+ pse_port = airoha_ppe_is_enabled(eth, 1) ? FE_PSE_PORT_PPE2
+ : FE_PSE_PORT_PPE1;
+ airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX),
+ pse_port);
+
+ airoha_fe_rmw(eth, REG_FE_WAN_PORT, WAN0_MASK,
+ FIELD_PREP(WAN0_MASK, AIROHA_GDM2_IDX));
+
+ for (i = 0; i < eth->soc->num_ppe; i++)
+ airoha_fe_clear(eth, REG_PPE_DFT_CPORT(i, AIROHA_GDM2_IDX),
+ DFT_CPORT_MASK(AIROHA_GDM2_IDX));
+
+ /* Enable VIP and IFC for GDM2 */
+ airoha_fe_set(eth, REG_FE_VIP_PORT_EN, BIT(AIROHA_GDM2_IDX));
+ airoha_fe_set(eth, REG_FE_IFC_PORT_EN, BIT(AIROHA_GDM2_IDX));
+
+ if (port->id == AIROHA_GDM4_IDX && airoha_is_7581(eth)) {
+ u32 mask = FC_ID_OF_SRC_PORT_MASK(dev->nbq);
+
+ airoha_fe_rmw(eth, REG_SRC_PORT_FC_MAP6, mask,
+ FC_MAP6_DEF_VALUE & mask);
+ }
+
+ return 0;
+}
+
static struct airoha_gdm_dev *
airoha_get_wan_gdm_dev(struct airoha_eth *eth)
{
@@ -2024,15 +2071,25 @@ airoha_get_wan_gdm_dev(struct airoha_eth *eth)
static void airoha_dev_set_qdma(struct airoha_gdm_dev *dev)
{
struct net_device *netdev = netdev_from_priv(dev);
+ struct airoha_qdma *cur_qdma, *qdma;
struct airoha_eth *eth = dev->eth;
int ppe_id;
/* QDMA0 is used for lan ports while QDMA1 is used for WAN ports */
- dev->qdma = ð->qdma[!airoha_is_lan_gdm_dev(dev)];
- netdev->irq = dev->qdma->irq_banks[0].irq;
+ qdma = ð->qdma[!airoha_is_lan_gdm_dev(dev)];
+ cur_qdma = airoha_qdma_deref(dev);
+
+ rcu_assign_pointer(dev->qdma, qdma);
+ netdev->irq = qdma->irq_banks[0].irq;
ppe_id = !airoha_is_lan_gdm_dev(dev) && airoha_ppe_is_enabled(eth, 1);
airoha_ppe_set_cpu_port(dev, ppe_id, airoha_get_fe_port(dev));
+
+ if (!cur_qdma)
+ return;
+
+ synchronize_rcu();
+ netif_tx_wake_all_queues(netdev);
}
static int airoha_dev_init(struct net_device *netdev)
@@ -2187,9 +2244,9 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct airoha_gdm_dev *dev = netdev_priv(netdev);
- struct airoha_qdma *qdma = dev->qdma;
u32 nr_frags, tag, msg0, msg1, len;
struct airoha_queue_entry *e;
+ struct airoha_qdma *qdma;
struct netdev_queue *txq;
struct airoha_queue *q;
LIST_HEAD(tx_list);
@@ -2198,6 +2255,8 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
u16 index;
u8 fport;
+ rcu_read_lock();
+ qdma = rcu_dereference(dev->qdma);
qid = airoha_qdma_get_txq(qdma, skb_get_queue_mapping(skb));
tag = airoha_get_dsa_tag(skb, netdev);
@@ -2247,6 +2306,8 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
netif_tx_stop_queue(txq);
q->txq_stopped = true;
spin_unlock_bh(&q->lock);
+ rcu_read_unlock();
+
return NETDEV_TX_BUSY;
}
@@ -2309,6 +2370,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
FIELD_PREP(TX_RING_CPU_IDX_MASK, index));
spin_unlock_bh(&q->lock);
+ rcu_read_unlock();
return NETDEV_TX_OK;
@@ -2324,6 +2386,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
error:
dev_kfree_skb_any(skb);
netdev->stats.tx_dropped++;
+ rcu_read_unlock();
return NETDEV_TX_OK;
}
@@ -2403,17 +2466,19 @@ static int airoha_qdma_set_chan_tx_sched(struct net_device *netdev,
const u16 *weights, u8 n_weights)
{
struct airoha_gdm_dev *dev = netdev_priv(netdev);
+ struct airoha_qdma *qdma;
int i;
+ qdma = airoha_qdma_deref(dev);
for (i = 0; i < AIROHA_NUM_QOS_QUEUES; i++)
- airoha_qdma_clear(dev->qdma, REG_QUEUE_CLOSE_CFG(channel),
+ airoha_qdma_clear(qdma, REG_QUEUE_CLOSE_CFG(channel),
TXQ_DISABLE_CHAN_QUEUE_MASK(channel, i));
for (i = 0; i < n_weights; i++) {
u32 status;
int err;
- airoha_qdma_wr(dev->qdma, REG_TXWRR_WEIGHT_CFG,
+ airoha_qdma_wr(qdma, REG_TXWRR_WEIGHT_CFG,
TWRR_RW_CMD_MASK |
FIELD_PREP(TWRR_CHAN_IDX_MASK, channel) |
FIELD_PREP(TWRR_QUEUE_IDX_MASK, i) |
@@ -2421,12 +2486,12 @@ static int airoha_qdma_set_chan_tx_sched(struct net_device *netdev,
err = read_poll_timeout(airoha_qdma_rr, status,
status & TWRR_RW_CMD_DONE,
USEC_PER_MSEC, 10 * USEC_PER_MSEC,
- true, dev->qdma, REG_TXWRR_WEIGHT_CFG);
+ true, qdma, REG_TXWRR_WEIGHT_CFG);
if (err)
return err;
}
- airoha_qdma_rmw(dev->qdma, REG_CHAN_QOS_MODE(channel >> 3),
+ airoha_qdma_rmw(qdma, REG_CHAN_QOS_MODE(channel >> 3),
CHAN_QOS_MODE_MASK(channel),
__field_prep(CHAN_QOS_MODE_MASK(channel), mode));
@@ -2490,13 +2555,15 @@ static int airoha_qdma_get_tx_ets_stats(struct net_device *netdev, int channel,
struct tc_ets_qopt_offload *opt)
{
struct airoha_gdm_dev *dev = netdev_priv(netdev);
- struct airoha_qdma *qdma = dev->qdma;
+ u64 cpu_tx_packets, fwd_tx_packets, tx_packets;
+ struct airoha_qdma *qdma;
- u64 cpu_tx_packets = airoha_qdma_rr(qdma, REG_CNTR_VAL(channel << 1));
- u64 fwd_tx_packets = airoha_qdma_rr(qdma,
- REG_CNTR_VAL((channel << 1) + 1));
- u64 tx_packets = (cpu_tx_packets - dev->cpu_tx_packets) +
- (fwd_tx_packets - dev->fwd_tx_packets);
+ qdma = airoha_qdma_deref(dev);
+ cpu_tx_packets = airoha_qdma_rr(qdma, REG_CNTR_VAL(channel << 1));
+ fwd_tx_packets = airoha_qdma_rr(qdma,
+ REG_CNTR_VAL((channel << 1) + 1));
+ tx_packets = (cpu_tx_packets - dev->cpu_tx_packets) +
+ (fwd_tx_packets - dev->fwd_tx_packets);
_bstats_update(opt->stats.bstats, 0, tx_packets);
dev->cpu_tx_packets = cpu_tx_packets;
@@ -2756,16 +2823,18 @@ static int airoha_qdma_set_tx_rate_limit(struct net_device *netdev,
u32 bucket_size)
{
struct airoha_gdm_dev *dev = netdev_priv(netdev);
+ struct airoha_qdma *qdma;
int i, err;
+ qdma = airoha_qdma_deref(dev);
for (i = 0; i <= TRTCM_PEAK_MODE; i++) {
- err = airoha_qdma_set_trtcm_config(dev->qdma, channel,
+ err = airoha_qdma_set_trtcm_config(qdma, channel,
REG_EGRESS_TRTCM_CFG, i,
!!rate, TRTCM_METER_MODE);
if (err)
return err;
- err = airoha_qdma_set_trtcm_token_bucket(dev->qdma, channel,
+ err = airoha_qdma_set_trtcm_token_bucket(qdma, channel,
REG_EGRESS_TRTCM_CFG,
i, rate, bucket_size);
if (err)
@@ -2801,11 +2870,12 @@ static int airoha_tc_htb_alloc_leaf_queue(struct net_device *netdev,
u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS;
int err, num_tx_queues = AIROHA_NUM_TX_RING + channel + 1;
struct airoha_gdm_dev *dev = netdev_priv(netdev);
- struct airoha_qdma *qdma = dev->qdma;
+ struct airoha_qdma *qdma;
/* Here we need to check the requested QDMA channel is not already
* in use by another net_device running on the same QDMA block.
*/
+ qdma = airoha_qdma_deref(dev);
if (test_and_set_bit(channel, qdma->qos_channel_map)) {
NL_SET_ERR_MSG_MOD(opt->extack,
"qdma qos channel already in use");
@@ -2841,7 +2911,7 @@ static int airoha_qdma_set_rx_meter(struct airoha_gdm_dev *dev,
u32 rate, u32 bucket_size,
enum trtcm_unit_type unit_type)
{
- struct airoha_qdma *qdma = dev->qdma;
+ struct airoha_qdma *qdma = airoha_qdma_deref(dev);
int i;
for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) {
@@ -3016,10 +3086,11 @@ static void airoha_tc_remove_htb_queue(struct net_device *netdev, int queue)
{
struct airoha_gdm_dev *dev = netdev_priv(netdev);
int num_tx_queues = AIROHA_NUM_TX_RING;
- struct airoha_qdma *qdma = dev->qdma;
+ struct airoha_qdma *qdma;
airoha_qdma_set_tx_rate_limit(netdev, queue, 0, 0);
+ qdma = airoha_qdma_deref(dev);
clear_bit(queue, qdma->qos_channel_map);
clear_bit(queue, dev->qos_sq_bmap);
@@ -3045,6 +3116,95 @@ static int airoha_tc_htb_delete_leaf_queue(struct net_device *netdev,
return 0;
}
+static void airoha_disable_qos_for_gdm34(struct net_device *netdev)
+{
+ struct airoha_gdm_dev *dev = netdev_priv(netdev);
+ struct airoha_gdm_port *port = dev->port;
+ int err;
+
+ if (port->id != AIROHA_GDM3_IDX &&
+ port->id != AIROHA_GDM4_IDX)
+ return;
+
+ err = airoha_disable_gdm2_loopback(dev);
+ if (err)
+ netdev_warn(netdev,
+ "failed disabling GDM2 loopback: %d\n", err);
+
+ dev->flags &= ~AIROHA_DEV_F_WAN;
+ airoha_dev_set_qdma(dev);
+
+ airoha_set_macaddr(dev, netdev->dev_addr);
+ if (netif_running(netdev))
+ airoha_set_gdm_port_fwd_cfg(dev->eth,
+ REG_GDM_FWD_CFG(port->id),
+ FE_PSE_PORT_PPE1);
+}
+
+static int airoha_enable_qos_for_gdm34(struct net_device *netdev,
+ struct netlink_ext_ack *extack)
+{
+ struct airoha_gdm_dev *wan_dev, *dev = netdev_priv(netdev);
+ struct airoha_gdm_port *port = dev->port;
+ struct airoha_eth *eth = dev->eth;
+ int err = -EBUSY;
+
+ if (port->id != AIROHA_GDM3_IDX &&
+ port->id != AIROHA_GDM4_IDX) {
+ /* HW QoS is always supported by GDM1 and GDM2 */
+ return 0;
+ }
+
+ if (!airoha_is_lan_gdm_dev(dev)) /* Already enabled */
+ return 0;
+
+ mutex_lock(&flow_offload_mutex);
+
+ wan_dev = airoha_get_wan_gdm_dev(eth);
+ if (wan_dev) {
+ if ((wan_dev->flags & AIROHA_DEV_F_QOS) ||
+ wan_dev->port->id == AIROHA_GDM2_IDX) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "QoS configured for WAN device");
+ goto error_unlock;
+ }
+ airoha_disable_qos_for_gdm34(netdev_from_priv(wan_dev));
+ }
+
+ dev->flags |= AIROHA_DEV_F_WAN;
+ airoha_dev_set_qdma(dev);
+ err = airoha_enable_gdm2_loopback(dev);
+ if (err)
+ goto error_disable_wan;
+
+ err = airoha_set_macaddr(dev, netdev->dev_addr);
+ if (err)
+ goto error_disable_loopback;
+
+ if (netif_running(netdev)) {
+ u32 pse_port;
+
+ pse_port = airoha_ppe_is_enabled(eth, 1) ? FE_PSE_PORT_PPE2
+ : FE_PSE_PORT_PPE1;
+ airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(port->id),
+ pse_port);
+ }
+
+ mutex_unlock(&flow_offload_mutex);
+
+ return 0;
+
+error_disable_loopback:
+ airoha_disable_gdm2_loopback(dev);
+error_disable_wan:
+ dev->flags &= ~AIROHA_DEV_F_WAN;
+ airoha_dev_set_qdma(dev);
+error_unlock:
+ mutex_unlock(&flow_offload_mutex);
+
+ return err;
+}
+
static int airoha_tc_htb_destroy(struct net_device *netdev)
{
struct airoha_gdm_dev *dev = netdev_priv(netdev);
@@ -3053,6 +3213,8 @@ static int airoha_tc_htb_destroy(struct net_device *netdev)
for_each_set_bit(q, dev->qos_sq_bmap, AIROHA_NUM_QOS_CHANNELS)
airoha_tc_remove_htb_queue(netdev, q);
+ dev->flags &= ~AIROHA_DEV_F_QOS;
+
return 0;
}
@@ -3072,24 +3234,33 @@ static int airoha_tc_get_htb_get_leaf_queue(struct net_device *netdev,
return 0;
}
-static int airoha_tc_setup_qdisc_htb(struct net_device *dev,
+static int airoha_tc_setup_qdisc_htb(struct net_device *netdev,
struct tc_htb_qopt_offload *opt)
{
switch (opt->command) {
- case TC_HTB_CREATE:
+ case TC_HTB_CREATE: {
+ struct airoha_gdm_dev *dev = netdev_priv(netdev);
+ int err;
+
+ err = airoha_enable_qos_for_gdm34(netdev, opt->extack);
+ if (err)
+ return err;
+
+ dev->flags |= AIROHA_DEV_F_QOS;
break;
+ }
case TC_HTB_DESTROY:
- return airoha_tc_htb_destroy(dev);
+ return airoha_tc_htb_destroy(netdev);
case TC_HTB_NODE_MODIFY:
- return airoha_tc_htb_modify_queue(dev, opt);
+ return airoha_tc_htb_modify_queue(netdev, opt);
case TC_HTB_LEAF_ALLOC_QUEUE:
- return airoha_tc_htb_alloc_leaf_queue(dev, opt);
+ return airoha_tc_htb_alloc_leaf_queue(netdev, opt);
case TC_HTB_LEAF_DEL:
case TC_HTB_LEAF_DEL_LAST:
case TC_HTB_LEAF_DEL_LAST_FORCE:
- return airoha_tc_htb_delete_leaf_queue(dev, opt);
+ return airoha_tc_htb_delete_leaf_queue(netdev, opt);
case TC_HTB_LEAF_QUERY_QUEUE:
- return airoha_tc_get_htb_get_leaf_queue(dev, opt);
+ return airoha_tc_get_htb_get_leaf_queue(netdev, opt);
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index 87ab3ea10664..b97224a5495c 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -537,11 +537,12 @@ struct airoha_qdma {
enum airoha_dev_flags {
AIROHA_DEV_F_WAN = BIT(0),
+ AIROHA_DEV_F_QOS = BIT(1),
};
struct airoha_gdm_dev {
+ struct airoha_qdma __rcu *qdma;
struct airoha_gdm_port *port;
- struct airoha_qdma *qdma;
struct airoha_eth *eth;
DECLARE_BITMAP(qos_sq_bmap, AIROHA_NUM_QOS_CHANNELS);
@@ -676,6 +677,16 @@ int airoha_get_fe_port(struct airoha_gdm_dev *dev);
bool airoha_is_valid_gdm_dev(struct airoha_eth *eth,
struct airoha_gdm_dev *dev);
+extern struct mutex flow_offload_mutex;
+
+static inline struct airoha_qdma *
+airoha_qdma_deref(struct airoha_gdm_dev *dev)
+{
+ return rcu_dereference_protected(dev->qdma,
+ lockdep_rtnl_is_held() ||
+ lockdep_is_held(&flow_offload_mutex));
+}
+
void airoha_ppe_set_cpu_port(struct airoha_gdm_dev *dev, u8 ppe_id, u8 fport);
bool airoha_ppe_is_enabled(struct airoha_eth *eth, int index);
void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb,
diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index 42f4b0f21d17..0f260c50ac3c 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -15,7 +15,10 @@
#include "airoha_regs.h"
#include "airoha_eth.h"
-static DEFINE_MUTEX(flow_offload_mutex);
+/* Serialize airoha_gdm_dev flags, QDMA pointer and PPE CPU port
+ * configuration.
+ */
+DEFINE_MUTEX(flow_offload_mutex);
static DEFINE_SPINLOCK(ppe_lock);
static const struct rhashtable_params airoha_flow_table_params = {
@@ -86,8 +89,8 @@ static u32 airoha_ppe_get_timestamp(struct airoha_ppe *ppe)
void airoha_ppe_set_cpu_port(struct airoha_gdm_dev *dev, u8 ppe_id, u8 fport)
{
- struct airoha_qdma *qdma = dev->qdma;
- struct airoha_eth *eth = qdma->eth;
+ struct airoha_qdma *qdma = airoha_qdma_deref(dev);
+ struct airoha_eth *eth = dev->eth;
u8 qdma_id = qdma - &eth->qdma[0];
u32 fe_cpu_port;
diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
index 436f3c8779c1..4e17dfbcf2b8 100644
--- a/drivers/net/ethernet/airoha/airoha_regs.h
+++ b/drivers/net/ethernet/airoha/airoha_regs.h
@@ -376,6 +376,7 @@
#define REG_SRC_PORT_FC_MAP6 0x2298
#define FC_ID_OF_SRC_PORT_MASK(_n) GENMASK(4 + ((_n) << 3), ((_n) << 3))
+#define FC_MAP6_DEF_VALUE 0x1b1a1918
#define REG_CDM5_RX_OQ1_DROP_CNT 0x29d4
--
2.54.0
^ permalink raw reply related
* Re: [PATCH net-next v2 1/8] net: mdio: realtek-rtl9300: Add polling documentation
From: Andrew Lunn @ 2026-06-29 16:21 UTC (permalink / raw)
To: Markus Stockhausen
Cc: hkallweit1, linux, davem, edumazet, kuba, pabeni, netdev,
chris.packham, daniel, robh, krzk+dt, conor+dt, devicetree
In-Reply-To: <20260629152336.2239826-2-markus.stockhausen@gmx.de>
> + * Each device has a SMI_POLL_CTRL register. A per-port bitmask decides if the hardware polling of
> + * the associated bus/address is active or not. The hardware runs a tight loop over this and for
> + * each set polling bit it issues a status check for the PHY. Attaching a logic analyzer to the
> + * MDIO bus of an RTL8380 and RTL8393 gives the following commands (in kernel notation):
> + *
> + * RTL8380 RTL8393
> + * --------------------------- ---------------------------
> + * phy_write(phy, 31, 0x0); phy_read(phy, 0);
> + * phy_write(phy, 13, 0x7); phy_read(phy, 1);
> + * phy_write(phy, 14, 0x3c); phy_read(phy, 4);
> + * phy_write(phy, 13, 0x8007); phy_read(phy, 5);
> + * phy_read(phy, 14); phy_read(phy, 6);
> + * phy_write(phy, 13, 0x7); phy_read(phy, 9);
> + * phy_write(phy, 14, 0x3d); phy_read(phy, 10);
> + * phy_write(phy, 13, 0x8007); phy_read(phy, 15);
> + * phy_read(phy, 14); phy_write(phy, 13, 0x7);
> + * phy_read(phy, 9); phy_write(phy, 14, 0x3c);
> + * phy_read(phy, 10); phy_write(phy, 13, 0x4007);
> + * phy_read(phy, 15); phy_read(phy, 14);
> + * phy_read(phy, 0); phy_write(phy, 13, 0x7);
> + * phy_read(phy, 1); phy_write(phy, 14, 0x3d);
> + * phy_read(phy, 4); phy_write(phy, 13, 0x4007);
> + * phy_read(phy, 5); phy_read(phy, 14);
> + * phy_read(phy, 6);
Great to see this reverse engineering.
> + *
> + * The c45 over c22 register 13/14 sequences read MDIO_AN_EEE_ADV and MDIO_AN_EEE_LPABLE.
How do you tell it that C45 over C22 is actually supported by the PHY?
Not all PHYs do. Some PHYs use those registers for other things.
> + * How does MDIO access from kernel work?
> + *
> + * When issuing MDIO accesses via an MMIO based interface the final write to the command register
> + * sets a "run command now" bit. Between two polling sequences for different PHYs the hardware
> + * checks if a user command needs to run and sends it onto the bus. Afterwards it simply continues
> + * its polling work. Inspecting the command sequence for a paged read on the logic analyzer gives:
> + *
> + * RTL8380 RTL8393
> + * --------------------------- ---------------------------
> + * phy_write(phy, 31, page); phy_write(phy, 31, page);
> + * phy_write(phy, reg, value); phy_write(phy, reg, value);
> + * phy_write(phy, 31, 0);
> + *
> + * What does this mean?
> + *
> + * There are slight differences in polling and PHY access between the models but the challenge
> + * stays the same. On the one hand that greatly simplifies the MAC layer, on the other hand it
> + * has some implications for the kernel PHY subsystem.
> + *
> + * - Without the polling and a proper MAC status, some of the link handling features do not work.
> + * Especially an unpopulated MAC_LINK_STS register cancels operations to other MAC registers.
> + * - The Realtek page register 31 is magically modified in the background. On the RTL838x it is
> + * simply reset. Other devices have hardware mitigations for this in place.
> + * - A c45 over c22 kernel access sequence is most likely to fail because chances are high that
> + * the polling engine overwrites registers 13/14 in between.
> + * - PHY firmware loading can have issues. Especially if a PHY is designed to expect a clean
> + * sequence of registers and values without deviation.
> + * - An access to one PHY will need to wait for the next free slot of the polling engine.
* - PHYs which make use of pages will break the hardware polling,
* because it is not aware a different page is currently selected, and
* the values it reads from the PHY do not mean what it expects.
> + *
> + * Conclusion: Kernel access to the PHYs must know and handle any interference that arises from
> + * the above described hardware polling.
This is not the best of wording. We need to narrow it down from
'kernel', to Realtek MDIO bus driver. What we cannot do is have PHY
drivers need to know anything about this. Working around this needs to
be limited to the Realtek MDIO driver, and probably the MDIO bus
locking operations.
When the PHY driver does a paged access, it takes the MDIO bus
lock. We need that to disable the HW polling. Once the paged access is
complete and the MDIO bus lock is released, we can re-enable HW
polling. I'm pretty sure C45 over C22 already takes the MDIO lock, so
that also solves the issue you pointed out above.
We also need an understanding of how the hardware uses the values it
reads during polling. One obvious issue I see is that it is not reading
register 26, so how does it know what speed the Realtek PHYs are
using, in order to correctly configure the MAC?
Andrew
---
pw-bot: cr
^ permalink raw reply
* Re: [PATCH v2 19/19] driver core: platform: count references to all kinds of firmware nodes
From: Andy Shevchenko @ 2026-06-29 16:21 UTC (permalink / raw)
To: Bartosz Golaszewski
Cc: Lee Jones, Mark Brown, Thierry Reding, Sebastian Hesselbarth,
Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Srinivas Kandagatla, Greg Kroah-Hartman, Vinod Koul,
Rafael J. Wysocki, Danilo Krummrich, Rob Herring, Saravana Kannan,
Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
Christophe Leroy (CS GROUP), Andi Shyti, Joerg Roedel,
Will Deacon, Robin Murphy, Doug Berger, Florian Fainelli,
Broadcom internal kernel review list, Ulf Hansson, Frank Li,
Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
Matthew Brost, Thomas Hellström, Rodrigo Vivi, David Airlie,
Simona Vetter, Peter Chen, Paul Cercueil, Bin Liu, Philipp Zabel,
Maximilian Luz, Hans de Goede, Ilpo Järvinen,
Krzysztof Kozlowski, Benjamin Herrenschmidt, brgl, linux-kernel,
netdev, linux-arm-msm, linux-sound, driver-core, devicetree,
linuxppc-dev, linux-i2c, iommu, linux-pm, imx, linux-arm-kernel,
intel-xe, dri-devel, linux-usb, linux-mips, platform-driver-x86
In-Reply-To: <20260629-pdev-fwnode-ref-v2-19-8abe2513f96e@oss.qualcomm.com>
On Mon, Jun 29, 2026 at 11:12:42AM +0200, Bartosz Golaszewski wrote:
> When using platform_device_register_full(), we currently only increase
> the reference count of the OF node associated with a platform device. We
> symmetrically decrease it in platform_device_release(). With all users in
> tree now converted to using provided platform device helpers for
> assigning OF and firmware nodes, we can now switch to counting references
> of all kinds of firmware nodes.
Yep, that's the expected result, thanks!
...
> void platform_device_set_fwnode(struct platform_device *pdev,
> struct fwnode_handle *fwnode)
> {
> - if (is_of_node(fwnode))
> - platform_device_set_of_node(pdev, to_of_node(fwnode));
> - else
> - pdev->dev.fwnode = fwnode;
> + fwnode_handle_put(pdev->dev.fwnode);
> + pdev->dev.fwnode = fwnode_handle_get(fwnode);
> + pdev->dev.of_node = to_of_node(fwnode);
device_set_node(&pdev->dev, fwnode_handle_get(fwnode));
> }
...
> pdev->dev.parent = pdevinfo->parent;
> - pdev->dev.fwnode = pdevinfo->fwnode;
> - pdev->dev.of_node = of_node_get(to_of_node(pdev->dev.fwnode));
> + pdev->dev.fwnode = fwnode_handle_get(pdevinfo->fwnode);
> + pdev->dev.of_node = to_of_node(pdev->dev.fwnode);
See above.
...
With them being addressed,
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* Re: [PATCH v2 00/19] driver core: count references of the platform device's fwnode, not OF node
From: Andy Shevchenko @ 2026-06-29 16:23 UTC (permalink / raw)
To: Bartosz Golaszewski
Cc: Lee Jones, Mark Brown, Thierry Reding, Sebastian Hesselbarth,
Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Srinivas Kandagatla, Greg Kroah-Hartman, Vinod Koul,
Rafael J. Wysocki, Danilo Krummrich, Rob Herring, Saravana Kannan,
Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
Christophe Leroy (CS GROUP), Andi Shyti, Joerg Roedel,
Will Deacon, Robin Murphy, Doug Berger, Florian Fainelli,
Broadcom internal kernel review list, Ulf Hansson, Frank Li,
Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
Matthew Brost, Thomas Hellström, Rodrigo Vivi, David Airlie,
Simona Vetter, Peter Chen, Paul Cercueil, Bin Liu, Philipp Zabel,
Maximilian Luz, Hans de Goede, Ilpo Järvinen,
Krzysztof Kozlowski, Benjamin Herrenschmidt, brgl, linux-kernel,
netdev, linux-arm-msm, linux-sound, driver-core, devicetree,
linuxppc-dev, linux-i2c, iommu, linux-pm, imx, linux-arm-kernel,
intel-xe, dri-devel, linux-usb, linux-mips, platform-driver-x86,
stable, Wolfram Sang
In-Reply-To: <20260629-pdev-fwnode-ref-v2-0-8abe2513f96e@oss.qualcomm.com>
On Mon, Jun 29, 2026 at 11:12:23AM +0200, Bartosz Golaszewski wrote:
> Platform device core provides helper interfaces for dealing with
> dynamically created platform devices. Most users should use
> platform_device_register_full() which encapsulates most of the
> operations but some modules will want to use the split approach of
> calling platform_device_alloc() + platform_device_add() separately for
> various reasons.
>
> With many platform devices now using dynamic software nodes as their
> primary firmware nodes and with the platform device interface being
> extended to also better cover the use-cases of secondary software nodes,
> I believe it makes sense to switch to counting the references of all
> kinds of firmware nodes.
>
> To that end, I identified all users of platform_device_alloc() that also
> assign dev.of_node or dev.fwnode manually. I noticed five cases where
> the references are not increased as they should (patches 1-5 fix these
> users) and provided three new functions in platform_device.h that now
> become the preferred interfaces for assigning firmware nodes to dynamic
> platform devices (in line with platform_device_add_data(),
> platform_device_add_resources(), etc.). The bulk of the patches in this
> series are small driver conversions to port all users to going through
> the new functions that now encapsulate the refcount logic. With that
> done, the final patch seamlessly switches to counting the references of
> all firmware node types.
>
> This effort is prerequisite of removing platform_device_release_full()
> and unifying the release path for dynamic platform devices using
> unmanaged software nodes.
>
> Merging strategy: The entire series should go through the driver core
> tree, possibly with an immutable branch provided to solve any potential
> conflicts though these are rather unlikely.
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
for patches 2-4 assuming they will be accompanied with patch 19 at the same
time.
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* Re: [PATCH net-next v2 2/8] net: mdio: realtek-rtl9300: Add page tracking
From: Andrew Lunn @ 2026-06-29 16:28 UTC (permalink / raw)
To: Markus Stockhausen
Cc: hkallweit1, linux, davem, edumazet, kuba, pabeni, netdev,
chris.packham, daniel, robh, krzk+dt, conor+dt, devicetree
In-Reply-To: <20260629152336.2239826-3-markus.stockhausen@gmx.de>
On Mon, Jun 29, 2026 at 05:23:30PM +0200, Markus Stockhausen wrote:
> The hardware polling unit of the Realtek switches has a very special
> handling for PHY register 31 (aka Realtek page register) in place.
>
> - On the RTL838x it is permanently reset to zero.
> - On other devices there is some magic saving/restoring (aka parking)
> in the background in place.
>
> This makes access to PHYs a gamble.
>
> As of now all known existing hardware designs have Realtek based 1G PHYs.
> Otherwise the polling engine and the MAC status update will not work at
> all and the vendor SDK would fail totally.
If you are going in this direction, then please somehow validate that the
PHY is a Realtek PHY, and error out if it is not.
> This driver differentiates clearly between c22 and c45 buses. During
> probing it enables only one of the protocols for a bus. So it is safe
> to assume that any c22 access will only target a Realtek based 1G PHY.
>
> Intercept access to register 31 and store the desired value for each port
> in the driver. When issuing access to other registers add the saved page.
> This given, the hardware will run two consecutive c22 commands that are
> not interrupted by polling.
>
> ... hardware poll ...
> phy_write(phy, 31, page)
> phy_write(phy, reg, value)
> ... hardware poll ...
How do you guarantee the polling will not get in between?
Andrew
^ permalink raw reply
* Re: [PATCH net-next v2 4/8] net: mdio: realtek-rtl9300: Configure hardware polling during probing
From: Andrew Lunn @ 2026-06-29 16:38 UTC (permalink / raw)
To: Markus Stockhausen
Cc: hkallweit1, linux, davem, edumazet, kuba, pabeni, netdev,
chris.packham, daniel, robh, krzk+dt, conor+dt, devicetree
In-Reply-To: <20260629152336.2239826-5-markus.stockhausen@gmx.de>
> +static int otto_emdio_notify_phy_attach(struct phy_device *phydev)
> +{
> + struct otto_emdio_priv *priv = otto_emdio_bus_to_priv(phydev->mdio.bus);
> + int port = otto_emdio_phy_to_port(phydev->mdio.bus, phydev->mdio.addr);
> + int ret;
> +
> + if (port < 0)
> + return port;
This seems like a good place to check that the PHY is a Realtek PHY, and
return -ENODEV if not. That should cause phy_attach_direct() to fail,
making it impossible to configure the port up.
Andrew
^ permalink raw reply
* Re: [PATCH net] net/sched: act_bpf: use rcu_dereference_bh() to read the filter
From: Amery Hung @ 2026-06-29 16:38 UTC (permalink / raw)
To: Sechang Lim
Cc: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Jamal Hadi Salim, Jiri Pirko, Daniel Borkmann, John Fastabend,
Stanislav Fomichev, Alexei Starovoitov, Andrii Nakryiko,
Martin KaFai Lau, Simon Horman, bpf, netdev, linux-kernel
In-Reply-To: <20260629154112.1164986-1-rhkrqnwk98@gmail.com>
On Mon, Jun 29, 2026 at 8:54 AM Sechang Lim <rhkrqnwk98@gmail.com> wrote:
>
> tcf_bpf_act() can run from the tc egress path, which holds only
> rcu_read_lock_bh(), but reads prog->filter with rcu_dereference() and
> trips lockdep:
>
> WARNING: suspicious RCU usage
> net/sched/act_bpf.c:47 suspicious rcu_dereference_check() usage!
> 1 lock held by syz.2.1588/12756:
> #0: (rcu_read_lock_bh){....}-{1:3}, at: __dev_queue_xmit net/core/dev.c:4792
> tcf_bpf_act+0x6ae/0x940 net/sched/act_bpf.c:47
> tcf_classify+0x6e4/0x1080 net/sched/cls_api.c:1860
> sch_handle_egress net/core/dev.c:4545 [inline]
> __dev_queue_xmit+0x2185/0x2c00 net/core/dev.c:4808
> packet_sendmsg+0x3dfa/0x5120 net/packet/af_packet.c:3114
>
> The other tc actions and cls_bpf already use rcu_dereference_bh() here.
> Do the same.
>
> Fixes: 1f211a1b929c ("net, sched: add clsact qdisc")
> Signed-off-by: Sechang Lim <rhkrqnwk98@gmail.com>
Reviewed-by: Amery Hung <ameryhung@gmail.com>
> ---
> net/sched/act_bpf.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
> index 58a074651176..09d46e195e33 100644
> --- a/net/sched/act_bpf.c
> +++ b/net/sched/act_bpf.c
> @@ -44,7 +44,7 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb,
> tcf_lastuse_update(&prog->tcf_tm);
> bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
>
> - filter = rcu_dereference(prog->filter);
> + filter = rcu_dereference_bh(prog->filter);
> if (at_ingress) {
> __skb_push(skb, skb->mac_len);
> filter_res = bpf_prog_run_data_pointers(filter, skb);
> --
> 2.43.0
>
>
^ permalink raw reply
* Re: [PATCH net-next v2 5/8] net: mdio: realtek-rtl9300: Add c45 over c22 mitigation
From: Andrew Lunn @ 2026-06-29 16:40 UTC (permalink / raw)
To: Markus Stockhausen
Cc: hkallweit1, linux, davem, edumazet, kuba, pabeni, netdev,
chris.packham, daniel, robh, krzk+dt, conor+dt, devicetree
In-Reply-To: <20260629152336.2239826-6-markus.stockhausen@gmx.de>
> Enhance the driver to detect this register 13/14/13/14 access sequence.
I still think this is the wrong way to do this, and you should look at
MDIO bus lock/unlock.
Andrew
^ permalink raw reply
* Re: [Intel-wired-lan] [PATCH 1/2] igc: Wait for MAC passthrough after reset
From: Ruinskiy, Dima @ 2026-06-29 16:42 UTC (permalink / raw)
To: Chia-Lin Kao (AceLan), Loktionov, Aleksandr, Nguyen, Anthony L,
Kitszel, Przemyslaw, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, intel-wired-lan@lists.osuosl.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <ajiHH-RaHUjgraMh@acelan-Precision-5480>
Hi AceLan,
Some more comments below.
On 22/06/2026 4:57, Chia-Lin Kao (AceLan) wrote:
>>
>> Because 100 iterations of 100msec each - this translates to up-to 10
>> seconds, no?
> Yes, just in case it takes longer.
> I think 5 seconds should be enough if you feel this is feasible.
5 seconds also sounds excessive, assuming the majority of the systems do
not support / enable MAC passthrough via the FW feature (and I have a
hunch it is the case). 2 seconds... maybe, but then you say that in some
rare cases it exceeds 2 seconds as well. What if in some even rare cases
it exceeds 5/10 seconds? Perhaps because of some glitch it will not come
up at all on this cycle. Should the driver always wait?
> I wish we can detect if the MAC passthrough is enabled, so that we
> know if we need to poll for the MAC address.
Unfortunately, like you I am not aware of any way for the driver to know
whether MAC passthrough via FW is enabled. Because of this we have been
exploring a simpler way to support this feature via ACPI objects (which
are set by the BIOS when MAC passthrough is enabled and are easy for the
driver to query). I know some vendors have already implemented it, and I
am currently drafting a patch to send.
> For the FW interrupt mechanism also needs BIOS support, and we don't
> have the power to push this.
The mechanism I have in mind does not require BIOS support - the
I225/I226 FW already supports the required interrupt, AFAIK - it merely
needs to be enabled in the igc driver. With that said, there still
remains the question of how to notify the network stack above us that
the MAC address has changed post-probe - and whether it is even supported.
--Dima
>
>>
>> Thanks,
>> Dima.
>>
>>>
>>>> Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
>>>> ---
>>>> drivers/net/ethernet/intel/igc/igc_main.c | 48
>>>> +++++++++++++++++++++++
>>>> 1 file changed, 48 insertions(+)
>>>>
>>>> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c
>>>> b/drivers/net/ethernet/intel/igc/igc_main.c
>>>> index 2c9e2dfd8499..fa9752ed8bc5 100644
>>>> --- a/drivers/net/ethernet/intel/igc/igc_main.c
>>>> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
>>>> @@ -11,6 +11,7 @@
>>>> #include <net/pkt_sched.h>
>>>> #include <linux/bpf_trace.h>
>>>> #include <net/xdp_sock_drv.h>
>>>> +#include <linux/etherdevice.h>
>>>> #include <linux/pci.h>
>>>> #include <linux/mdio.h>
>>>>
>>>> @@ -69,6 +70,52 @@ static const struct pci_device_id igc_pci_tbl[] = {
>>>>
>>>> MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
>>>>
>>>> +static void igc_read_rar0(struct igc_hw *hw, u8 *addr, u32 *ral, u32
>>>> +*rah) {
>>>> + *ral = rd32(IGC_RAL(0));
>>>> + *rah = rd32(IGC_RAH(0));
>>>> +
>>>> + addr[0] = *ral & 0xff;
>>>> + addr[1] = (*ral >> 8) & 0xff;
>>>> + addr[2] = (*ral >> 16) & 0xff;
>>>> + addr[3] = (*ral >> 24) & 0xff;
>>>> + addr[4] = *rah & 0xff;
>>>> + addr[5] = (*rah >> 8) & 0xff;
>>>> +}
>>>> +
>>>> +static bool igc_is_lmvp_device(struct pci_dev *pdev) {
>>>> + switch (pdev->device) {
>>>> + case IGC_DEV_ID_I225_LMVP:
>>>> + case IGC_DEV_ID_I226_LMVP:
>>>> + return true;
>>>> + default:
>>>> + return false;
>>>> + }
>>>> +}
>>>> +
>>>> +static void igc_wait_for_lmvp_mac_passthrough(struct pci_dev *pdev,
>>>> + struct igc_hw *hw)
>>>> +{
>>>> + u8 addr[ETH_ALEN] __aligned(2);
>>>> + u32 orig_ral, orig_rah;
>>>> + u32 ral, rah;
>>>> + int i;
>>>> +
>>>> + if (!igc_is_lmvp_device(pdev))
>>>> + return;
>>>> +
>>>> + igc_read_rar0(hw, addr, &orig_ral, &orig_rah);
>>>> +
>>>> + for (i = 0; i < 100; i++) {
>>>> + msleep(100);
>>>> + igc_read_rar0(hw, addr, &ral, &rah);
>>>> + if ((ral != orig_ral || rah != orig_rah) &&
>>>> + is_valid_ether_addr(addr))
>>>> + return;
>>>> + }
>>>> +}
>>>> +
>>>> enum latency_range {
>>>> lowest_latency = 0,
>>>> low_latency = 1,
>>>> @@ -7259,6 +7306,7 @@ static int igc_probe(struct pci_dev *pdev,
>>>> * known good starting state
>>>> */
>>>> hw->mac.ops.reset_hw(hw);
>>>> + igc_wait_for_lmvp_mac_passthrough(pdev, hw);
>>>>
>>>> if (igc_get_flash_presence_i225(hw)) {
>>>> if (hw->nvm.ops.validate(hw) < 0) {
>>>> --
>>>> 2.53.0
>>>
>>
^ permalink raw reply
* [PATCH v2] net/sched: hhf: clear heavy-hitter state on reset
From: Samuel Moelius @ 2026-06-29 16:44 UTC (permalink / raw)
To: Jamal Hadi Salim
Cc: Samuel Moelius, Jiri Pirko, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Simon Horman, Terry Lam,
open list:TC subsystem, open list
HHF reset does not clear the classifier state used to identify heavy
hitters. Packets after reset can therefore be scheduled using flow
history from before the reset.
The reset operation should return the qdisc to an empty state.
Clear the heavy-hitter classifier tables when HHF is reset.
Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc")
Assisted-by: Codex:gpt-5.5-cyber-preview
Signed-off-by: Samuel Moelius <sam.moelius@trailofbits.com>
---
Changes in v2:
- Ensure no NULL dereference when initialization fails
net/sched/sch_hhf.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 1e25b75daae2..d85cb0263b67 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -462,12 +462,39 @@ static struct sk_buff *hhf_dequeue(struct Qdisc *sch)
return skb;
}
+static void hhf_reset_classifier(struct hhf_sched_data *q)
+{
+ int i;
+
+ if (!q->hh_flows)
+ return;
+
+ for (i = 0; i < HH_FLOWS_CNT; i++) {
+ struct hh_flow_state *flow, *next;
+ struct list_head *head = &q->hh_flows[i];
+
+ list_for_each_entry_safe(flow, next, head, flowchain) {
+ list_del(&flow->flowchain);
+ kfree(flow);
+ }
+ }
+ WRITE_ONCE(q->hh_flows_current_cnt, 0);
+
+ for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+ if (q->hhf_valid_bits[i])
+ bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN);
+ }
+ q->hhf_arrays_reset_timestamp = hhf_time_stamp();
+}
+
static void hhf_reset(struct Qdisc *sch)
{
+ struct hhf_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
while ((skb = hhf_dequeue(sch)) != NULL)
rtnl_kfree_skbs(skb, skb);
+ hhf_reset_classifier(q);
}
static void hhf_destroy(struct Qdisc *sch)
--
2.43.0
^ permalink raw reply related
* Re: [PATCH net-next v11 1/7] dt-bindings: phy: document the serdes PHY on sa8255p
From: Bartosz Golaszewski @ 2026-06-29 16:54 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: Bjorn Andersson, Konrad Dybcio, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Alexandre Torgue,
Vinod Koul, Giuseppe Cavallaro, Chen-Yu Tsai, Jernej Skrabec,
Neil Armstrong, Kevin Hilman, Jerome Brunet, Shawn Guo,
Fabio Estevam, Jan Petrous, s32, Mohd Ayaan Anwar, Romain Gantois,
Magnus Damm, Maxime Ripard, Christophe Roullier, Radu Rendec,
linux-arm-msm, devicetree, linux-kernel, netdev, linux-stm32,
linux-arm-kernel, Drew Fustini, linux-sunxi, linux-amlogic,
linux-mips, imx, linux-renesas-soc, linux-rockchip, sophgo,
linux-riscv, Bartosz Golaszewski, Bartosz Golaszewski
In-Reply-To: <CAMuHMdVUBgG0EFB16OxHisbxx-sBvDKvBPNZdpyDnmBrnX4ptQ@mail.gmail.com>
On Mon, Jun 29, 2026 at 4:58 PM Geert Uytterhoeven <geert@linux-m68k.org> wrote:
>
> Hi Bartosz,
>
> On Mon, 29 Jun 2026 at 16:07, Bartosz Golaszewski <brgl@kernel.org> wrote:
> > On Mon, 29 Jun 2026 15:51:31 +0200, Geert Uytterhoeven
> > <geert@linux-m68k.org> said:
> > > On Mon, 29 Jun 2026 at 13:29, Bartosz Golaszewski
> > > <bartosz.golaszewski@oss.qualcomm.com> wrote:
> > >> Describe the SGMII/SerDes PHY present on the Qualcomm sa8255p platforms.
> > >> This is essentially the same hardware as sa8775p rev3 but the PHY is
> > >> managed by firmware over SCMI.
> > >
> > > > So why can't it reuse the DT bindings, and be compatible with
> > > qcom,sa8775p-dwmac-sgmii-phy?
> > >
> > >> Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
> > >
> > >> --- /dev/null
> > >> +++ b/Documentation/devicetree/bindings/phy/qcom,sa8255p-dwmac-sgmii-phy.yaml
> > >
> > >> + power-domains:
> > >> + maxItems: 1
> > >> +
> > >> + power-domain-names:
> > >> + items:
> > >> + - const: serdes
> > >
> > >> +examples:
> > >> + - |
> > >> + phy@8901000 {
> > >> + compatible = "qcom,sa8255p-dwmac-sgmii-phy";
> > >> + reg = <0x08901000 0xe10>;
> > >> + #phy-cells = <0>;
> > >> + power-domains = <&scmi7_dvfs 0>;
> > >> + power-domain-names = "serdes";
> > >
> > > Ah, this uses power-domains, while the existing bindings for
> > > qcom,sa8775p-dwmac-sgmii-phy use a clock.
> > > I guess the clock is the correct hardware description?
> > >
> > > Adding to my list of examples for backing a hardware-to-SCMI remapping
> > > driver...
> > >
> >
> > Russell King asked me to put the PHY logic for SCMI pm domains into the PHY
> > driver instead of the MAC driver where it was previously. Instead of cramming
> > both HLOS and firmware handling into the same driver, I figured it makes more
> > sense to have a dedicated, cleaner driver as the two share very little code (if
> > any).
>
> I think you are mixing up DT bindings and driver implementation?
>
Ah indeed, but the bindings don't share a lot of content either.
Bartosz
^ permalink raw reply
* Re: Ethtool : PRBS feature
From: Andrew Lunn @ 2026-06-29 16:56 UTC (permalink / raw)
To: Das, Shubham
Cc: Alexander Duyck, Lee Trager, Maxime Chevallier,
netdev@vger.kernel.org, mkubecek@suse.cz, D H, Siddaraju,
Chintalapalle, Balaji, Lindberg, Magnus,
niklas.damberg@ericsson.com
In-Reply-To: <SN7PR11MB810921E7DA70DB3F6FD1C41DFFE82@SN7PR11MB8109.namprd11.prod.outlook.com>
> + name: inject-error-count
> + type: u32
> + doc: |
> + Number of errors to inject. Each invocation injects the specified
> + number of bit errors into the data stream.
Sorry, but I could not implement that in a sensible way, given its
current specification.
I suppose I could simply flip the first `inject-error-count` bits, and
make the rest of the stream perfect? I could also wait until the stop
command is received, and then flip that many bits before I stop the
stream? But neither of these seems sensible.
Please make this specification have sufficient details, or references
to 802.3, that you could give it to another engineer and get back a
reasonable implementation, without having to answer any questions.
> + name: phy-test-act
> + doc: |
> + Configure PHY test parameters. Each attribute is optional and only
> + specified attributes are applied. TX/RX patterns are set on the
> + local port. BERT and error injection operate on the receiver port.
Error injection operates on the receive port? That is not what I
expected. I should go read 802.3, and understand how this is used.
Andrew
^ permalink raw reply
* RE: [PATCH net-next v5 14/15] dt-bindings: net: add onsemi's S2500
From: Selvamani Rajagopal @ 2026-06-29 17:07 UTC (permalink / raw)
To: Rob Herring
Cc: Andrew Lunn, Piergiorgio Beruto, Heiner Kallweit, Russell King,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Andrew Lunn, Parthiban Veerasooran, Richard Cochran,
Krzysztof Kozlowski, Conor Dooley, Simon Horman, Jonathan Corbet,
Shuah Khan, netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
devicetree@vger.kernel.org, linux-doc@vger.kernel.org, Jerry Ray
In-Reply-To: <20260615041056.GA1426553-robh@kernel.org>
> -----Original Message-----
> From: Rob Herring <robh@kernel.org>
> Sent: Sunday, June 14, 2026 9:11 PM
> To: Selvamani Rajagopal <Selvamani.Rajagopal@onsemi.com>
> Subject: Re: [PATCH net-next v5 14/15] dt-bindings: net: add onsemi's S2500
>
>
>
> And you are missing tags from prior versions. It is your responsibility
> to add them.
I added the prior version's link under each version. Somehow the "b4 prep --show-revision" command doesn't pick up the older
versions. It shows v4 and v5 correctly, as the emails containing those patches were sent in a threaded manner.
v1, v2 and v3 are not shown, though, as each patch was sent as an individual email (with a proper subject line, of course) using Outlook.
I don't know if there is a way to fix this.
>
> > changes in v5
> > - no changes
> > changes in v4:
> > - added spi-max-frequency as suggested by AI review
> > - changed interrupt to IRQ_TYPE_EDGE_FALLING as it is
> > being taken care of in the net (stable) branch
> > changes in v3
> > - Removed URL link that failed verification
> > changes in v2
> > - removed spi-max-frequency entry
> > - changed the compatible string to s2500
> > changes in v1
> > - Added the first version of YAML file for onsemi MAC-PHY
> >
^ permalink raw reply
* [PATCH net-next v6 01/15] net: phy: Helper to read and write through C45 without lock
From: Selvamani Rajagopal via B4 Relay @ 2026-06-29 17:23 UTC (permalink / raw)
To: Andrew Lunn, Piergiorgio Beruto, Heiner Kallweit, Russell King,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Andrew Lunn, Parthiban Veerasooran, Selva Rajagopal,
Richard Cochran, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
Simon Horman, Jonathan Corbet, Shuah Khan
Cc: netdev, linux-kernel, devicetree, linux-doc, Jerry Ray,
Selvamani Rajagopal
In-Reply-To: <20260629-s2500-mac-phy-support-v6-0-18ce79500371@onsemi.com>
From: Selvamani Rajagopal <Selvamani.Rajagopal@onsemi.com>
Add generic helper functions to read and write through the C45 bus
protocol without taking the MDIO bus lock. This helps PHYs avoid indirect
C22 API calls for C45 access, which may not be supported by the PHY.
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Selvamani Rajagopal <Selvamani.Rajagopal@onsemi.com>
---
changes in v6
- No change
changes in v5
- no change
changes in v4
- lockdep_assert_held added to ensure correct calling convention
changes in v3
- Added the genphy APIs to initiate Clause 45 register read/write
- first patch
---
drivers/net/phy/phy_device.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
include/linux/phy.h | 4 ++++
2 files changed, 59 insertions(+)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0615228459ef..b82b99d08132 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2787,6 +2787,61 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
}
EXPORT_SYMBOL(genphy_write_mmd_unsupported);
+/**
+ * genphy_phy_read_mmd - Helper for reading a register without lock
+ * from the given MMD and PHY.
+ * @phydev: The phy_device struct
+ * @devnum: The MMD to read from
+ * @regnum: The register on the MMD to read
+ *
+ * Description: PHYs can have both C22 and C45 register spaces. Once a PHY
+ * is discovered via C22 bus protocol, it uses C22 indirect access to
+ * access C45 registers. Some PHYs, like 10Base-T1S PHYs defined by OPEN
+ * Alliance 10BASE-T1x, support only direct access.
+ *
+ * If a PHY indicates C45 support through a DTS entry, it avoids C22 APIs
+ * entirely and therefore generic MDIO registers are inaccessible.
+ *
+ * The MDIO bus isn't locked here because, when called through the read_mmd
+ * callback of phy_driver, the caller is expected to hold the bus lock, as
+ * implemented in phy_read_mmd().
+ *
+ * Returns: Register value if successful, negative error code on failure.
+ */
+int genphy_phy_read_mmd(struct phy_device *phydev, int devnum,
+ u16 regnum)
+{
+ struct mii_bus *bus = phydev->mdio.bus;
+ int addr = phydev->mdio.addr;
+
+ lockdep_assert_held(&bus->mdio_lock);
+ return __mdiobus_c45_read(bus, addr, devnum, regnum);
+}
+EXPORT_SYMBOL(genphy_phy_read_mmd);
+
+/**
+ * genphy_phy_write_mmd - Helper for writing a register without lock
+ * to the given MMD and PHY.
+ * @phydev: The phy_device struct
+ * @devnum: The MMD to write to
+ * @regnum: The register on the MMD to write
+ * @val: Value to write
+ *
+ * Description: Similar to genphy_phy_read_mmd
+ *
+ * Returns: 0 if successful, negative error code on failure.
+ */
+int genphy_phy_write_mmd(struct phy_device *phydev, int devnum,
+ u16 regnum, u16 val)
+{
+ struct mii_bus *bus = phydev->mdio.bus;
+ int addr = phydev->mdio.addr;
+
+ lockdep_assert_held(&bus->mdio_lock);
+ return __mdiobus_c45_write(bus, addr, devnum, regnum, val);
+}
+EXPORT_SYMBOL(genphy_phy_write_mmd);
+
int genphy_suspend(struct phy_device *phydev)
{
return phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 199a7aaa341b..8266dd4a8dbe 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2301,6 +2301,10 @@ int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad,
u16 regnum);
int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
u16 regnum, u16 val);
+int genphy_phy_write_mmd(struct phy_device *phydev, int devnum,
+ u16 regnum, u16 val);
+int genphy_phy_read_mmd(struct phy_device *phydev, int devnum,
+ u16 regnum);
/* Clause 37 */
int genphy_c37_config_aneg(struct phy_device *phydev);
--
2.43.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox