* [PATCH net-next v4 04/13] dpaa2-switch: create a separate dpaa2_switch_port_fdb_event() function
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
Create a separate dpaa2_switch_port_fdb_event() function that will only
handle the FDB related events. With this change, the
dpaa2_switch_port_event() notifier handler can be written in a way that
is easier to follow.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- none
Changes in v3:
- Get hold of port_priv->ethsw_data only after we know the device is a
dpaa2-switch one
Changes in v2:
- none
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 28 ++++++++++++++-----
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index eacab00b586a..c7c84bf2fde7 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2337,21 +2337,18 @@ static void dpaa2_switch_event_work(struct work_struct *work)
dev_put(dev);
}
-/* Called under rcu_read_lock() */
-static int dpaa2_switch_port_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
+static int dpaa2_switch_port_fdb_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct ethsw_port_priv *port_priv = netdev_priv(dev);
struct ethsw_switchdev_event_work *switchdev_work;
struct switchdev_notifier_fdb_info *fdb_info = ptr;
- struct ethsw_core *ethsw = port_priv->ethsw_data;
-
- if (event == SWITCHDEV_PORT_ATTR_SET)
- return dpaa2_switch_port_attr_set_event(dev, ptr);
+ struct ethsw_core *ethsw;
if (!dpaa2_switch_port_dev_check(dev))
return NOTIFY_DONE;
+ ethsw = port_priv->ethsw_data;
switchdev_work = kzalloc_obj(*switchdev_work, GFP_ATOMIC);
if (!switchdev_work)
@@ -2390,6 +2387,23 @@ static int dpaa2_switch_port_event(struct notifier_block *nb,
return NOTIFY_BAD;
}
+/* Called under rcu_read_lock() */
+static int dpaa2_switch_port_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case SWITCHDEV_PORT_ATTR_SET:
+ return dpaa2_switch_port_attr_set_event(dev, ptr);
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ return dpaa2_switch_port_fdb_event(nb, event, ptr);
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
static int dpaa2_switch_port_obj_event(unsigned long event,
struct net_device *netdev,
struct switchdev_notifier_port_obj_info *port_obj_info)
--
2.25.1
^ permalink raw reply related
* [PATCH net-next v4 05/13] dpaa2-switch: check early if an FDB entry should be added
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
Instead of waiting until the last moment to check if an FDB entry should
be added to HW, move the check earlier (before even scheduling the work
item) so that we don't just waste time.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- none
Changes in v3:
- none
Changes in v2:
- none
---
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index c7c84bf2fde7..d4975d08fa44 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2308,8 +2308,6 @@ static void dpaa2_switch_event_work(struct work_struct *work)
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
- if (!fdb_info->added_by_user || fdb_info->is_local)
- break;
if (is_unicast_ether_addr(fdb_info->addr))
err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
fdb_info->addr);
@@ -2323,8 +2321,6 @@ static void dpaa2_switch_event_work(struct work_struct *work)
&fdb_info->info, NULL);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- if (!fdb_info->added_by_user || fdb_info->is_local)
- break;
if (is_unicast_ether_addr(fdb_info->addr))
dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr);
else
@@ -2350,6 +2346,9 @@ static int dpaa2_switch_port_fdb_event(struct notifier_block *nb,
return NOTIFY_DONE;
ethsw = port_priv->ethsw_data;
+ if (!fdb_info->added_by_user || fdb_info->is_local)
+ return NOTIFY_DONE;
+
switchdev_work = kzalloc_obj(*switchdev_work, GFP_ATOMIC);
if (!switchdev_work)
return NOTIFY_BAD;
--
2.25.1
^ permalink raw reply related
* [PATCH net-next v4 06/13] dpaa2-switch: add dpaa2_switch_port_to_bridge_port() helper
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
In preparation for adding offloading support for upper bond devices we
have to let the switchdev framework know if a specific bridge port is
offloaded or not, even if that brport is an upper device.
For this to happen, create the dpaa2_switch_port_to_bridge_port function
which will determine the bridge port corresponding to a particular DPAA2
switch interface and use it in the switchdev_bridge_port_offload call.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- Split the patch so that the first part only adds the base function and
its call sites and the logic around LAG is added later in the patch
which actually adds the support for LAG.
- Moved the patch so that it's a preparatory patch
Changes in v3:
- Access lag field through rtnl_dereference() so that we adapt to the
__rcu change.
- Check that the brport is non-NULL before calling
switchdev_bridge_port_unoffload() on it.
Changes in v2:
- none
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 23 ++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index d4975d08fa44..88d199befbd9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2017,6 +2017,15 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
return notifier_from_errno(err);
}
+static struct net_device *
+dpaa2_switch_port_to_bridge_port(struct ethsw_port_priv *port_priv)
+{
+ if (!port_priv->fdb->bridge_dev)
+ return NULL;
+
+ return port_priv->netdev;
+}
+
static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
struct net_device *upper_dev,
struct netlink_ext_ack *extack)
@@ -2024,6 +2033,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
struct dpaa2_switch_fdb *old_fdb = port_priv->fdb;
struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct net_device *brport_dev;
bool learn_ena;
int err;
@@ -2035,7 +2045,8 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
dpaa2_switch_port_set_fdb(port_priv, upper_dev, true);
/* Inherit the initial bridge port learning state */
- learn_ena = br_port_flag_is_set(netdev, BR_LEARNING);
+ brport_dev = dpaa2_switch_port_to_bridge_port(port_priv);
+ learn_ena = br_port_flag_is_set(brport_dev, BR_LEARNING);
err = dpaa2_switch_port_set_learning(port_priv, learn_ena);
port_priv->learn_ena = learn_ena;
@@ -2049,7 +2060,8 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
if (err)
goto err_egress_flood;
- err = switchdev_bridge_port_offload(netdev, netdev, NULL,
+ brport_dev = dpaa2_switch_port_to_bridge_port(port_priv);
+ err = switchdev_bridge_port_offload(brport_dev, netdev, NULL,
NULL, NULL, false, extack);
if (err)
goto err_switchdev_offload;
@@ -2086,8 +2098,13 @@ static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct net_device *brport_dev;
+
+ brport_dev = dpaa2_switch_port_to_bridge_port(port_priv);
+ if (!brport_dev)
+ return;
- switchdev_bridge_port_unoffload(netdev, NULL, NULL, NULL);
+ switchdev_bridge_port_unoffload(brport_dev, NULL, NULL, NULL);
/* Make sure that any FDB add/del operations are completed before the
* bridge layout changes
--
2.25.1
^ permalink raw reply related
* [PATCH net-next v4 07/13] dpaa2-switch: consolidate unicast and multicast management
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
This patch consolidates the unicast and multicast management by creating
two new functions - dpaa2_switch_port_fdb_[add|del]() - which can be
used for either uc or mc addresses. Having this common entrypoint for
both types of addresses will help us in the next patches to streamline
the same addresses but on LAG ports.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- Moved the commit ordering, no actual code changes
Changes in v3:
- none
Changes in v2:
- The rollback in dpaa2_switch_port_mdb_add() uses the newly introduced
dpaa2_switch_port_fdb_del() helper instead of the _mc counterpart.
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 39 +++++++++++++------
1 file changed, 27 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 88d199befbd9..3472f5d5b08a 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -552,6 +552,28 @@ static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv,
return err;
}
+static int dpaa2_switch_port_fdb_add(struct ethsw_port_priv *port_priv,
+ const unsigned char *addr)
+{
+ int err;
+
+ if (is_unicast_ether_addr(addr))
+ err = dpaa2_switch_port_fdb_add_uc(port_priv, addr);
+ else
+ err = dpaa2_switch_port_fdb_add_mc(port_priv, addr);
+
+ return err;
+}
+
+static int dpaa2_switch_port_fdb_del(struct ethsw_port_priv *port_priv,
+ const unsigned char *addr)
+{
+ if (is_unicast_ether_addr(addr))
+ return dpaa2_switch_port_fdb_del_uc(port_priv, addr);
+ else
+ return dpaa2_switch_port_fdb_del_mc(port_priv, addr);
+}
+
static void dpaa2_switch_port_get_stats(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
@@ -1880,7 +1902,7 @@ static int dpaa2_switch_port_mdb_add(struct net_device *netdev,
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
- return dpaa2_switch_port_fdb_add_mc(port_priv, mdb->addr);
+ return dpaa2_switch_port_fdb_add(port_priv, mdb->addr);
}
static int dpaa2_switch_port_obj_add(struct net_device *netdev,
@@ -1984,7 +2006,7 @@ static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
- return dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
+ return dpaa2_switch_port_fdb_del(port_priv, mdb->addr);
}
static int dpaa2_switch_port_obj_del(struct net_device *netdev,
@@ -2325,12 +2347,8 @@ static void dpaa2_switch_event_work(struct work_struct *work)
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
- if (is_unicast_ether_addr(fdb_info->addr))
- err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
- fdb_info->addr);
- else
- err = dpaa2_switch_port_fdb_add_mc(netdev_priv(dev),
- fdb_info->addr);
+ err = dpaa2_switch_port_fdb_add(netdev_priv(dev),
+ fdb_info->addr);
if (err)
break;
fdb_info->offloaded = true;
@@ -2338,10 +2356,7 @@ static void dpaa2_switch_event_work(struct work_struct *work)
&fdb_info->info, NULL);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- if (is_unicast_ether_addr(fdb_info->addr))
- dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr);
- else
- dpaa2_switch_port_fdb_del_mc(netdev_priv(dev), fdb_info->addr);
+ dpaa2_switch_port_fdb_del(netdev_priv(dev), fdb_info->addr);
break;
}
--
2.25.1
^ permalink raw reply related
* [PATCH net-next v4 08/13] dpaa2-switch: add LAG configuration API
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
Add the necessary APIs to configure and control the LAG support on the
DPAA2 switch object.
- The dpsw_lag_set() function will be used to either verify that a LAG
configuration can be supported or to actually apply it in HW.
- The dpsw_if_set_lag_state() will get used in the next patches to
change the per port LAG state of a specific DPSW interface.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- None
Changes in v3:
- Add a check in dpsw_lag_set() for cfg->num_ifs against
DPSW_MAX_LAG_IFS
- Add kerneldoc for the dpsw_lag_cfg structure.
Changes in v2:
- none
---
.../net/ethernet/freescale/dpaa2/dpsw-cmd.h | 18 +++++-
drivers/net/ethernet/freescale/dpaa2/dpsw.c | 60 +++++++++++++++++++
drivers/net/ethernet/freescale/dpaa2/dpsw.h | 30 ++++++++++
3 files changed, 107 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index 397d55f2bd99..9a2055c64983 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -12,7 +12,7 @@
/* DPSW Version */
#define DPSW_VER_MAJOR 8
-#define DPSW_VER_MINOR 9
+#define DPSW_VER_MINOR 13
#define DPSW_CMD_BASE_VERSION 1
#define DPSW_CMD_VERSION_2 2
@@ -92,11 +92,14 @@
#define DPSW_CMDID_CTRL_IF_SET_POOLS DPSW_CMD_ID(0x0A1)
#define DPSW_CMDID_CTRL_IF_ENABLE DPSW_CMD_ID(0x0A2)
#define DPSW_CMDID_CTRL_IF_DISABLE DPSW_CMD_ID(0x0A3)
+#define DPSW_CMDID_SET_LAG DPSW_CMD_V2(0x0A4)
#define DPSW_CMDID_CTRL_IF_SET_QUEUE DPSW_CMD_ID(0x0A6)
#define DPSW_CMDID_SET_EGRESS_FLOOD DPSW_CMD_ID(0x0AC)
#define DPSW_CMDID_IF_SET_LEARNING_MODE DPSW_CMD_ID(0x0AD)
+#define DPSW_CMDID_IF_SET_LAG_STATE DPSW_CMD_ID(0x0B0)
+
/* Macros for accessing command fields smaller than 1byte */
#define DPSW_MASK(field) \
GENMASK(DPSW_##field##_SHIFT + DPSW_##field##_SIZE - 1, \
@@ -552,5 +555,18 @@ struct dpsw_cmd_if_reflection {
/* only 2 bits from the LSB */
u8 filter;
};
+
+struct dpsw_cmd_lag {
+ u8 group_id;
+ u8 num_ifs;
+ u8 pad[6];
+ u8 if_id[DPSW_MAX_LAG_IFS];
+ u8 phase;
+};
+
+struct dpsw_cmd_if_set_lag_state {
+ __le16 if_id;
+ u8 tx_enabled;
+};
#pragma pack(pop)
#endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index ab921d75deb2..f75cbdce42ba 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -1659,3 +1659,63 @@ int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
return mc_send_command(mc_io, &cmd);
}
+
+/**
+ * dpsw_lag_set() - Set LAG configuration
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPSW object
+ * @cfg: pointer to LAG configuration
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpsw_lag_set(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ const struct dpsw_lag_cfg *cfg)
+{
+ struct fsl_mc_command cmd = { 0 };
+ struct dpsw_cmd_lag *cmd_params;
+ int i = 0;
+
+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_LAG, cmd_flags, token);
+
+ if (cfg->num_ifs > DPSW_MAX_LAG_IFS)
+ return -EOPNOTSUPP;
+
+ cmd_params = (struct dpsw_cmd_lag *)cmd.params;
+ cmd_params->group_id = cfg->group_id;
+ cmd_params->num_ifs = cfg->num_ifs;
+ cmd_params->phase = cfg->phase;
+
+ for (i = 0; i < cfg->num_ifs; i++)
+ cmd_params->if_id[i] = cfg->if_id[i];
+
+ return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_set_lag_state() - Change per port LAG state
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPSW object
+ * @if_id: ID of the switch interface
+ * @tx_enabled: Value of the per port LAG state
+ * - 0 if the interface will not be active as part of the LAG group
+ * - 1 if the interface will be active in the LAG group
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpsw_if_set_lag_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id, u8 tx_enabled)
+{
+ struct dpsw_cmd_if_set_lag_state *cmd_params;
+ struct fsl_mc_command cmd = { 0 };
+
+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LAG_STATE,
+ cmd_flags, token);
+
+ cmd_params = (struct dpsw_cmd_if_set_lag_state *)cmd.params;
+ cmd_params->if_id = cpu_to_le16(if_id);
+ cmd_params->tx_enabled = tx_enabled;
+
+ return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index b90bd363f47a..89f0267de8e9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -20,6 +20,8 @@ struct fsl_mc_io;
#define DPSW_MAX_IF 64
+#define DPSW_MAX_LAG_IFS 8
+
int dpsw_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpsw_id, u16 *token);
int dpsw_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
@@ -788,4 +790,32 @@ int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
u16 if_id, const struct dpsw_reflection_cfg *cfg);
+
+/* Link Aggregation Group configuration */
+
+#define DPSW_LAG_SET_PHASE_APPLY 0
+#define DPSW_LAG_SET_PHASE_CHECK 1
+
+/**
+ * struct dpsw_lag_cfg - Configuration structure for a LAG group
+ * @group_id: Link aggregation group ID. Valid values are in the
+ * [1, DPSW_MAX_LAG_IFS] range.
+ * @num_ifs: Number of interfaces in this LAG group, valid range is
+ * [0, DPSW_MAX_LAG_IFS].
+ * @if_id: Array containing the interface IDs of the ports part of a LAG group
+ * @phase: Use DPSW_LAG_SET_PHASE_APPLY for LAG configuration processing or
+ * DPSW_LAG_SET_PHASE_CHECK for LAG configuration validation.
+ */
+struct dpsw_lag_cfg {
+ u8 group_id;
+ u8 num_ifs;
+ u8 if_id[DPSW_MAX_LAG_IFS];
+ u8 phase;
+};
+
+int dpsw_lag_set(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ const struct dpsw_lag_cfg *cfg);
+
+int dpsw_if_set_lag_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id, u8 tx_enabled);
#endif /* __FSL_DPSW_H */
--
2.25.1
^ permalink raw reply related
* [PATCH net-next v4 09/13] dpaa2-switch: add support for LAG offload
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
This patch adds the bulk of the changes needed in order to support
offloading of an upper bond device.
First of all, handling of the NETDEV_CHANGEUPPER and
NETDEV_PRECHANGEUPPER events is extended so that the driver is capable
of handling joining or leaving an upper bond device.
All the restrictions around the LAG offload support are added in the
newly added dpaa2_switch_pre_lag_join() function.
The same events are extended to also detect if one of our upper bond
devices changes its own upper device. In this case, on each lower device
that is DPAA2 the corresponding dpaa2_switch_port_[pre]changeupper()
function will be called. This will start the process of joining the same
FDB as the one used by the bridge device.
Setting the 'offload_fwd_mark' field on the skbs is also extended to be
set not only when the port is under a bridge but also under a bond
device that is offloaded.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- Add a defensive check in dpaa2_switch_port_bond_leave() for a NULL
port_priv->lag
- Extend the dpaa2_switch_prevent_bridging_with_8021q_upper() function
so that we prevent a bond device with VLAN uppers from joining a bridge.
The restriction is related to VLAN management in terms of the FDB which
can change upon a topology change. VLAN uppers can only be added once
the bridge topology is set up.
- Remove all FDB management from the bond join/leave paths. Decided to
reconfigure the FDB only on bridge join/leave since the FDB determines
the forwarding domain and when a bond is not bridged, from a
configuration standpoint, the individual lowers can be viewed as
standalone.
- Moved here the update to the dpaa2_switch_port_to_bridge_port()
function so that the LAG state is taken into account.
- Add a new per LAG field - primary - which is used to keep track of the
primary port of a LAG group instead of determining each time we need to
use it.
- Set 'skb->offload_fwd_mark' only when the port is under a bridge.
Changes in v3:
- Fix logic in prechangeupper callback in order to not call
dpaa2_switch_prechangeupper_sanity_checks() on !info->linking
- Fixed up the logic in dpaa2_switch_port_bond_join()'s error path
so that the FDBs are cleaned up properly and we do not end up with leaked
FDBs, meaning that they could have been marked as in-use while no port
was actually using them.
- Mark the port_priv->lag field as __rcu and use the proper accessors for
it. This will eventually become useful in a later patch when the lag
field will be accessed concurrently from the NAPI context and the
join/leave paths
Changes in v2:
- Extend dpaa2_switch_prechangeupper_sanity_checks() with
netdev_walk_all_lower_dev() so that checks are done on all lower devices
of a bridge, even for the lowers of a bridged bond.
- Better manage the default VLAN on bond join
- Clean-up the error path in dpaa2_switch_port_bond_join()
- Call dpaa2_switch_port_bridge_leave() in case a port is leaving a bond
which is also a bridged port
- Update dpaa2_switch_port_bond_leave() so that in case of any failure
the driver tries to cleanup the LAG offload configuration.
- Call switchdev_bridge_port_unoffload() if a switch port is leaving a
bridged bond device.
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 473 +++++++++++++++++-
.../ethernet/freescale/dpaa2/dpaa2-switch.h | 15 +-
2 files changed, 476 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 3472f5d5b08a..949a7241a00f 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -51,6 +51,17 @@ dpaa2_switch_filter_block_get_unused(struct ethsw_core *ethsw)
return NULL;
}
+static struct dpaa2_switch_lag *
+dpaa2_switch_lag_get_unused(struct ethsw_core *ethsw)
+{
+ int i;
+
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+ if (!ethsw->lags[i].in_use)
+ return &ethsw->lags[i];
+ return NULL;
+}
+
static bool dpaa2_switch_fdb_in_use_by_others(struct ethsw_core *ethsw,
struct dpaa2_switch_fdb *fdb,
struct ethsw_port_priv *except)
@@ -2042,9 +2053,15 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
static struct net_device *
dpaa2_switch_port_to_bridge_port(struct ethsw_port_priv *port_priv)
{
+ struct dpaa2_switch_lag *lag;
+
if (!port_priv->fdb->bridge_dev)
return NULL;
+ lag = rtnl_dereference(port_priv->lag);
+ if (lag)
+ return lag->bond_dev;
+
return port_priv->netdev;
}
@@ -2193,30 +2210,53 @@ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
false);
}
+static int
+dpaa2_switch_have_vlan_upper(struct net_device *upper_dev,
+ __always_unused struct netdev_nested_priv *priv)
+{
+ return is_vlan_dev(upper_dev);
+}
+
static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *netdev)
{
- struct net_device *upper_dev;
- struct list_head *iter;
+ struct netdev_nested_priv priv = {};
/* RCU read lock not necessary because we have write-side protection
- * (rtnl_mutex), however a non-rcu iterator does not exist.
+ * (rtnl_mutex), however a non-rcu iterator does not exist. Walk the
+ * entire upper chain so that a VLAN device stacked on an intermediate
+ * bond is caught too.
*/
- netdev_for_each_upper_dev_rcu(netdev, upper_dev, iter)
- if (is_vlan_dev(upper_dev))
- return -EOPNOTSUPP;
+ if (netdev_walk_all_upper_dev_rcu(netdev, dpaa2_switch_have_vlan_upper,
+ &priv))
+ return -EOPNOTSUPP;
return 0;
}
+static int dpaa2_switch_check_dpsw_instance(struct net_device *dev,
+ struct netdev_nested_priv *priv)
+{
+ struct ethsw_port_priv *port_priv = (struct ethsw_port_priv *)priv->data;
+ struct ethsw_port_priv *other_priv = netdev_priv(dev);
+
+ if (!dpaa2_switch_port_dev_check(dev))
+ return 0;
+
+ if (other_priv->ethsw_data == port_priv->ethsw_data)
+ return 0;
+
+ return 1;
+}
+
static int
dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev,
struct net_device *upper_dev,
struct netlink_ext_ack *extack)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
- struct ethsw_port_priv *other_port_priv;
- struct net_device *other_dev;
- struct list_head *iter;
+ struct netdev_nested_priv data = {
+ .data = (void *)port_priv,
+ };
int err;
if (!br_vlan_enabled(upper_dev)) {
@@ -2231,6 +2271,70 @@ dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev,
return err;
}
+ err = netdev_walk_all_lower_dev(upper_dev,
+ dpaa2_switch_check_dpsw_instance,
+ &data);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Interface from a different DPSW is in the bridge already");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dpaa2_switch_pre_lag_join(struct net_device *netdev,
+ struct net_device *upper_dev,
+ struct netdev_lag_upper_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct ethsw_port_priv *other_port_priv;
+ struct dpaa2_switch_lag *lag = NULL;
+ struct dpsw_lag_cfg cfg = {0};
+ struct net_device *other_dev;
+ int i, num_ifs = 0, err;
+ struct list_head *iter;
+
+ if (!(ethsw->features & ETHSW_FEATURE_LAG_OFFLOAD)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "LAG offload is supported only for DPSW >= v8.13");
+ return -EOPNOTSUPP;
+ }
+
+ if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can only offload LAG using hash TX type");
+ return -EOPNOTSUPP;
+ }
+
+ if (info->hash_type != NETDEV_LAG_HASH_L23) {
+ NL_SET_ERR_MSG_MOD(extack, "Can only offload L2+L3 Tx hash");
+ return -EOPNOTSUPP;
+ }
+
+ if (!dpaa2_switch_port_has_mac(port_priv)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only switch interfaces connected to MACs can be under a LAG");
+ return -EINVAL;
+ }
+
+ if (vlan_uses_dev(upper_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot join a LAG upper that has a VLAN");
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ if (!ethsw->lags[i].in_use)
+ continue;
+ if (ethsw->lags[i].bond_dev != upper_dev)
+ continue;
+ lag = &ethsw->lags[i];
+ break;
+ }
+
netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
if (!dpaa2_switch_port_dev_check(other_dev))
continue;
@@ -2238,11 +2342,229 @@ dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev,
other_port_priv = netdev_priv(other_dev);
if (other_port_priv->ethsw_data != port_priv->ethsw_data) {
NL_SET_ERR_MSG_MOD(extack,
- "Interface from a different DPSW is in the bridge already");
+ "Interface from a different DPSW is in the bond already");
+ return -EINVAL;
+ }
+
+ cfg.if_id[num_ifs++] = other_port_priv->idx;
+
+ if (num_ifs >= DPSW_MAX_LAG_IFS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot add more than 8 DPAA2 switch ports under the same bond");
return -EINVAL;
}
}
+ if (lag) {
+ cfg.group_id = lag->id;
+ cfg.if_id[num_ifs++] = port_priv->idx;
+ cfg.num_ifs = num_ifs;
+ cfg.phase = DPSW_LAG_SET_PHASE_CHECK;
+
+ err = dpsw_lag_set(ethsw->mc_io, 0, ethsw->dpsw_handle, &cfg);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload LAG configuration");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static void dpaa2_switch_port_set_lag_group(struct ethsw_port_priv *port_priv,
+ struct net_device *bond_dev)
+{
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct ethsw_port_priv *other_port_priv = NULL;
+ struct dpaa2_switch_lag *lag = NULL;
+ struct dpaa2_switch_lag *other_lag;
+ struct net_device *other_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(bond_dev, other_dev, iter) {
+ if (!dpaa2_switch_port_dev_check(other_dev))
+ continue;
+
+ other_port_priv = netdev_priv(other_dev);
+ other_lag = rtnl_dereference(other_port_priv->lag);
+ if (!other_lag)
+ continue;
+
+ if (other_lag->bond_dev == bond_dev) {
+ rcu_assign_pointer(port_priv->lag, other_lag);
+ return;
+ }
+ }
+
+ /* This is the first interface to be added under a bond device. Find an
+ * unused LAG group. No need to check for NULL since there are the same
+ * number of DPSW ports as LAG groups, meaning that each port can have
+ * its own LAG group.
+ */
+ lag = dpaa2_switch_lag_get_unused(ethsw);
+ lag->in_use = true;
+ lag->bond_dev = bond_dev;
+ lag->primary = port_priv;
+ rcu_assign_pointer(port_priv->lag, lag);
+}
+
+static bool dpaa2_switch_port_in_lag(struct ethsw_port_priv *port_priv,
+ struct net_device *bond_dev)
+{
+ struct dpaa2_switch_lag *lag;
+
+ if (!port_priv)
+ return false;
+
+ lag = rtnl_dereference(port_priv->lag);
+ return lag && lag->bond_dev == bond_dev;
+}
+
+static int dpaa2_switch_set_lag_cfg(struct net_device *bond_dev, u8 lag_id,
+ struct ethsw_core *ethsw)
+{
+ struct dpaa2_switch_lag *lag = &ethsw->lags[lag_id - 1];
+ struct ethsw_port_priv *primary, *new_primary = NULL;
+ struct ethsw_port_priv *port_priv = NULL;
+ struct dpsw_lag_cfg cfg = {0};
+ u8 num_ifs = 0;
+ int err, i;
+
+ cfg.group_id = lag_id;
+
+ /* Determine the primary port. The caller clears ->lag on the port that
+ * is leaving, so a NULL ->lag on the current primary means it is the
+ * one leaving: elect the first remaining member as the new primary.
+ * Otherwise keep the current primary.
+ */
+ if (rtnl_dereference(lag->primary->lag)) {
+ primary = lag->primary;
+ } else {
+ primary = NULL;
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ if (dpaa2_switch_port_in_lag(ethsw->ports[i], bond_dev)) {
+ new_primary = ethsw->ports[i];
+ primary = new_primary;
+ break;
+ }
+ }
+ }
+
+ /* Build the interface list, always placing the primary first */
+ if (primary)
+ cfg.if_id[num_ifs++] = primary->idx;
+
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ port_priv = ethsw->ports[i];
+ if (port_priv == primary)
+ continue;
+ if (!dpaa2_switch_port_in_lag(port_priv, bond_dev))
+ continue;
+
+ cfg.if_id[num_ifs++] = port_priv->idx;
+ }
+ cfg.num_ifs = num_ifs;
+
+ /* No more interfaces under this LAG group, mark it as not in use. Wait
+ * for a grace period so that any readers of the lag structure finished.
+ */
+ if (!num_ifs) {
+ synchronize_net();
+
+ lag->bond_dev = NULL;
+ lag->primary = NULL;
+ lag->in_use = false;
+ }
+
+ err = dpsw_lag_set(ethsw->mc_io, 0, ethsw->dpsw_handle, &cfg);
+ if (err)
+ return err;
+
+ if (new_primary) {
+ synchronize_net();
+ lag->primary = new_primary;
+ }
+
+ return 0;
+}
+
+static int dpaa2_switch_port_bond_join(struct net_device *netdev,
+ struct net_device *bond_dev,
+ struct netdev_lag_upper_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct net_device *bridge_dev;
+ struct dpaa2_switch_lag *lag;
+ int err = 0;
+ u8 lag_id;
+
+ /* Setup the port_priv->lag pointer for this switch port */
+ dpaa2_switch_port_set_lag_group(port_priv, bond_dev);
+
+ /* Create the LAG configuration and apply it in MC */
+ lag = rtnl_dereference(port_priv->lag);
+ lag_id = lag->id;
+ err = dpaa2_switch_set_lag_cfg(bond_dev, lag_id, ethsw);
+ if (err)
+ goto err_lag_cfg;
+
+ /* If the bond device is a switch port, join the bridge as well */
+ bridge_dev = netdev_master_upper_dev_get(bond_dev);
+ if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+ return 0;
+
+ err = dpaa2_switch_port_bridge_join(netdev, bridge_dev, extack);
+ if (err)
+ goto err_lag_cfg;
+
+ return err;
+
+err_lag_cfg:
+ rcu_assign_pointer(port_priv->lag, NULL);
+ dpaa2_switch_set_lag_cfg(bond_dev, lag_id, ethsw);
+
+ return err;
+}
+
+static int dpaa2_switch_port_bond_leave(struct net_device *netdev,
+ struct net_device *bond_dev)
+{
+ struct net_device *bridge_dev = netdev_master_upper_dev_get(bond_dev);
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct dpaa2_switch_lag *lag = rtnl_dereference(port_priv->lag);
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct net_device *brpdev;
+ bool learn_ena;
+ int err;
+
+ if (!lag)
+ return 0;
+
+ /* Recreate the LAG configuration for the LAG group that we left. */
+ rcu_assign_pointer(port_priv->lag, NULL);
+ dpaa2_switch_set_lag_cfg(bond_dev, lag->id, ethsw);
+
+ if (bridge_dev && netif_is_bridge_master(bridge_dev)) {
+ /* Make sure that the new primary inherits the learning state */
+ if (lag->primary) {
+ brpdev = dpaa2_switch_port_to_bridge_port(lag->primary);
+ learn_ena = br_port_flag_is_set(brpdev, BR_LEARNING);
+ err = dpaa2_switch_port_set_learning(lag->primary,
+ learn_ena);
+ if (err)
+ return err;
+ lag->primary->learn_ena = learn_ena;
+ }
+
+ /* In case the bond is a bridge port, leave the upper bridge as
+ * well.
+ */
+ return dpaa2_switch_port_bridge_leave(netdev);
+ }
+
return 0;
}
@@ -2250,8 +2572,8 @@ static int dpaa2_switch_port_prechangeupper(struct net_device *netdev,
struct netdev_notifier_changeupper_info *info)
{
struct ethsw_port_priv *port_priv;
+ struct net_device *upper_dev, *br;
struct netlink_ext_ack *extack;
- struct net_device *upper_dev;
int err;
if (!dpaa2_switch_port_dev_check(netdev))
@@ -2268,6 +2590,24 @@ static int dpaa2_switch_port_prechangeupper(struct net_device *netdev,
if (!info->linking)
dpaa2_switch_port_pre_bridge_leave(netdev);
+ } else if (netif_is_lag_master(upper_dev)) {
+ if (!info->linking) {
+ if (netif_is_bridge_port(upper_dev))
+ dpaa2_switch_port_pre_bridge_leave(netdev);
+ return 0;
+ }
+
+ if (netif_is_bridge_port(upper_dev)) {
+ br = netdev_master_upper_dev_get(upper_dev);
+ err = dpaa2_switch_prechangeupper_sanity_checks(netdev,
+ br,
+ extack);
+ if (err)
+ return err;
+ }
+
+ return dpaa2_switch_pre_lag_join(netdev, upper_dev,
+ info->upper_info, extack);
} else if (is_vlan_dev(upper_dev)) {
port_priv = netdev_priv(netdev);
if (port_priv->fdb->bridge_dev) {
@@ -2299,6 +2639,80 @@ static int dpaa2_switch_port_changeupper(struct net_device *netdev,
extack);
else
return dpaa2_switch_port_bridge_leave(netdev);
+ } else if (netif_is_lag_master(upper_dev)) {
+ if (info->linking)
+ return dpaa2_switch_port_bond_join(netdev, upper_dev,
+ info->upper_info,
+ extack);
+ else
+ return dpaa2_switch_port_bond_leave(netdev, upper_dev);
+ }
+
+ return 0;
+}
+
+static int
+dpaa2_switch_lag_prechangeupper(struct net_device *netdev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+ int err = 0;
+
+ if (!netif_is_lag_master(netdev))
+ return 0;
+
+ netdev_for_each_lower_dev(netdev, lower, iter) {
+ if (!dpaa2_switch_port_dev_check(lower))
+ continue;
+
+ err = dpaa2_switch_port_prechangeupper(lower, info);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static int
+dpaa2_switch_lag_changeupper(struct net_device *netdev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+ int err = 0;
+
+ if (!netif_is_lag_master(netdev))
+ return 0;
+
+ netdev_for_each_lower_dev(netdev, lower, iter) {
+ if (!dpaa2_switch_port_dev_check(lower))
+ continue;
+
+ err = dpaa2_switch_port_changeupper(lower, info);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+dpaa2_switch_port_changelowerstate(struct net_device *netdev,
+ struct netdev_lag_lower_state_info *linfo)
+{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ int err;
+
+ if (!rtnl_dereference(port_priv->lag))
+ return 0;
+
+ err = dpsw_if_set_lag_state(ethsw->mc_io, 0, ethsw->dpsw_handle,
+ port_priv->idx, linfo->tx_enabled ? 1 : 0);
+ if (err) {
+ netdev_err(netdev, "dpsw_if_set_lag_state() = %d\n", err);
+ return err;
}
return 0;
@@ -2308,6 +2722,7 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changelowerstate_info *info;
int err = 0;
switch (event) {
@@ -2316,13 +2731,29 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
if (err)
return notifier_from_errno(err);
+ err = dpaa2_switch_lag_prechangeupper(netdev, ptr);
+ if (err)
+ return notifier_from_errno(err);
+
break;
case NETDEV_CHANGEUPPER:
err = dpaa2_switch_port_changeupper(netdev, ptr);
if (err)
return notifier_from_errno(err);
+ err = dpaa2_switch_lag_changeupper(netdev, ptr);
+ if (err)
+ return notifier_from_errno(err);
+
break;
+ case NETDEV_CHANGELOWERSTATE:
+ info = ptr;
+ if (!dpaa2_switch_port_dev_check(netdev))
+ break;
+
+ err = dpaa2_switch_port_changelowerstate(netdev,
+ info->lower_state_info);
+ return notifier_from_errno(err);
}
return NOTIFY_DONE;
@@ -2581,6 +3012,9 @@ static void dpaa2_switch_detect_features(struct ethsw_core *ethsw)
if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 6))
ethsw->features |= ETHSW_FEATURE_MAC_ADDR;
+
+ if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 13))
+ ethsw->features |= ETHSW_FEATURE_LAG_OFFLOAD;
}
static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw)
@@ -3370,6 +3804,7 @@ static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
kfree(ethsw->fdbs);
kfree(ethsw->filter_blocks);
kfree(ethsw->ports);
+ kfree(ethsw->lags);
dpaa2_switch_teardown(sw_dev);
@@ -3397,6 +3832,7 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
port_priv = netdev_priv(port_netdev);
port_priv->netdev = port_netdev;
port_priv->ethsw_data = ethsw;
+ rcu_assign_pointer(port_priv->lag, NULL);
mutex_init(&port_priv->mac_lock);
@@ -3504,6 +3940,19 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
goto err_free_fdbs;
}
+ ethsw->lags = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->lags),
+ GFP_KERNEL);
+ if (!ethsw->lags) {
+ err = -ENOMEM;
+ goto err_free_filter;
+ }
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ ethsw->lags[i].bond_dev = NULL;
+ ethsw->lags[i].ethsw = ethsw;
+ ethsw->lags[i].id = i + 1;
+ ethsw->lags[i].in_use = 0;
+ }
+
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
err = dpaa2_switch_probe_port(ethsw, i);
if (err)
@@ -3550,6 +3999,8 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
err_free_netdev:
for (i--; i >= 0; i--)
dpaa2_switch_remove_port(ethsw, i);
+ kfree(ethsw->lags);
+err_free_filter:
kfree(ethsw->filter_blocks);
err_free_fdbs:
kfree(ethsw->fdbs);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 42b3ca73f55d..c98bddd7e359 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -41,7 +41,8 @@
#define ETHSW_MAX_FRAME_LENGTH (DPAA2_MFL - VLAN_ETH_HLEN - ETH_FCS_LEN)
#define ETHSW_L2_MAX_FRM(mtu) ((mtu) + VLAN_ETH_HLEN + ETH_FCS_LEN)
-#define ETHSW_FEATURE_MAC_ADDR BIT(0)
+#define ETHSW_FEATURE_MAC_ADDR BIT(0)
+#define ETHSW_FEATURE_LAG_OFFLOAD BIT(1)
/* Number of receive queues (one RX and one TX_CONF) */
#define DPAA2_SWITCH_RX_NUM_FQS 2
@@ -105,6 +106,14 @@ struct dpaa2_switch_fdb {
bool in_use;
};
+struct dpaa2_switch_lag {
+ struct ethsw_core *ethsw;
+ struct net_device *bond_dev;
+ bool in_use;
+ u8 id;
+ struct ethsw_port_priv *primary;
+};
+
struct dpaa2_switch_acl_entry {
struct list_head list;
u16 prio;
@@ -163,6 +172,8 @@ struct ethsw_port_priv {
struct dpaa2_mac *mac;
/* Protects against changes to port_priv->mac */
struct mutex mac_lock;
+
+ struct dpaa2_switch_lag __rcu *lag;
};
/* Switch data */
@@ -190,6 +201,8 @@ struct ethsw_core {
struct dpaa2_switch_fdb *fdbs;
struct dpaa2_switch_filter_block *filter_blocks;
u16 mirror_port;
+
+ struct dpaa2_switch_lag *lags;
};
static inline int dpaa2_switch_get_index(struct ethsw_core *ethsw,
--
2.25.1
^ permalink raw reply related
* [PATCH net-next v4 10/13] dpaa2-switch: offload FDBs added on an upper bond device
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
This patch adds support for offloading FDB entries added on upper bond
devices.
First of all, the call to switchdev_bridge_port_offload() is updated so
that the notifier blocks needed for FDB events replay are available to
the bridge core.
Using switchdev_handle_*() helpers is also necessary because each FDB
event needs to be fanned out to any DPAA2 switch lower device. This
triggers another change: the return type of
dpaa2_switch_port_fdb_event() changes from notifier codes to regular
errno values.
Handling of the SWITCHDEV_FDB_ADD_TO_DEVICE/SWITCHDEV_FDB_DEL_TO_DEVICE
events is updated so that the newly added dpaa2_switch_lag_fdb_add() /
dpaa2_switch_lag_fdb_del() functions are called anytime a port is under
a bond device. This will allow us to manage refcounting on FDB entries
which are added on the upper bond devices.
The DPAA2 switch uses shared-VLAN learning which means that the vid
parameter is not used when adding an FDB entry to HW. The current
behavior when dealing with FDB entries with the same MAC address but
different VLANs is to add the entry to HW every time while removal will
get done on the first 'bridge fdb del' command issued by the user.
The same behavior is kept also for FDBs added on bond devices by keeping
the refcount on the {vid, addr} pair while the HW operation disregards
entirely the vid parameter.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- Migrate FDBs in case the primary interface of a LAG changes.
- Use lag->primary instead of determining each time the primary
interface of a LAG device
Changes in v3:
- Update dpaa2_switch_foreign_dev_check() so that we check if there is
any port in the same switch as dev which offloads foreign_dev in case
this is a bridge port.
- Add mutex_destroy on the per LAG fdb_lock
- Make sure that all FDB events were processed on the workqueue on the
.remove() path.
- Delete the refcounted entry in dpaa2_switch_lag_fdb_del() as soon as
possible, even if the HW deletion would fail
- Access the port_priv->lag field only through the proper rcu accessors.
Changes in v2:
- Update dpaa2_switch_foreign_dev_check() so that we check if there is
an offloaded path between the switch port and the foreign net_device.
Before this change we also checked if the foreign_dev was offloaded or
not by the switch port.
- Update the switchdev_bridge_port_unoffload() by passing it the proper
context and the notifier blocks.
- Add dev_hold() and dev_put() calls for orig_dev
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 227 ++++++++++++++++--
.../ethernet/freescale/dpaa2/dpaa2-switch.h | 24 ++
2 files changed, 225 insertions(+), 26 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 949a7241a00f..307b3b7a1bfb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -25,6 +25,9 @@
#define DEFAULT_VLAN_ID 1
+static struct notifier_block dpaa2_switch_port_switchdev_nb;
+static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb;
+
static u16 dpaa2_switch_port_get_fdb_id(struct ethsw_port_priv *port_priv)
{
return port_priv->fdb->fdb_id;
@@ -585,6 +588,81 @@ static int dpaa2_switch_port_fdb_del(struct ethsw_port_priv *port_priv,
return dpaa2_switch_port_fdb_del_mc(port_priv, addr);
}
+static struct dpaa2_mac_addr *
+dpaa2_switch_mac_addr_find(struct list_head *addr_list,
+ const unsigned char *addr, u16 vid)
+{
+ struct dpaa2_mac_addr *a;
+
+ list_for_each_entry(a, addr_list, list)
+ if (ether_addr_equal(a->addr, addr) && a->vid == vid)
+ return a;
+
+ return NULL;
+}
+
+static int dpaa2_switch_lag_fdb_add(struct dpaa2_switch_lag *lag,
+ const unsigned char *addr, u16 vid)
+{
+ struct ethsw_port_priv *port_priv = lag->primary;
+ struct dpaa2_mac_addr *a;
+ int err = 0;
+
+ mutex_lock(&lag->fdb_lock);
+
+ a = dpaa2_switch_mac_addr_find(&lag->fdbs, addr, vid);
+ if (a) {
+ refcount_inc(&a->refcount);
+ goto out;
+ }
+
+ a = kzalloc(sizeof(*a), GFP_KERNEL);
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = dpaa2_switch_port_fdb_add(port_priv, addr);
+ if (err) {
+ kfree(a);
+ goto out;
+ }
+
+ ether_addr_copy(a->addr, addr);
+ a->vid = vid;
+ refcount_set(&a->refcount, 1);
+ list_add_tail(&a->list, &lag->fdbs);
+
+out:
+ mutex_unlock(&lag->fdb_lock);
+
+ return err;
+}
+
+static void dpaa2_switch_lag_fdb_del(struct dpaa2_switch_lag *lag,
+ const unsigned char *addr, u16 vid)
+{
+ struct ethsw_port_priv *port_priv = lag->primary;
+ struct dpaa2_mac_addr *a;
+
+ mutex_lock(&lag->fdb_lock);
+
+ a = dpaa2_switch_mac_addr_find(&lag->fdbs, addr, vid);
+ if (!a)
+ goto out;
+
+ if (!refcount_dec_and_test(&a->refcount))
+ goto out;
+
+ list_del(&a->list);
+ kfree(a);
+
+ dpaa2_switch_port_fdb_del(port_priv, addr);
+
+out:
+ mutex_unlock(&lag->fdb_lock);
+}
+
static void dpaa2_switch_port_get_stats(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
@@ -1533,6 +1611,33 @@ bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
return netdev->netdev_ops == &dpaa2_switch_port_ops;
}
+static bool dpaa2_switch_foreign_dev_check(const struct net_device *dev,
+ const struct net_device *foreign_dev)
+{
+ struct ethsw_port_priv *port_priv = netdev_priv(dev);
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct ethsw_port_priv *other_port;
+ int i;
+
+ if (netif_is_bridge_master(foreign_dev))
+ if (port_priv->fdb->bridge_dev == foreign_dev)
+ return false;
+
+ if (netif_is_bridge_port(foreign_dev)) {
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ other_port = ethsw->ports[i];
+
+ if (!other_port)
+ continue;
+ if (dpaa2_switch_port_offloads_bridge_port(other_port,
+ foreign_dev))
+ return false;
+ }
+ }
+
+ return true;
+}
+
static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv)
{
struct fsl_mc_device *dpsw_port_dev, *dpmac_dev;
@@ -2100,8 +2205,10 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
goto err_egress_flood;
brport_dev = dpaa2_switch_port_to_bridge_port(port_priv);
- err = switchdev_bridge_port_offload(brport_dev, netdev, NULL,
- NULL, NULL, false, extack);
+ err = switchdev_bridge_port_offload(brport_dev, netdev, port_priv,
+ &dpaa2_switch_port_switchdev_nb,
+ &dpaa2_switch_port_switchdev_blocking_nb,
+ false, extack);
if (err)
goto err_switchdev_offload;
@@ -2143,7 +2250,9 @@ static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev)
if (!brport_dev)
return;
- switchdev_bridge_port_unoffload(brport_dev, NULL, NULL, NULL);
+ switchdev_bridge_port_unoffload(brport_dev, port_priv,
+ &dpaa2_switch_port_switchdev_nb,
+ &dpaa2_switch_port_switchdev_blocking_nb);
/* Make sure that any FDB add/del operations are completed before the
* bridge layout changes
@@ -2425,9 +2534,10 @@ static int dpaa2_switch_set_lag_cfg(struct net_device *bond_dev, u8 lag_id,
struct ethsw_core *ethsw)
{
struct dpaa2_switch_lag *lag = ðsw->lags[lag_id - 1];
- struct ethsw_port_priv *primary, *new_primary = NULL;
- struct ethsw_port_priv *port_priv = NULL;
+ struct ethsw_port_priv *primary, *port_priv;
+ struct ethsw_port_priv *new_primary = NULL;
struct dpsw_lag_cfg cfg = {0};
+ struct dpaa2_mac_addr *a;
u8 num_ifs = 0;
int err, i;
@@ -2454,7 +2564,6 @@ static int dpaa2_switch_set_lag_cfg(struct net_device *bond_dev, u8 lag_id,
/* Build the interface list, always placing the primary first */
if (primary)
cfg.if_id[num_ifs++] = primary->idx;
-
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
port_priv = ethsw->ports[i];
if (port_priv == primary)
@@ -2477,11 +2586,32 @@ static int dpaa2_switch_set_lag_cfg(struct net_device *bond_dev, u8 lag_id,
lag->in_use = false;
}
+ /* When the primary changes, migrate the FDB entries from the old
+ * primary to the new one: remove them before reconfiguring the LAG in
+ * hardware and re-add them on the new primary afterwards. We do not
+ * touch any refcounting since the intention is to change the HW entry,
+ * not the parallel software tracking.
+ */
+ if (new_primary) {
+ mutex_lock(&lag->fdb_lock);
+ list_for_each_entry(a, &lag->fdbs, list)
+ dpaa2_switch_port_fdb_del(lag->primary, a->addr);
+ mutex_unlock(&lag->fdb_lock);
+ }
+
err = dpsw_lag_set(ethsw->mc_io, 0, ethsw->dpsw_handle, &cfg);
if (err)
return err;
if (new_primary) {
+ mutex_lock(&lag->fdb_lock);
+ list_for_each_entry(a, &lag->fdbs, list) {
+ err = dpaa2_switch_port_fdb_add(new_primary, a->addr);
+ if (err)
+ netdev_err(new_primary->netdev, "Unable to migrate FDB\n");
+ }
+ mutex_unlock(&lag->fdb_lock);
+
synchronize_net();
lag->primary = new_primary;
}
@@ -2763,67 +2893,97 @@ struct ethsw_switchdev_event_work {
struct work_struct work;
struct switchdev_notifier_fdb_info fdb_info;
struct net_device *dev;
+ struct net_device *orig_dev;
unsigned long event;
+ u16 vid;
};
static void dpaa2_switch_event_work(struct work_struct *work)
{
struct ethsw_switchdev_event_work *switchdev_work =
container_of(work, struct ethsw_switchdev_event_work, work);
+ struct net_device *orig_dev = switchdev_work->orig_dev;
struct net_device *dev = switchdev_work->dev;
+ struct ethsw_port_priv *port_priv = netdev_priv(dev);
struct switchdev_notifier_fdb_info *fdb_info;
+ struct dpaa2_switch_lag *lag;
int err;
fdb_info = &switchdev_work->fdb_info;
+ /* The lag structures are freed only from dpaa2_switch_remove(), which
+ * first flushes this workqueue, so the pointer stays valid for the
+ * lifetime of the work item. Only the dereference needs the RCU
+ * read-side lock; the FDB helpers below can sleep and must run outside
+ * of it.
+ */
+ rcu_read_lock();
+ lag = rcu_dereference(port_priv->lag);
+ rcu_read_unlock();
+
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
- err = dpaa2_switch_port_fdb_add(netdev_priv(dev),
- fdb_info->addr);
+ if (lag)
+ err = dpaa2_switch_lag_fdb_add(lag, fdb_info->addr,
+ switchdev_work->vid);
+ else
+ err = dpaa2_switch_port_fdb_add(port_priv,
+ fdb_info->addr);
if (err)
break;
fdb_info->offloaded = true;
- call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
+ call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, orig_dev,
&fdb_info->info, NULL);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- dpaa2_switch_port_fdb_del(netdev_priv(dev), fdb_info->addr);
+ if (lag)
+ dpaa2_switch_lag_fdb_del(lag, fdb_info->addr,
+ switchdev_work->vid);
+ else
+ dpaa2_switch_port_fdb_del(port_priv, fdb_info->addr);
break;
}
kfree(switchdev_work->fdb_info.addr);
kfree(switchdev_work);
dev_put(dev);
+ dev_put(orig_dev);
}
-static int dpaa2_switch_port_fdb_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
+static int
+dpaa2_switch_port_fdb_event(struct net_device *dev,
+ struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info)
{
- struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct ethsw_port_priv *port_priv = netdev_priv(dev);
struct ethsw_switchdev_event_work *switchdev_work;
- struct switchdev_notifier_fdb_info *fdb_info = ptr;
- struct ethsw_core *ethsw;
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
- if (!dpaa2_switch_port_dev_check(dev))
- return NOTIFY_DONE;
- ethsw = port_priv->ethsw_data;
+ if (ctx && ctx != port_priv)
+ return 0;
+
+ /* For the moment, do nothing with entries towards foreign devices */
+ if (dpaa2_switch_foreign_dev_check(dev, orig_dev))
+ return 0;
if (!fdb_info->added_by_user || fdb_info->is_local)
- return NOTIFY_DONE;
+ return 0;
switchdev_work = kzalloc_obj(*switchdev_work, GFP_ATOMIC);
if (!switchdev_work)
- return NOTIFY_BAD;
+ return -ENOMEM;
INIT_WORK(&switchdev_work->work, dpaa2_switch_event_work);
switchdev_work->dev = dev;
switchdev_work->event = event;
+ switchdev_work->orig_dev = orig_dev;
+ switchdev_work->vid = fdb_info->vid;
switch (event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- memcpy(&switchdev_work->fdb_info, ptr,
+ memcpy(&switchdev_work->fdb_info, fdb_info,
sizeof(switchdev_work->fdb_info));
switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
if (!switchdev_work->fdb_info.addr)
@@ -2834,19 +2994,20 @@ static int dpaa2_switch_port_fdb_event(struct notifier_block *nb,
/* Take a reference on the device to avoid being freed. */
dev_hold(dev);
+ dev_hold(orig_dev);
break;
default:
kfree(switchdev_work);
- return NOTIFY_DONE;
+ return 0;
}
queue_work(ethsw->workqueue, &switchdev_work->work);
- return NOTIFY_DONE;
+ return 0;
err_addr_alloc:
kfree(switchdev_work);
- return NOTIFY_BAD;
+ return -ENOMEM;
}
/* Called under rcu_read_lock() */
@@ -2854,13 +3015,18 @@ static int dpaa2_switch_port_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ int err;
switch (event) {
case SWITCHDEV_PORT_ATTR_SET:
return dpaa2_switch_port_attr_set_event(dev, ptr);
case SWITCHDEV_FDB_ADD_TO_DEVICE:
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- return dpaa2_switch_port_fdb_event(nb, event, ptr);
+ err = switchdev_handle_fdb_event_to_device(dev, event, ptr,
+ dpaa2_switch_port_dev_check,
+ dpaa2_switch_foreign_dev_check,
+ dpaa2_switch_port_fdb_event);
+ return notifier_from_errno(err);
default:
return NOTIFY_DONE;
}
@@ -3785,6 +3951,9 @@ static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
dev = &sw_dev->dev;
ethsw = dev_get_drvdata(dev);
+ /* Make sure that all events were handled before we kfree anything */
+ flush_workqueue(ethsw->workqueue);
+
dpaa2_switch_teardown_irqs(sw_dev);
dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
@@ -3798,8 +3967,10 @@ static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
netif_napi_del(ðsw->fq[i].napi);
- for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
dpaa2_switch_remove_port(ethsw, i);
+ mutex_destroy(ðsw->lags[i].fdb_lock);
+ }
kfree(ethsw->fdbs);
kfree(ethsw->filter_blocks);
@@ -3951,6 +4122,8 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
ethsw->lags[i].ethsw = ethsw;
ethsw->lags[i].id = i + 1;
ethsw->lags[i].in_use = 0;
+ mutex_init(ðsw->lags[i].fdb_lock);
+ INIT_LIST_HEAD(ðsw->lags[i].fdbs);
}
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
@@ -3999,6 +4172,8 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
err_free_netdev:
for (i--; i >= 0; i--)
dpaa2_switch_remove_port(ethsw, i);
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+ mutex_destroy(ðsw->lags[i].fdb_lock);
kfree(ethsw->lags);
err_free_filter:
kfree(ethsw->filter_blocks);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index c98bddd7e359..e8bc1469cbf7 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -100,6 +100,13 @@ struct dpaa2_switch_fq {
u32 fqid;
};
+struct dpaa2_mac_addr {
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+ refcount_t refcount;
+ struct list_head list;
+};
+
struct dpaa2_switch_fdb {
struct net_device *bridge_dev;
u16 fdb_id;
@@ -112,6 +119,9 @@ struct dpaa2_switch_lag {
bool in_use;
u8 id;
struct ethsw_port_priv *primary;
+ /* Protects the list of fdbs installed on this LAG */
+ struct mutex fdb_lock;
+ struct list_head fdbs;
};
struct dpaa2_switch_acl_entry {
@@ -287,4 +297,18 @@ int dpaa2_switch_block_offload_mirror(struct dpaa2_switch_filter_block *block,
int dpaa2_switch_block_unoffload_mirror(struct dpaa2_switch_filter_block *block,
struct ethsw_port_priv *port_priv);
+
+static inline bool
+dpaa2_switch_port_offloads_bridge_port(struct ethsw_port_priv *port_priv,
+ const struct net_device *dev)
+{
+ struct dpaa2_switch_lag *lag = rcu_dereference_rtnl(port_priv->lag);
+
+ if (lag && lag->bond_dev == dev)
+ return true;
+ if (port_priv->netdev == dev)
+ return true;
+ return false;
+}
+
#endif /* __ETHSW_H */
--
2.25.1
^ permalink raw reply related
* [PATCH net-next v4 11/13] dpaa2-switch: offload port objects on an upper bond device
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
This patch adds support for offloading port objects, VLANs and MDBs,
added on upper bond devices.
First of all, the use of the switchdev_handle_*() replication helpers
is introduced for the SWITCHDEV_PORT_OBJ_ADD/SWITCHDEV_PORT_OBJ_DEL
events. With this change, setting up the 'port_obj_info->handled = true'
is not needed anymore since it's now handled by the new helpers.
In the DPAA2 architecture, there is no difference in adding a FDB or MDB
which points towards a LAG port. Unlike other architectures, we do not
need to populate all the possible destinations which are under the LAG,
we only have to specify a single queueing destination (QDID) which
represents the LAG. This all means that handling of MDBs on bond devices
needs to have a refcount mechanism, as with the FDBs.
This mechanism is triggered by calling the dpaa2_switch_lag_fdb_add() /
dpaa2_switch_lag_fdb_del() functions which were added in the previous
patch.
Also change how dpaa2_switch_port_mdb_del() behaves in case the
underlying HW operation failed. Since the delete operations cannot be
stopped from a switchdev standpoint, go ahead and ignore the return code
from the dpaa2_switch_*_fdb_del() calls.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- Updates necessary for the dev_mc_add/dev_mc_del removal
Changes in v3:
- Access the port_priv->lag field only through the proper rcu accessors.
Changes in v2:
- In case dev_mc_add() fails, remove the MDB address from HW with the
proper function, dpaa2_switch_lag_fdb_del() or
dpaa2_switch_port_fdb_del(), depending on the LAG offload state.
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 69 +++++++++++--------
1 file changed, 41 insertions(+), 28 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 307b3b7a1bfb..1f7875ecefe2 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2017,15 +2017,28 @@ static int dpaa2_switch_port_mdb_add(struct net_device *netdev,
const struct switchdev_obj_port_mdb *mdb)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct dpaa2_switch_lag *lag;
- return dpaa2_switch_port_fdb_add(port_priv, mdb->addr);
+ lag = rtnl_dereference(port_priv->lag);
+ if (lag)
+ return dpaa2_switch_lag_fdb_add(lag, mdb->addr, mdb->vid);
+ else
+ return dpaa2_switch_port_fdb_add(port_priv, mdb->addr);
}
-static int dpaa2_switch_port_obj_add(struct net_device *netdev,
- const struct switchdev_obj *obj)
+static int dpaa2_switch_port_obj_add(struct net_device *netdev, const void *ctx,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
int err;
+ if (ctx && ctx != port_priv)
+ return 0;
+
+ if (!dpaa2_switch_port_offloads_bridge_port(port_priv, obj->orig_dev))
+ return -EOPNOTSUPP;
+
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dpaa2_switch_port_vlans_add(netdev,
@@ -2121,15 +2134,29 @@ static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
const struct switchdev_obj_port_mdb *mdb)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct dpaa2_switch_lag *lag;
+
+ lag = rtnl_dereference(port_priv->lag);
+ if (lag)
+ dpaa2_switch_lag_fdb_del(lag, mdb->addr, mdb->vid);
+ else
+ dpaa2_switch_port_fdb_del(port_priv, mdb->addr);
- return dpaa2_switch_port_fdb_del(port_priv, mdb->addr);
+ return 0;
}
-static int dpaa2_switch_port_obj_del(struct net_device *netdev,
+static int dpaa2_switch_port_obj_del(struct net_device *netdev, const void *ctx,
const struct switchdev_obj *obj)
{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
int err;
+ if (ctx && ctx != port_priv)
+ return 0;
+
+ if (!dpaa2_switch_port_offloads_bridge_port(port_priv, obj->orig_dev))
+ return -EOPNOTSUPP;
+
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dpaa2_switch_port_vlans_del(netdev, SWITCHDEV_OBJ_PORT_VLAN(obj));
@@ -3032,37 +3059,23 @@ static int dpaa2_switch_port_event(struct notifier_block *nb,
}
}
-static int dpaa2_switch_port_obj_event(unsigned long event,
- struct net_device *netdev,
- struct switchdev_notifier_port_obj_info *port_obj_info)
-{
- int err = -EOPNOTSUPP;
-
- if (!dpaa2_switch_port_dev_check(netdev))
- return NOTIFY_DONE;
-
- switch (event) {
- case SWITCHDEV_PORT_OBJ_ADD:
- err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj);
- break;
- case SWITCHDEV_PORT_OBJ_DEL:
- err = dpaa2_switch_port_obj_del(netdev, port_obj_info->obj);
- break;
- }
-
- port_obj_info->handled = true;
- return notifier_from_errno(err);
-}
-
static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ int err;
switch (event) {
case SWITCHDEV_PORT_OBJ_ADD:
+ err = switchdev_handle_port_obj_add(dev, ptr,
+ dpaa2_switch_port_dev_check,
+ dpaa2_switch_port_obj_add);
+ return notifier_from_errno(err);
case SWITCHDEV_PORT_OBJ_DEL:
- return dpaa2_switch_port_obj_event(event, dev, ptr);
+ err = switchdev_handle_port_obj_del(dev, ptr,
+ dpaa2_switch_port_dev_check,
+ dpaa2_switch_port_obj_del);
+ return notifier_from_errno(err);
case SWITCHDEV_PORT_ATTR_SET:
return dpaa2_switch_port_attr_set_event(dev, ptr);
}
--
2.25.1
^ permalink raw reply related
* [PATCH net-next v4 12/13] dpaa2-switch: trap all link local reserved addresses to the CPU
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
Do not trap only STP frames to the control interface but rather trap all
link local reserved addresses. This will still be done by looking at the
destination MAC address, but masking out the low four bits of the last
byte (i.e. matching 01:80:c2:00:00:00 to 01:80:c2:00:00:0f).
This change will benefit LACP frames which now will reach the control
interface.
While at it, change the prototype of the
dpaa2_switch_port_trap_mac_addr() function so that we directly pass a
'const u8 *' so that it matches the ether_addr_copy() used.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- none
Changes in v3:
- Change the mask so that we restrict the trap only to the link local
addresses (01:80:c2:00:00:00 to 01:80:c2:00:00:0F) instead of the entire
reserved bridge block of addresses
Changes in v2:
- none
---
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 1f7875ecefe2..b94d83f5ef06 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -3828,17 +3828,15 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
return err;
}
-/* Add an ACL to redirect frames with specific destination MAC address to
- * control interface
- */
+/* Add an ACL to redirect frames to control interface based on the dst MAC */
static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
- const char *mac)
+ const u8 *mac, const u8 *mask)
{
struct dpaa2_switch_acl_entry acl_entry = {0};
/* Match on the destination MAC address */
ether_addr_copy(acl_entry.key.match.l2_dest_mac, mac);
- eth_broadcast_addr(acl_entry.key.mask.l2_dest_mac);
+ ether_addr_copy(acl_entry.key.mask.l2_dest_mac, mask);
/* Trap to CPU */
acl_entry.cfg.precedence = 0;
@@ -3849,7 +3847,8 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
{
- const char stpa[ETH_ALEN] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
+ const u8 ll_mac[ETH_ALEN] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
+ const u8 ll_mask[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xf0};
struct switchdev_obj_port_vlan vlan = {
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.vid = DEFAULT_VLAN_ID,
@@ -3924,7 +3923,7 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
if (err)
return err;
- err = dpaa2_switch_port_trap_mac_addr(port_priv, stpa);
+ err = dpaa2_switch_port_trap_mac_addr(port_priv, ll_mac, ll_mask);
if (err)
return err;
--
2.25.1
^ permalink raw reply related
* [PATCH net-next v4 13/13] dpaa2-switch: add support for imprecise source port
From: Ioana Ciornei @ 2026-06-29 11:23 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
In-Reply-To: <20260629112309.154328-1-ioana.ciornei@nxp.com>
Switch ports configured as part of a LAG group are not able to provide
a precise source port for all packets which reach the control interface.
The only frames which will have a precise source port are those that are
explicitly trapped, for example STP and LACP frames. For any other
frames (for example, those which are flooded) we can only know the
ingress LAG group.
Take into account the DPAA2_ETHSW_FLC_IMPRECISE_IF_ID bit and based on
its value target the bond device or the specific source netdevice.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v4:
- None
- Note that I did not address sashiko's feedback related to the
rcu_read_lock() dropped before netif_receive_skb() since even under
PREEMPT_RT NAPI is under rcu protection, rcu_read_lock() being called
from local_bh_disable().
Changes in v3:
- None
Changes in v2:
- Fix 32bit build by using BIT_ULL
- Take a reference to port_priv->lag instead of reading it multiple
times.
---
.../net/ethernet/freescale/dpaa2/dpaa2-switch.c | 15 +++++++++++++--
.../net/ethernet/freescale/dpaa2/dpaa2-switch.h | 3 +++
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index b94d83f5ef06..8320b26c3f72 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -3120,19 +3120,22 @@ static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
dma_addr_t addr = dpaa2_fd_get_addr(fd);
struct ethsw_core *ethsw = fq->ethsw;
struct ethsw_port_priv *port_priv;
+ struct dpaa2_switch_lag *lag;
struct net_device *netdev;
struct vlan_ethhdr *hdr;
struct sk_buff *skb;
u16 vlan_tci, vid;
int if_id, err;
void *vaddr;
+ u64 flc;
vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, addr);
dma_unmap_page(ethsw->dev, addr, DPAA2_SWITCH_RX_BUF_SIZE,
DMA_FROM_DEVICE);
/* get switch ingress interface ID */
- if_id = upper_32_bits(dpaa2_fd_get_flc(fd)) & 0x0000FFFF;
+ flc = dpaa2_fd_get_flc(fd);
+ if_id = DPAA2_ETHSW_FLC_IF_ID(flc);
if (if_id >= ethsw->sw_attr.num_ifs) {
dev_err(ethsw->dev, "Frame received from unknown interface!\n");
goto err_free_fd;
@@ -3171,12 +3174,20 @@ static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
}
}
- skb->dev = netdev;
+ rcu_read_lock();
+
+ lag = rcu_dereference(port_priv->lag);
+ if (DPAA2_ETHSW_FLC_IMPRECISE_IF_ID(flc) && lag)
+ skb->dev = lag->bond_dev;
+ else
+ skb->dev = netdev;
skb->protocol = eth_type_trans(skb, skb->dev);
/* Setup the offload_fwd_mark only if the port is under a bridge */
skb->offload_fwd_mark = !!(port_priv->fdb->bridge_dev);
+ rcu_read_unlock();
+
netif_receive_skb(skb);
return;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index e8bc1469cbf7..63b702b0000c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -87,6 +87,9 @@
#define DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE 256
+#define DPAA2_ETHSW_FLC_IF_ID(flc) (((flc) >> 32) & GENMASK(15, 0))
+#define DPAA2_ETHSW_FLC_IMPRECISE_IF_ID(flc) ((flc) & BIT_ULL(63))
+
extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
struct ethsw_core;
--
2.25.1
^ permalink raw reply related
* RE: the confusing 10000base_CR. Shouldn't it be 10000_SFI_DA?
From: D H, Siddaraju @ 2026-06-29 11:25 UTC (permalink / raw)
To: Maxime Chevallier, Andrew Lunn, Michal Kubecek
Cc: netdev@vger.kernel.org, Das, Shubham, Chintalapalle, Balaji,
Srinivasan, Vijay, Lindberg, Magnus, Niklas Damberg,
Wirandi, Jonas, Siddaraju DH
In-Reply-To: <1b3975a8-788a-4b81-94ec-3ab5708b251b@bootlin.com>
On 6/29/26, Maxime Chevallier wrote:
>
> On 6/26/26 21:19, D H, Siddaraju wrote:
>
> > What about
> > "option-(b): create a new enum ETHTOOL_LINK_MODE_10G_SFI_DA_Full_BIT"?
> > Idea is just to create a new enum, with same enum value of 10000baseCR.
> > This will NOT consume a bit position in "ethtool_link_mode_bit_indices".
> > It just helps those tech-savvy people, who does not accept 10000baseCR
> > and prefer 10000sfiDA for being explicit.
>
> The thing is that even with a new enum value, that won't bring much to the
> table. It would likely be better to have a comment near the 10000baseCR
> definition explaining the SFF equivalency.
>
> >
> > At worst case, hope we agree for
> > "option-(c): ethtool.8 man page help strings to indicate 10G_SFI_DA"
> > Something like
> > "10000baseCR (10G_SFI_DA SFF-8431 SFP+ DA)
> > under "advertise" mask values.
>
> In that case, let's add Michal in the loop as the ethtool maintainer.
> Even then it's not straightforward as some tooling relies on the JSON
> output from ethtool, so _if_ we change the output for that mode, it should
> only be in the non-json output.
>
> My personal opinion would be that adding a comment in the enum definition
> near 10000baseCR is enough :/
Will wait for @Michal Kubecek's response about the manual page update
and as second possibility: options to update ethtool help string.
IMHO, yes the comment in ethtool.h enum definition is good
as it helps developers who use ethtool.h directly but from
ethtool app USER point-of-view, the manual page is the
first impression and ethtool --help is second. The effort here is
to help the user with a clarification, to avoid the clear confusion
with the wrong naming of 10000baseCR for all the major 4 reasons
listed in the first email of this thread. With that said, hope that
someone will also support the manual page update because
we think it is useful.
- Thank you,
Siddaraju D H
^ permalink raw reply
* [PATCH net-next v11 0/7] net: stmmac: qcom-ethqos: add support for SCMI power domains
From: Bartosz Golaszewski @ 2026-06-29 11:28 UTC (permalink / raw)
To: Bjorn Andersson, Konrad Dybcio, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Alexandre Torgue,
Vinod Koul, Giuseppe Cavallaro, Chen-Yu Tsai, Jernej Skrabec,
Neil Armstrong, Kevin Hilman, Jerome Brunet, Shawn Guo,
Fabio Estevam, Jan Petrous, s32, Mohd Ayaan Anwar, Romain Gantois,
Geert Uytterhoeven, Magnus Damm, Maxime Ripard,
Christophe Roullier, Bartosz Golaszewski, Radu Rendec
Cc: linux-arm-msm, devicetree, linux-kernel, netdev, linux-stm32,
linux-arm-kernel, Drew Fustini, linux-sunxi, linux-amlogic,
linux-mips, imx, linux-renesas-soc, linux-rockchip, sophgo,
linux-riscv, brgl, Bartosz Golaszewski, Bartosz Golaszewski,
Martin Blumenstingl, Krzysztof Kozlowski, Lad Prabhakar
Add support for the firmware-managed variant of the DesignWare MAC on
the sa8255p platform. This series contains new DT bindings and driver
changes required to support the MAC in the STMMAC driver.
It also reorganizes the ethqos code quite a bit to make the introduction
of power domains into the driver a bit easier on the eye.
The DTS changes will go in separately.
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
Changes in v11:
- Take a new approach: add a dedicated driver for the firmware-managed
SGMII PHY and simplify changes made to the MAC driver
- Link to v10: https://patch.msgid.link/20260323-qcom-sa8255p-emac-v10-0-79302b238a16@oss.qualcomm.com
Changes in v10:
- Fix unit address in DT example
- Link to v9: https://patch.msgid.link/20260316-qcom-sa8255p-emac-v9-0-c58934e76ff2@oss.qualcomm.com
Changes in v9:
- Rebase on top of current linux-next again
- Link to v8: https://patch.msgid.link/20260311-qcom-sa8255p-emac-v8-0-58227bcf1018@oss.qualcomm.com
Changes in v8:
- Rebase on top of recent changes in linux-next which required an
extensive rework
- Drop partial R-b tags
- Link to v7: https://patch.msgid.link/20260306-qcom-sa8255p-emac-v7-0-d6a3013094b7@oss.qualcomm.com
Changes in v7:
- Restored the correct authorship after learning git uses .mailmap for
the --author switch
- Rebased on top of changes from Russell
- Fixed resource management issues in error paths
- Link to v6: https://lore.kernel.org/r/20260112-qcom-sa8255p-emac-v6-0-86a3d4b2ad83@oss.qualcomm.com
Changes in v6:
- Fix $id value in the bindings
- Drop patch 3/8 from the series
- Update init/exit callback signatures
- Link to v5: https://lore.kernel.org/r/20251107-qcom-sa8255p-emac-v5-0-01d3e3aaf388@linaro.org
- Link to v6: https://lore.kernel.org/r/20251219-qcom-sa8255p-emac-v6-0-487f1082461e@oss.qualcomm.com
Changes in v5:
- Name the DT binding document after the new compatible
- Add missing space
- Make the power-domains limits stricter
- Link to v4: https://lore.kernel.org/r/20251104-qcom-sa8255p-emac-v4-0-f76660087cea@linaro.org
Changes in v4:
- Remove the phys property from the SCMI bindings
- Mark the power-domain-names property as required
- Set maxItems for power-domains to 1 for all existing bindings to
maintain the current requirements after modifying the value in the
top-level document
- Link to v3: https://lore.kernel.org/r/20251027-qcom-sa8255p-emac-v3-0-75767b9230ab@linaro.org
Changes in v3:
- Drop 'power' and 'perf' prefixes from power domain names
- Rebase on top of Russell's changes to dwmac
- Rebase on top of even more changes from Russell that are not yet
in next (E1vB6ld-0000000BIPy-2Qi4@rmk-PC.armlinux.org.uk)
- Link to v2: https://lore.kernel.org/all/20251008-qcom-sa8255p-emac-v2-0-92bc29309fce@linaro.org/
Changes in v2:
- Fix the power-domains property in DT bindings
- Rework the DT bindings example
- Drop the DTS patch, it will go upstream separately
- Link to v1: https://lore.kernel.org/r/20250910-qcom-sa8255p-emac-v1-0-32a79cf1e668@linaro.org
---
Bartosz Golaszewski (7):
dt-bindings: phy: document the serdes PHY on sa8255p
phy: qcom: add the SGMII SerDes PHY driver for SCMI systems
dt-bindings: net: qcom: document the ethqos device for SCMI-based systems
net: stmmac: qcom-ethqos: set serdes mode before powerup
net: stmmac: qcom-ethqos: reuse the address of ethqos_emac_driver_data
net: stmmac: qcom-ethqos: factor out linux-level setup into a separate function
net: stmmac: qcom-ethqos: add support for sa8255p
.../bindings/net/allwinner,sun7i-a20-gmac.yaml | 3 +
.../bindings/net/altr,socfpga-stmmac.yaml | 3 +
.../bindings/net/amlogic,meson-dwmac.yaml | 3 +
.../devicetree/bindings/net/eswin,eic7700-eth.yaml | 3 +
.../devicetree/bindings/net/intel,dwmac-plat.yaml | 3 +
.../bindings/net/loongson,ls1b-gmac.yaml | 3 +
.../bindings/net/loongson,ls1c-emac.yaml | 3 +
.../devicetree/bindings/net/nxp,dwmac-imx.yaml | 3 +
.../devicetree/bindings/net/nxp,lpc1850-dwmac.yaml | 3 +
.../devicetree/bindings/net/nxp,s32-dwmac.yaml | 3 +
.../devicetree/bindings/net/qcom,ethqos.yaml | 3 +
.../bindings/net/qcom,sa8255p-ethqos.yaml | 107 ++++++++++
.../devicetree/bindings/net/renesas,rzn1-gmac.yaml | 3 +
.../bindings/net/renesas,rzv2h-gbeth.yaml | 3 +
.../devicetree/bindings/net/rockchip-dwmac.yaml | 3 +
.../devicetree/bindings/net/snps,dwmac.yaml | 5 +-
.../bindings/net/sophgo,cv1800b-dwmac.yaml | 3 +
.../bindings/net/sophgo,sg2044-dwmac.yaml | 3 +
.../bindings/net/starfive,jh7110-dwmac.yaml | 3 +
.../devicetree/bindings/net/stm32-dwmac.yaml | 3 +
.../devicetree/bindings/net/tesla,fsd-ethqos.yaml | 3 +
.../devicetree/bindings/net/thead,th1520-gmac.yaml | 3 +
.../bindings/net/toshiba,visconti-dwmac.yaml | 3 +
.../bindings/phy/qcom,sa8255p-dwmac-sgmii-phy.yaml | 51 +++++
MAINTAINERS | 1 +
.../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 230 ++++++++++++++++-----
drivers/phy/qualcomm/Kconfig | 10 +
drivers/phy/qualcomm/Makefile | 1 +
drivers/phy/qualcomm/phy-qcom-sgmii-eth-scmi.c | 161 +++++++++++++++
29 files changed, 573 insertions(+), 56 deletions(-)
---
base-commit: a8bd881f6c5eeb8fedf29d8dc0df9296de576f93
change-id: 20250704-qcom-sa8255p-emac-8460235ac512
Best regards,
--
Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
^ permalink raw reply
* [PATCH net-next v11 1/7] dt-bindings: phy: document the serdes PHY on sa8255p
From: Bartosz Golaszewski @ 2026-06-29 11:28 UTC (permalink / raw)
To: Bjorn Andersson, Konrad Dybcio, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Alexandre Torgue,
Vinod Koul, Giuseppe Cavallaro, Chen-Yu Tsai, Jernej Skrabec,
Neil Armstrong, Kevin Hilman, Jerome Brunet, Shawn Guo,
Fabio Estevam, Jan Petrous, s32, Mohd Ayaan Anwar, Romain Gantois,
Geert Uytterhoeven, Magnus Damm, Maxime Ripard,
Christophe Roullier, Bartosz Golaszewski, Radu Rendec
Cc: linux-arm-msm, devicetree, linux-kernel, netdev, linux-stm32,
linux-arm-kernel, Drew Fustini, linux-sunxi, linux-amlogic,
linux-mips, imx, linux-renesas-soc, linux-rockchip, sophgo,
linux-riscv, brgl, Bartosz Golaszewski, Bartosz Golaszewski
In-Reply-To: <20260629-qcom-sa8255p-emac-v11-0-1b7fb95b51f9@oss.qualcomm.com>
Describe the SGMII/SerDes PHY present on the Qualcomm sa8255p platforms.
This is essentially the same hardware as sa8775p rev3 but the PHY is
managed by firmware over SCMI.
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
.../bindings/phy/qcom,sa8255p-dwmac-sgmii-phy.yaml | 51 ++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/Documentation/devicetree/bindings/phy/qcom,sa8255p-dwmac-sgmii-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sa8255p-dwmac-sgmii-phy.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4cea6926d1c28872ea7b7aad53088dbbcb74fa99
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom,sa8255p-dwmac-sgmii-phy.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/qcom,sa8255p-dwmac-sgmii-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SerDes/SGMII ethernet PHY controller (firmware managed)
+
+maintainers:
+ - Bartosz Golaszewski <brgl@kernel.org>
+
+description:
+ The SerDes PHY sits between the MAC and the external PHY and provides
+ separate Rx and Tx lines.
+
+properties:
+ compatible:
+ const: qcom,sa8255p-dwmac-sgmii-phy
+
+ reg:
+ items:
+ - description: serdes
+
+ power-domains:
+ maxItems: 1
+
+ power-domain-names:
+ items:
+ - const: serdes
+
+ "#phy-cells":
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - "#phy-cells"
+ - power-domains
+ - power-domain-names
+
+additionalProperties: false
+
+examples:
+ - |
+ phy@8901000 {
+ compatible = "qcom,sa8255p-dwmac-sgmii-phy";
+ reg = <0x08901000 0xe10>;
+ #phy-cells = <0>;
+ power-domains = <&scmi7_dvfs 0>;
+ power-domain-names = "serdes";
+ };
--
2.47.3
^ permalink raw reply related
* [PATCH net-next v11 2/7] phy: qcom: add the SGMII SerDes PHY driver for SCMI systems
From: Bartosz Golaszewski @ 2026-06-29 11:28 UTC (permalink / raw)
To: Bjorn Andersson, Konrad Dybcio, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Alexandre Torgue,
Vinod Koul, Giuseppe Cavallaro, Chen-Yu Tsai, Jernej Skrabec,
Neil Armstrong, Kevin Hilman, Jerome Brunet, Shawn Guo,
Fabio Estevam, Jan Petrous, s32, Mohd Ayaan Anwar, Romain Gantois,
Geert Uytterhoeven, Magnus Damm, Maxime Ripard,
Christophe Roullier, Bartosz Golaszewski, Radu Rendec
Cc: linux-arm-msm, devicetree, linux-kernel, netdev, linux-stm32,
linux-arm-kernel, Drew Fustini, linux-sunxi, linux-amlogic,
linux-mips, imx, linux-renesas-soc, linux-rockchip, sophgo,
linux-riscv, brgl, Bartosz Golaszewski, Bartosz Golaszewski
In-Reply-To: <20260629-qcom-sa8255p-emac-v11-0-1b7fb95b51f9@oss.qualcomm.com>
Implement support for the firmware-managed SGMII/SerDes PHY present on
Qualcomm platforms. Do this as a separate driver from the HLOS-managed
variant as they share almost no code.
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
drivers/phy/qualcomm/Kconfig | 10 ++
drivers/phy/qualcomm/Makefile | 1 +
drivers/phy/qualcomm/phy-qcom-sgmii-eth-scmi.c | 161 +++++++++++++++++++++++++
3 files changed, 172 insertions(+)
diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig
index 60a0ead127fa9f08749e1bc686e15cc5eb341c28..bd7d3fe411d7f3ea333e9e32e54f926a3bdead01 100644
--- a/drivers/phy/qualcomm/Kconfig
+++ b/drivers/phy/qualcomm/Kconfig
@@ -232,3 +232,13 @@ config PHY_QCOM_SGMII_ETH
help
Enable this to support the internal SerDes/SGMII PHY on various
Qualcomm chipsets.
+
+config PHY_QCOM_SGMII_ETH_SCMI
+ tristate "Qualcomm DWMAC SGMII SerDes/PHY driver (firmware managed)"
+ depends on OF && (ARCH_QCOM || COMPILE_TEST)
+ select GENERIC_PHY
+ select PM_GENERIC_DOMAINS
+ help
+ Enable this to support the internal SerDes/SGMII PHY on Qualcomm
+ chipsets where the SerDes hardware (clocks and registers) is owned
+ by the firmware.
diff --git a/drivers/phy/qualcomm/Makefile b/drivers/phy/qualcomm/Makefile
index b71a6a0bed3f1489b1d07664ecd728f1db145986..032e582f2e1af96687484ce28aaba0c2ef73e754 100644
--- a/drivers/phy/qualcomm/Makefile
+++ b/drivers/phy/qualcomm/Makefile
@@ -25,3 +25,4 @@ obj-$(CONFIG_PHY_QCOM_USB_SS) += phy-qcom-usb-ss.o
obj-$(CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2)+= phy-qcom-snps-femto-v2.o
obj-$(CONFIG_PHY_QCOM_IPQ806X_USB) += phy-qcom-ipq806x-usb.o
obj-$(CONFIG_PHY_QCOM_SGMII_ETH) += phy-qcom-sgmii-eth.o
+obj-$(CONFIG_PHY_QCOM_SGMII_ETH_SCMI) += phy-qcom-sgmii-eth-scmi.o
diff --git a/drivers/phy/qualcomm/phy-qcom-sgmii-eth-scmi.c b/drivers/phy/qualcomm/phy-qcom-sgmii-eth-scmi.c
new file mode 100644
index 0000000000000000000000000000000000000000..8ee62189556fe4ff0d8aa2f8b105175e08000b7c
--- /dev/null
+++ b/drivers/phy/qualcomm/phy-qcom-sgmii-eth-scmi.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ *
+ * Firmware-managed variant of the Qualcomm DWMAC SGMII SerDes/PHY driver.
+ */
+
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+
+struct qcom_dwmac_sgmii_phy_scmi {
+ unsigned int perf_state;
+};
+
+static int qcom_dwmac_sgmii_phy_scmi_power_on(struct phy *phy)
+{
+ struct qcom_dwmac_sgmii_phy_scmi *priv = phy_get_drvdata(phy);
+ struct device *dev = phy->dev.parent;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret)
+ return ret;
+
+ ret = dev_pm_genpd_set_performance_state(dev, priv->perf_state);
+ if (ret) {
+ pm_runtime_put(dev);
+ return ret;
+ }
+
+ usleep_range(5000, 10000);
+
+ return 0;
+}
+
+static int qcom_dwmac_sgmii_phy_scmi_power_off(struct phy *phy)
+{
+ struct device *dev = phy->dev.parent;
+
+ dev_pm_genpd_set_performance_state(dev, 0);
+ pm_runtime_put(dev);
+
+ return 0;
+}
+
+static int qcom_dwmac_sgmii_phy_scmi_validate(struct phy *phy, enum phy_mode mode,
+ int submode,
+ union phy_configure_opts *opts)
+{
+ if (mode != PHY_MODE_ETHERNET)
+ return -EINVAL;
+
+ switch (submode) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int qcom_dwmac_sgmii_phy_scmi_set_mode(struct phy *phy, enum phy_mode mode,
+ int submode)
+{
+ struct qcom_dwmac_sgmii_phy_scmi *priv = phy_get_drvdata(phy);
+ int ret;
+
+ ret = qcom_dwmac_sgmii_phy_scmi_validate(phy, mode, submode, NULL);
+ if (ret)
+ return ret;
+
+ priv->perf_state = (submode == PHY_INTERFACE_MODE_2500BASEX) ?
+ SPEED_2500 : SPEED_1000;
+
+ return 0;
+}
+
+static const struct phy_ops qcom_dwmac_sgmii_phy_scmi_ops = {
+ .power_on = qcom_dwmac_sgmii_phy_scmi_power_on,
+ .power_off = qcom_dwmac_sgmii_phy_scmi_power_off,
+ .set_mode = qcom_dwmac_sgmii_phy_scmi_set_mode,
+ .validate = qcom_dwmac_sgmii_phy_scmi_validate,
+ .owner = THIS_MODULE,
+};
+
+static void qcom_dwmac_sgmii_phy_scmi_runtime_disable(void *data)
+{
+ struct device *dev = data;
+
+ pm_runtime_disable(dev);
+}
+
+static int qcom_dwmac_sgmii_phy_scmi_probe(struct platform_device *pdev)
+{
+ struct qcom_dwmac_sgmii_phy_scmi *priv;
+ struct device *dev = &pdev->dev;
+ struct phy_provider *provider;
+ struct phy *phy;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->perf_state = SPEED_1000;
+
+ /*
+ * Enable runtime PM on the provider before creating the PHY so that the
+ * PHY core enables runtime PM on the PHY device too. The single SCMI
+ * power domain has already been attached to this device by the driver
+ * core, so runtime PM votes propagate to firmware through the genpd
+ * device link. No register or clock access is done here - firmware owns
+ * the SerDes.
+ */
+ pm_runtime_enable(dev);
+
+ ret = devm_add_action_or_reset(dev, qcom_dwmac_sgmii_phy_scmi_runtime_disable, dev);
+ if (ret)
+ return ret;
+
+ phy = devm_phy_create(dev, NULL, &qcom_dwmac_sgmii_phy_scmi_ops);
+ if (IS_ERR(phy))
+ return dev_err_probe(dev, PTR_ERR(phy), "failed to create the phy\n");
+
+ phy_set_drvdata(phy, priv);
+
+ provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+ if (IS_ERR(provider))
+ return dev_err_probe(dev, PTR_ERR(provider),
+ "failed to register the PHY provider\n");
+
+ return 0;
+}
+
+static const struct of_device_id qcom_dwmac_sgmii_phy_scmi_of_match[] = {
+ { .compatible = "qcom,sa8255p-dwmac-sgmii-phy" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, qcom_dwmac_sgmii_phy_scmi_of_match);
+
+static struct platform_driver qcom_dwmac_sgmii_phy_scmi_driver = {
+ .probe = qcom_dwmac_sgmii_phy_scmi_probe,
+ .driver = {
+ .name = "qcom-dwmac-sgmii-phy-scmi",
+ .of_match_table = qcom_dwmac_sgmii_phy_scmi_of_match,
+ },
+};
+module_platform_driver(qcom_dwmac_sgmii_phy_scmi_driver);
+
+MODULE_DESCRIPTION("Qualcomm DWMAC SGMII PHY driver (firmware managed)");
+MODULE_AUTHOR("Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>");
+MODULE_LICENSE("GPL");
--
2.47.3
^ permalink raw reply related
* [PATCH net-next v11 3/7] dt-bindings: net: qcom: document the ethqos device for SCMI-based systems
From: Bartosz Golaszewski @ 2026-06-29 11:28 UTC (permalink / raw)
To: Bjorn Andersson, Konrad Dybcio, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Alexandre Torgue,
Vinod Koul, Giuseppe Cavallaro, Chen-Yu Tsai, Jernej Skrabec,
Neil Armstrong, Kevin Hilman, Jerome Brunet, Shawn Guo,
Fabio Estevam, Jan Petrous, s32, Mohd Ayaan Anwar, Romain Gantois,
Geert Uytterhoeven, Magnus Damm, Maxime Ripard,
Christophe Roullier, Bartosz Golaszewski, Radu Rendec
Cc: linux-arm-msm, devicetree, linux-kernel, netdev, linux-stm32,
linux-arm-kernel, Drew Fustini, linux-sunxi, linux-amlogic,
linux-mips, imx, linux-renesas-soc, linux-rockchip, sophgo,
linux-riscv, brgl, Bartosz Golaszewski, Bartosz Golaszewski,
Martin Blumenstingl, Krzysztof Kozlowski, Lad Prabhakar
In-Reply-To: <20260629-qcom-sa8255p-emac-v11-0-1b7fb95b51f9@oss.qualcomm.com>
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Describe the firmware-managed variant of the QCom DesignWare MAC. As the
properties here differ a lot from the HLOS-managed variant, let's put it
in a separate file. Since we need to update the maximum number of power
domains, let's update existing bindings referencing the top-level
snps,dwmac.yaml and limit their maxItems for power-domains to 1.
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
Reviewed-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
.../bindings/net/allwinner,sun7i-a20-gmac.yaml | 3 +
.../bindings/net/altr,socfpga-stmmac.yaml | 3 +
.../bindings/net/amlogic,meson-dwmac.yaml | 3 +
.../devicetree/bindings/net/eswin,eic7700-eth.yaml | 3 +
.../devicetree/bindings/net/intel,dwmac-plat.yaml | 3 +
.../bindings/net/loongson,ls1b-gmac.yaml | 3 +
.../bindings/net/loongson,ls1c-emac.yaml | 3 +
.../devicetree/bindings/net/nxp,dwmac-imx.yaml | 3 +
.../devicetree/bindings/net/nxp,lpc1850-dwmac.yaml | 3 +
.../devicetree/bindings/net/nxp,s32-dwmac.yaml | 3 +
.../devicetree/bindings/net/qcom,ethqos.yaml | 3 +
.../bindings/net/qcom,sa8255p-ethqos.yaml | 107 +++++++++++++++++++++
.../devicetree/bindings/net/renesas,rzn1-gmac.yaml | 3 +
.../bindings/net/renesas,rzv2h-gbeth.yaml | 3 +
.../devicetree/bindings/net/rockchip-dwmac.yaml | 3 +
.../devicetree/bindings/net/snps,dwmac.yaml | 5 +-
.../bindings/net/sophgo,cv1800b-dwmac.yaml | 3 +
.../bindings/net/sophgo,sg2044-dwmac.yaml | 3 +
.../bindings/net/starfive,jh7110-dwmac.yaml | 3 +
.../devicetree/bindings/net/stm32-dwmac.yaml | 3 +
.../devicetree/bindings/net/tesla,fsd-ethqos.yaml | 3 +
.../devicetree/bindings/net/thead,th1520-gmac.yaml | 3 +
.../bindings/net/toshiba,visconti-dwmac.yaml | 3 +
MAINTAINERS | 1 +
24 files changed, 175 insertions(+), 1 deletion(-)
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
index 23e92be33ac8609a16db530782989caed22a5730..b12632545673b2ad0148a677f45a7447309a43cd 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
@@ -40,6 +40,9 @@ properties:
description:
PHY regulator
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml
index fc445ad5a1f1ac490e921696d6f7ca83d15de459..448e617cddc4cda8dbc77e83324495ffd5dfb9be 100644
--- a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml
+++ b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml
@@ -140,6 +140,9 @@ properties:
- description: offset of the control register
- description: shift within the control register
+ power-domains:
+ maxItems: 1
+
patternProperties:
"^mdio[0-9]$":
type: object
diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
index 5c91716d1f21e617543b03c5a90b993f8aee053c..9c9cc3ef384da0270489c21b3426572ea46d9499 100644
--- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
@@ -158,6 +158,9 @@ properties:
interrupt-names:
const: macirq
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
index 65882ff79d8d7e3227e31415676639dd86f7098f..081a21174fd42bc4925d1850b1147545e111363c 100644
--- a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
+++ b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
@@ -95,6 +95,9 @@ properties:
- description: Optional offset of register controlling TXD delay
- description: Optional offset of register controlling RXD delay
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml
index 62c1da36a2b5a29290e5e01be87c48158c4adf89..e41851931b947559c89b0cd6f4756f71046f9594 100644
--- a/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml
+++ b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml
@@ -47,6 +47,9 @@ properties:
interrupt-names:
const: macirq
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- clocks
diff --git a/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml
index c4f3224bad387b87a5b4a3049dabd75f2c4bd42f..c9a131b8d8304c41559a416b324df749c0a87d14 100644
--- a/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml
+++ b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml
@@ -66,6 +66,9 @@ properties:
- mii
- rgmii-id
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml
index 99001b940b8361f69bb917617f857ee99f4b3fa5..49db18423dd807683b9bb297978f5da8ea6cee3d 100644
--- a/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml
+++ b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml
@@ -65,6 +65,9 @@ properties:
- mii
- rmii
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
index e5db346beca9649be4f97727b78fda8973095912..b240c76e7dd5254d0c3752610c4aa848a3c3d65b 100644
--- a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
@@ -83,6 +83,9 @@ properties:
description:
To select RMII reference clock from external.
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- clocks
diff --git a/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml b/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml
index 05acd9bc7616356e68090ebdd4df20e42f70dd7e..f61188ab0dbe3c0cec5b10f7a65dfaff4dc3898f 100644
--- a/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml
@@ -51,6 +51,9 @@ properties:
items:
- const: stmmaceth
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml
index 753a04941659b82b655dad1439ff66f8b37fa18b..fcad2274302d9cff36760184b74918a9835906f7 100644
--- a/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml
@@ -81,6 +81,9 @@ properties:
- const: rx
- const: ptp_ref
+ power-domains:
+ maxItems: 1
+
required:
- clocks
- clock-names
diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
index 423959cb928d945aa3e758a3c803d12bd61ec42b..ef520f8105773e22c0536ff419dad55fe316e1bd 100644
--- a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
@@ -86,6 +86,9 @@ properties:
phy-names:
const: serdes
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- clocks
diff --git a/Documentation/devicetree/bindings/net/qcom,sa8255p-ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,sa8255p-ethqos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5158481e5e5b0feed5b3dcd2cda2f593b7ff62e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qcom,sa8255p-ethqos.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qcom,sa8255p-ethqos.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Ethernet ETHQOS device (firmware managed)
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Konrad Dybcio <konradybcio@kernel.org>
+ - Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+
+description:
+ dwmac based Qualcomm ethernet devices which support Gigabit
+ ethernet (version v2.3.0 and onwards) with clocks, interconnects, etc.
+ managed by firmware
+
+allOf:
+ - $ref: snps,dwmac.yaml#
+
+properties:
+ compatible:
+ const: qcom,sa8255p-ethqos
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: stmmaceth
+ - const: rgmii
+
+ interrupts:
+ items:
+ - description: Combined signal for various interrupt events
+ - description: The interrupt that occurs when HW safety error triggered
+
+ interrupt-names:
+ items:
+ - const: macirq
+ - const: sfty
+
+ power-domains:
+ minItems: 2
+ maxItems: 2
+
+ power-domain-names:
+ items:
+ - const: core
+ - const: mdio
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ items:
+ - const: serdes
+
+ iommus:
+ maxItems: 1
+
+ dma-coherent: true
+
+required:
+ - compatible
+ - reg-names
+ - power-domains
+ - power-domain-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ ethernet: ethernet@23040000 {
+ compatible = "qcom,sa8255p-ethqos";
+ reg = <0x23040000 0x10000>,
+ <0x23056000 0x100>;
+ reg-names = "stmmaceth", "rgmii";
+
+ iommus = <&apps_smmu 0x120 0x7>;
+
+ interrupts = <GIC_SPI 946 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 782 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "sfty";
+
+ dma-coherent;
+
+ snps,tso;
+ snps,pbl = <32>;
+ rx-fifo-depth = <16384>;
+ tx-fifo-depth = <16384>;
+
+ phy-handle = <&ethernet_phy>;
+ phy-mode = "2500base-x";
+
+ snps,mtl-rx-config = <&mtl_rx_setup1>;
+ snps,mtl-tx-config = <&mtl_tx_setup1>;
+
+ power-domains = <&scmi8_pd 0>, <&scmi8_pd 1>;
+ power-domain-names = "core", "mdio";
+
+ phys = <&serdes1>;
+ phy-names = "serdes";
+ };
diff --git a/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml
index 16dd7a2631abf6fb7dc8e0c90755ab1e81915b38..ed0d10a19ca4c47c05f6873c64b0537b90acd15a 100644
--- a/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml
@@ -44,6 +44,9 @@ properties:
phandle pointing to a PCS sub-node compatible with
renesas,rzn1-miic.yaml#
+ power-domains:
+ maxItems: 1
+
required:
- compatible
diff --git a/Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml b/Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml
index 2125b5ddf73dadd8b0d372e83a6b5c4624f5e648..8338834f49cd21df6c697a1f52a8b195c0208c23 100644
--- a/Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml
@@ -154,6 +154,9 @@ properties:
Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml#
(Refer RZ/T2H portion in the DT-binding file)
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
index 80c252845349c4533deff85b052157984d0e2f23..3ec3f6dc2a125908ba98f20b1120311de8967954 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
@@ -123,6 +123,9 @@ properties:
phy-supply:
description: PHY regulator
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- clocks
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 2449311c6d28ed3fbf8c92526ce8b872900653f4..d3aff1df3070d1b22198766ee2e0131dcf925287 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -72,6 +72,7 @@ properties:
- nuvoton,ma35d1-dwmac
- nxp,s32g2-dwmac
- qcom,qcs404-ethqos
+ - qcom,sa8255p-ethqos
- qcom,sa8775p-ethqos
- qcom,sc8280xp-ethqos
- qcom,sm8150-ethqos
@@ -185,7 +186,8 @@ properties:
- const: ahb
power-domains:
- maxItems: 1
+ minItems: 1
+ maxItems: 2
mac-mode:
$ref: ethernet-controller.yaml#/properties/phy-connection-type
@@ -630,6 +632,7 @@ allOf:
- ingenic,x1830-mac
- ingenic,x2000-mac
- qcom,qcs404-ethqos
+ - qcom,sa8255p-ethqos
- qcom,sa8775p-ethqos
- qcom,sc8280xp-ethqos
- qcom,sm8150-ethqos
diff --git a/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml
index b89456f0ef830991135bd17626da98661429596c..e78cbf594c695204040a53ab1e367daa9e12246b 100644
--- a/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml
@@ -49,6 +49,9 @@ properties:
reset-names:
const: stmmaceth
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
index e8d3814db0e94fdcd0f3ab2a9fa8bab972a97ab5..845e2c67d20037496bb1eec6eb73c99cde74e944 100644
--- a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
@@ -52,6 +52,9 @@ properties:
interrupt-names:
maxItems: 1
+ power-domains:
+ maxItems: 1
+
resets:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
index fdcc61c65f87d1dc15a17b9486cde032ffa7798b..c424e77973921e1f9f18135c3106f1c270c9ac87 100644
--- a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
@@ -75,6 +75,9 @@ properties:
The argument one is the offset of phy mode selection, the
argument two is the shift of phy mode selection.
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
index 987254900d0da7aab81237f20b1540ad8a17bd21..29b878079ff0c1a0ef95fc63f2035f478ee039b2 100644
--- a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
@@ -121,6 +121,9 @@ properties:
minItems: 1
maxItems: 2
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- clocks
diff --git a/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml b/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml
index dd7481bb16e59982c26c1a54ae82b8cff85fdfe0..ad635529d676ed6b752ab3bde5152d5cbddcb519 100644
--- a/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml
+++ b/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml
@@ -67,6 +67,9 @@ properties:
- rgmii-rxid
- rgmii-txid
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml
index b3492a9aa4effa73fadf92a63a76ba8bb65a8769..c859f8bb5d582af8b8782f2f89ab5e6ee3d7a46c 100644
--- a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml
+++ b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml
@@ -78,6 +78,9 @@ properties:
items:
- const: macirq
+ power-domains:
+ maxItems: 1
+
required:
- clocks
- clock-names
diff --git a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
index f0f32e18fc8550e6f63b87b60a095972453836c9..efa39eab0256a1102b01872bd848749788c9b4e8 100644
--- a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
@@ -48,6 +48,9 @@ properties:
interrupt-names:
const: macirq
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/MAINTAINERS b/MAINTAINERS
index 15011f5752a994cf1b354f490d6c4e411588df88..266bccd5c82364e20f17c471abec17c6b6330bec 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22184,6 +22184,7 @@ L: netdev@vger.kernel.org
L: linux-arm-msm@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/qcom,ethqos.yaml
+F: Documentation/devicetree/bindings/net/qcom,sa8255p-ethqos.yaml
F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
QUALCOMM FASTRPC DRIVER
--
2.47.3
^ permalink raw reply related
* [PATCH net-next v11 4/7] net: stmmac: qcom-ethqos: set serdes mode before powerup
From: Bartosz Golaszewski @ 2026-06-29 11:28 UTC (permalink / raw)
To: Bjorn Andersson, Konrad Dybcio, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Alexandre Torgue,
Vinod Koul, Giuseppe Cavallaro, Chen-Yu Tsai, Jernej Skrabec,
Neil Armstrong, Kevin Hilman, Jerome Brunet, Shawn Guo,
Fabio Estevam, Jan Petrous, s32, Mohd Ayaan Anwar, Romain Gantois,
Geert Uytterhoeven, Magnus Damm, Maxime Ripard,
Christophe Roullier, Bartosz Golaszewski, Radu Rendec
Cc: linux-arm-msm, devicetree, linux-kernel, netdev, linux-stm32,
linux-arm-kernel, Drew Fustini, linux-sunxi, linux-amlogic,
linux-mips, imx, linux-renesas-soc, linux-rockchip, sophgo,
linux-riscv, brgl, Bartosz Golaszewski, Bartosz Golaszewski
In-Reply-To: <20260629-qcom-sa8255p-emac-v11-0-1b7fb95b51f9@oss.qualcomm.com>
Call phy_set_mode_ext() before phy_power_on() in
qcom_ethqos_serdes_powerup(). This is harmless for existing users but on
SCMI systems this is required for the PHY driver to select the right
performance level - which translates to the link speed. This is done
ahead of adding support for the firmware-managed EMAC on Qualcomm sa8255p.
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index ac7d6d3e205a1ab5b391def879d6f1033a0961b6..47b70b5e706f221c01f1c0ae3b1acafae6641165 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -601,10 +601,19 @@ static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv)
if (ret)
return ret;
+ ret = phy_set_mode_ext(ethqos->serdes_phy, PHY_MODE_ETHERNET,
+ ethqos->phy_mode);
+ if (ret)
+ goto err_out;
+
ret = phy_power_on(ethqos->serdes_phy);
if (ret)
- phy_exit(ethqos->serdes_phy);
+ goto err_out;
+ return 0;
+
+err_out:
+ phy_exit(ethqos->serdes_phy);
return ret;
}
--
2.47.3
^ permalink raw reply related
* [PATCH net-next v11 5/7] net: stmmac: qcom-ethqos: reuse the address of ethqos_emac_driver_data
From: Bartosz Golaszewski @ 2026-06-29 11:28 UTC (permalink / raw)
To: Bjorn Andersson, Konrad Dybcio, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Alexandre Torgue,
Vinod Koul, Giuseppe Cavallaro, Chen-Yu Tsai, Jernej Skrabec,
Neil Armstrong, Kevin Hilman, Jerome Brunet, Shawn Guo,
Fabio Estevam, Jan Petrous, s32, Mohd Ayaan Anwar, Romain Gantois,
Geert Uytterhoeven, Magnus Damm, Maxime Ripard,
Christophe Roullier, Bartosz Golaszewski, Radu Rendec
Cc: linux-arm-msm, devicetree, linux-kernel, netdev, linux-stm32,
linux-arm-kernel, Drew Fustini, linux-sunxi, linux-amlogic,
linux-mips, imx, linux-renesas-soc, linux-rockchip, sophgo,
linux-riscv, brgl, Bartosz Golaszewski, Bartosz Golaszewski
In-Reply-To: <20260629-qcom-sa8255p-emac-v11-0-1b7fb95b51f9@oss.qualcomm.com>
Instead of needlessly copying the fields of ethqos_emac_driver_data into
struct qcom_ethqos, just use the address of the former as a reference.
It's .rodata after all. This is done in order to avoid having either two
calls to of_device_get_match_data() or having to extend the latter with
another field when adding support for SCMI.
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
.../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 37 ++++++++--------------
1 file changed, 14 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 47b70b5e706f221c01f1c0ae3b1acafae6641165..fa3447b90315672d706d5ce7d710bdec6214e4e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -103,12 +103,7 @@ struct qcom_ethqos {
struct clk *link_clk;
struct phy *serdes_phy;
phy_interface_t phy_mode;
-
- const struct ethqos_emac_por *rgmii_por;
- unsigned int num_rgmii_por;
- bool rgmii_config_loopback_en;
- bool has_emac_ge_3;
- bool needs_sgmii_loopback;
+ const struct ethqos_emac_driver_data *data;
};
static u32 rgmii_readl(struct qcom_ethqos *ethqos, unsigned int offset)
@@ -189,7 +184,7 @@ static int ethqos_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i,
static void
qcom_ethqos_set_sgmii_loopback(struct qcom_ethqos *ethqos, bool enable)
{
- if (!ethqos->needs_sgmii_loopback ||
+ if (!ethqos->data->needs_sgmii_loopback ||
ethqos->phy_mode != PHY_INTERFACE_MODE_2500BASEX)
return;
@@ -322,7 +317,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos)
/* Set DLL_EN */
rgmii_setmask(ethqos, SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG);
- if (!ethqos->has_emac_ge_3) {
+ if (!ethqos->data->has_emac_ge_3) {
rgmii_clrmask(ethqos, SDCC_DLL_MCLK_GATING_EN,
SDCC_HC_REG_DLL_CONFIG);
@@ -352,7 +347,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos)
rgmii_setmask(ethqos, SDCC_DLL_CONFIG2_DDR_CAL_EN,
SDCC_HC_REG_DLL_CONFIG2);
- if (!ethqos->has_emac_ge_3) {
+ if (!ethqos->data->has_emac_ge_3) {
rgmii_clrmask(ethqos, SDCC_DLL_CONFIG2_DLL_CLOCK_DIS,
SDCC_HC_REG_DLL_CONFIG2);
@@ -432,7 +427,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed)
rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15,
RGMII_IO_MACRO_CONFIG2);
- if (speed == SPEED_1000 || ethqos->has_emac_ge_3)
+ if (speed == SPEED_1000 || ethqos->data->has_emac_ge_3)
rgmii_setmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
RGMII_IO_MACRO_CONFIG2);
else
@@ -456,7 +451,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed)
* in practice this becomes PRG_RCLK_DLY = 52 * 4 /
* (2 * RX delay ns)
*/
- if (ethqos->has_emac_ge_3) {
+ if (ethqos->data->has_emac_ge_3) {
/* 0.9 ns */
prg_rclk_dly = 115;
} else {
@@ -472,7 +467,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed)
SDCC_HC_REG_DDR_CONFIG);
}
- if (ethqos->rgmii_config_loopback_en)
+ if (ethqos->data->rgmii_config_loopback_en)
loopback = RGMII_CONFIG_LOOPBACK_EN;
else
loopback = 0;
@@ -495,9 +490,9 @@ static void ethqos_fix_mac_speed_rgmii(void *bsp_priv,
dev = &ethqos->pdev->dev;
/* Reset to POR values and enable clk */
- for (i = 0; i < ethqos->num_rgmii_por; i++)
- rgmii_writel(ethqos, ethqos->rgmii_por[i].value,
- ethqos->rgmii_por[i].offset);
+ for (i = 0; i < ethqos->data->num_rgmii_por; i++)
+ rgmii_writel(ethqos, ethqos->data->rgmii_por[i].value,
+ ethqos->data->rgmii_por[i].offset);
ethqos_set_func_clk_en(ethqos);
@@ -511,7 +506,7 @@ static void ethqos_fix_mac_speed_rgmii(void *bsp_priv,
rgmii_setmask(ethqos, SDCC_DLL_CONFIG_PDN,
SDCC_HC_REG_DLL_CONFIG);
- if (ethqos->has_emac_ge_3) {
+ if (ethqos->data->has_emac_ge_3) {
if (speed == SPEED_1000) {
rgmii_writel(ethqos, 0x1800000, SDCC_TEST_CTL);
rgmii_writel(ethqos, 0x2C010800, SDCC_USR_CTL);
@@ -538,7 +533,7 @@ static void ethqos_fix_mac_speed_rgmii(void *bsp_priv,
SDCC_HC_REG_DLL_CONFIG);
/* Set USR_CTL bit 26 with mask of 3 bits */
- if (!ethqos->has_emac_ge_3)
+ if (!ethqos->data->has_emac_ge_3)
rgmii_updatel(ethqos, GENMASK(26, 24), BIT(26),
SDCC_USR_CTL);
@@ -743,11 +738,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
"Failed to map rgmii resource\n");
data = of_device_get_match_data(dev);
- ethqos->rgmii_por = data->rgmii_por;
- ethqos->num_rgmii_por = data->num_rgmii_por;
- ethqos->rgmii_config_loopback_en = data->rgmii_config_loopback_en;
- ethqos->has_emac_ge_3 = data->has_emac_ge_3;
- ethqos->needs_sgmii_loopback = data->needs_sgmii_loopback;
+ ethqos->data = data;
ethqos->link_clk = devm_clk_get(dev, data->link_clk_name ?: "rgmii");
if (IS_ERR(ethqos->link_clk))
@@ -784,7 +775,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
plat_dat->dump_debug_regs = rgmii_dump;
plat_dat->ptp_clk_freq_config = ethqos_ptp_clk_freq_config;
plat_dat->core_type = DWMAC_CORE_GMAC4;
- if (ethqos->has_emac_ge_3)
+ if (data->has_emac_ge_3)
plat_dat->dwmac4_addrs = &data->dwmac4_addrs;
plat_dat->pmt = true;
if (of_property_read_bool(np, "snps,tso"))
--
2.47.3
^ permalink raw reply related
* [PATCH net-next v11 6/7] net: stmmac: qcom-ethqos: factor out linux-level setup into a separate function
From: Bartosz Golaszewski @ 2026-06-29 11:28 UTC (permalink / raw)
To: Bjorn Andersson, Konrad Dybcio, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Alexandre Torgue,
Vinod Koul, Giuseppe Cavallaro, Chen-Yu Tsai, Jernej Skrabec,
Neil Armstrong, Kevin Hilman, Jerome Brunet, Shawn Guo,
Fabio Estevam, Jan Petrous, s32, Mohd Ayaan Anwar, Romain Gantois,
Geert Uytterhoeven, Magnus Damm, Maxime Ripard,
Christophe Roullier, Bartosz Golaszewski, Radu Rendec
Cc: linux-arm-msm, devicetree, linux-kernel, netdev, linux-stm32,
linux-arm-kernel, Drew Fustini, linux-sunxi, linux-amlogic,
linux-mips, imx, linux-renesas-soc, linux-rockchip, sophgo,
linux-riscv, brgl, Bartosz Golaszewski, Bartosz Golaszewski
In-Reply-To: <20260629-qcom-sa8255p-emac-v11-0-1b7fb95b51f9@oss.qualcomm.com>
Ahead of adding support for firmware-controlled EMAC variants, extend
the ethqos_emac_driver_data structure with a setup() callback, implement
it for the existing models and move all operations not required in SCMI
mode into it.
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
.../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 99 +++++++++++++++-------
1 file changed, 68 insertions(+), 31 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index fa3447b90315672d706d5ce7d710bdec6214e4e6..f379570f80680e96f027873cda6a6bca398e22dc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -5,6 +5,7 @@
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/phy.h>
#include <linux/phy/phy.h>
@@ -81,6 +82,8 @@
#define SGMII_10M_RX_CLK_DVDR 0x31
+struct qcom_ethqos;
+
struct ethqos_emac_por {
unsigned int offset;
unsigned int value;
@@ -95,6 +98,8 @@ struct ethqos_emac_driver_data {
const char *link_clk_name;
struct dwmac4_addrs dwmac4_addrs;
bool needs_sgmii_loopback;
+ int (*setup)(struct qcom_ethqos *ethqos,
+ struct plat_stmmacenet_data *plat_dat);
};
struct qcom_ethqos {
@@ -199,6 +204,9 @@ static void ethqos_set_func_clk_en(struct qcom_ethqos *ethqos)
rgmii_setmask(ethqos, RGMII_CONFIG_FUNC_CLK_EN, RGMII_IO_MACRO_CONFIG);
}
+static int ethqos_hlos_setup(struct qcom_ethqos *ethqos,
+ struct plat_stmmacenet_data *plat_dat);
+
static const struct ethqos_emac_por emac_v2_3_0_por[] = {
{ .offset = RGMII_IO_MACRO_CONFIG, .value = 0x00C01343 },
{ .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642C },
@@ -213,6 +221,7 @@ static const struct ethqos_emac_driver_data emac_v2_3_0_data = {
.num_rgmii_por = ARRAY_SIZE(emac_v2_3_0_por),
.rgmii_config_loopback_en = true,
.has_emac_ge_3 = false,
+ .setup = ethqos_hlos_setup,
};
static const struct ethqos_emac_por emac_v2_1_0_por[] = {
@@ -229,6 +238,7 @@ static const struct ethqos_emac_driver_data emac_v2_1_0_data = {
.num_rgmii_por = ARRAY_SIZE(emac_v2_1_0_por),
.rgmii_config_loopback_en = false,
.has_emac_ge_3 = false,
+ .setup = ethqos_hlos_setup,
};
static const struct ethqos_emac_por emac_v3_0_0_por[] = {
@@ -261,6 +271,7 @@ static const struct ethqos_emac_driver_data emac_v3_0_0_data = {
.mtl_low_cred = 0x00008024,
.mtl_low_cred_offset = 0x1000,
},
+ .setup = ethqos_hlos_setup,
};
static const struct ethqos_emac_por emac_v4_0_0_por[] = {
@@ -296,6 +307,7 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = {
.mtl_low_cred = 0x00008024,
.mtl_low_cred_offset = 0x1000,
},
+ .setup = ethqos_hlos_setup,
};
static int ethqos_dll_configure(struct qcom_ethqos *ethqos)
@@ -685,6 +697,58 @@ static void ethqos_ptp_clk_freq_config(struct stmmac_priv *priv)
netdev_dbg(priv->dev, "PTP rate %lu\n", plat_dat->clk_ptp_rate);
}
+static int ethqos_hlos_setup(struct qcom_ethqos *ethqos,
+ struct plat_stmmacenet_data *plat_dat)
+{
+ struct platform_device *pdev = ethqos->pdev;
+ struct device *dev = &pdev->dev;
+ int ret;
+
+ ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii");
+ if (IS_ERR(ethqos->rgmii_base))
+ return dev_err_probe(dev, PTR_ERR(ethqos->rgmii_base),
+ "Failed to map rgmii resource\n");
+
+ ethqos->link_clk = devm_clk_get(dev, ethqos->data->link_clk_name ?: "rgmii");
+ if (IS_ERR(ethqos->link_clk))
+ return dev_err_probe(dev, PTR_ERR(ethqos->link_clk),
+ "Failed to get link_clk\n");
+
+ plat_dat->clks_config = ethqos_clks_config;
+
+ ret = ethqos_clks_config(ethqos, true);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(dev, ethqos_clks_disable, ethqos);
+ if (ret)
+ return ret;
+
+ ethqos_set_clk_tx_rate(ethqos, NULL, plat_dat->phy_interface, SPEED_1000);
+ qcom_ethqos_set_sgmii_loopback(ethqos, true);
+ ethqos_set_func_clk_en(ethqos);
+
+ switch (ethqos->phy_mode) {
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ plat_dat->fix_mac_speed = ethqos_fix_mac_speed_rgmii;
+ break;
+ case PHY_INTERFACE_MODE_2500BASEX:
+ case PHY_INTERFACE_MODE_SGMII:
+ plat_dat->fix_mac_speed = ethqos_fix_mac_speed_sgmii;
+ break;
+ default:
+ break;
+ }
+
+ plat_dat->set_clk_tx_rate = ethqos_set_clk_tx_rate;
+ plat_dat->dump_debug_regs = rgmii_dump;
+
+ return 0;
+}
+
static int qcom_ethqos_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
@@ -706,23 +770,20 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
"dt configuration failed\n");
}
- plat_dat->clks_config = ethqos_clks_config;
-
ethqos = devm_kzalloc(dev, sizeof(*ethqos), GFP_KERNEL);
if (!ethqos)
return -ENOMEM;
ethqos->phy_mode = plat_dat->phy_interface;
+
switch (ethqos->phy_mode) {
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_RXID:
case PHY_INTERFACE_MODE_RGMII_TXID:
- plat_dat->fix_mac_speed = ethqos_fix_mac_speed_rgmii;
break;
case PHY_INTERFACE_MODE_2500BASEX:
case PHY_INTERFACE_MODE_SGMII:
- plat_dat->fix_mac_speed = ethqos_fix_mac_speed_sgmii;
plat_dat->mac_finish = ethqos_mac_finish_serdes;
break;
default:
@@ -732,24 +793,13 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
}
ethqos->pdev = pdev;
- ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii");
- if (IS_ERR(ethqos->rgmii_base))
- return dev_err_probe(dev, PTR_ERR(ethqos->rgmii_base),
- "Failed to map rgmii resource\n");
-
data = of_device_get_match_data(dev);
ethqos->data = data;
- ethqos->link_clk = devm_clk_get(dev, data->link_clk_name ?: "rgmii");
- if (IS_ERR(ethqos->link_clk))
- return dev_err_probe(dev, PTR_ERR(ethqos->link_clk),
- "Failed to get link_clk\n");
-
- ret = ethqos_clks_config(ethqos, true);
- if (ret)
- return ret;
+ if (WARN_ON(!data->setup))
+ return -EINVAL;
- ret = devm_add_action_or_reset(dev, ethqos_clks_disable, ethqos);
+ ret = data->setup(ethqos, plat_dat);
if (ret)
return ret;
@@ -758,21 +808,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
return dev_err_probe(dev, PTR_ERR(ethqos->serdes_phy),
"Failed to get serdes phy\n");
- ethqos_set_clk_tx_rate(ethqos, NULL, plat_dat->phy_interface,
- SPEED_1000);
-
- qcom_ethqos_set_sgmii_loopback(ethqos, true);
- ethqos_set_func_clk_en(ethqos);
-
- /* The clocks are controlled by firmware, so we don't know for certain
- * what clock rate is being used. Hardware documentation mentions that
- * the AHB slave clock will be in the range of 50 to 100MHz, which
- * equates to a MDC between 1.19 and 2.38MHz.
- */
plat_dat->clk_csr = STMMAC_CSR_60_100M;
plat_dat->bsp_priv = ethqos;
- plat_dat->set_clk_tx_rate = ethqos_set_clk_tx_rate;
- plat_dat->dump_debug_regs = rgmii_dump;
plat_dat->ptp_clk_freq_config = ethqos_ptp_clk_freq_config;
plat_dat->core_type = DWMAC_CORE_GMAC4;
if (data->has_emac_ge_3)
--
2.47.3
^ permalink raw reply related
* [PATCH net-next v11 7/7] net: stmmac: qcom-ethqos: add support for sa8255p
From: Bartosz Golaszewski @ 2026-06-29 11:28 UTC (permalink / raw)
To: Bjorn Andersson, Konrad Dybcio, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Alexandre Torgue,
Vinod Koul, Giuseppe Cavallaro, Chen-Yu Tsai, Jernej Skrabec,
Neil Armstrong, Kevin Hilman, Jerome Brunet, Shawn Guo,
Fabio Estevam, Jan Petrous, s32, Mohd Ayaan Anwar, Romain Gantois,
Geert Uytterhoeven, Magnus Damm, Maxime Ripard,
Christophe Roullier, Bartosz Golaszewski, Radu Rendec
Cc: linux-arm-msm, devicetree, linux-kernel, netdev, linux-stm32,
linux-arm-kernel, Drew Fustini, linux-sunxi, linux-amlogic,
linux-mips, imx, linux-renesas-soc, linux-rockchip, sophgo,
linux-riscv, brgl, Bartosz Golaszewski, Bartosz Golaszewski
In-Reply-To: <20260629-qcom-sa8255p-emac-v11-0-1b7fb95b51f9@oss.qualcomm.com>
Extend the driver to support a new model - sa8255p. Unlike the previously
supported variants, this one's power management is done in the firmware
over SCMI. This is modeled in linux using power domains so add a new
emac data variant and a separate setup callback.
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
.../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 83 ++++++++++++++++++++++
1 file changed, 83 insertions(+)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index f379570f80680e96f027873cda6a6bca398e22dc..47175670a32631369a2cf8b00388d9359513e090 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -108,6 +108,7 @@ struct qcom_ethqos {
struct clk *link_clk;
struct phy *serdes_phy;
phy_interface_t phy_mode;
+ struct dev_pm_domain_list *pds;
const struct ethqos_emac_driver_data *data;
};
@@ -206,6 +207,8 @@ static void ethqos_set_func_clk_en(struct qcom_ethqos *ethqos)
static int ethqos_hlos_setup(struct qcom_ethqos *ethqos,
struct plat_stmmacenet_data *plat_dat);
+static int ethqos_scmi_setup(struct qcom_ethqos *ethqos,
+ struct plat_stmmacenet_data *plat_dat);
static const struct ethqos_emac_por emac_v2_3_0_por[] = {
{ .offset = RGMII_IO_MACRO_CONFIG, .value = 0x00C01343 },
@@ -310,6 +313,29 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = {
.setup = ethqos_hlos_setup,
};
+static const struct ethqos_emac_driver_data emac_v4_0_0_scmi_data = {
+ .has_emac_ge_3 = true,
+ .needs_sgmii_loopback = true,
+ .dma_addr_width = 36,
+ .dwmac4_addrs = {
+ .dma_chan = 0x00008100,
+ .dma_chan_offset = 0x1000,
+ .mtl_chan = 0x00008000,
+ .mtl_chan_offset = 0x1000,
+ .mtl_ets_ctrl = 0x00008010,
+ .mtl_ets_ctrl_offset = 0x1000,
+ .mtl_txq_weight = 0x00008018,
+ .mtl_txq_weight_offset = 0x1000,
+ .mtl_send_slp_cred = 0x0000801c,
+ .mtl_send_slp_cred_offset = 0x1000,
+ .mtl_high_cred = 0x00008020,
+ .mtl_high_cred_offset = 0x1000,
+ .mtl_low_cred = 0x00008024,
+ .mtl_low_cred_offset = 0x1000,
+ },
+ .setup = ethqos_scmi_setup,
+};
+
static int ethqos_dll_configure(struct qcom_ethqos *ethqos)
{
struct device *dev = &ethqos->pdev->dev;
@@ -749,6 +775,62 @@ static int ethqos_hlos_setup(struct qcom_ethqos *ethqos,
return 0;
}
+static const char *const ethqos_scmi_pd_names[] = { "core", "mdio" };
+
+static int ethqos_scmi_setup(struct qcom_ethqos *ethqos,
+ struct plat_stmmacenet_data *plat_dat)
+{
+ const struct dev_pm_domain_attach_data pd_data = {
+ .pd_names = ethqos_scmi_pd_names,
+ .num_pd_names = ARRAY_SIZE(ethqos_scmi_pd_names),
+ .pd_flags = PD_FLAG_DEV_LINK_ON,
+ };
+
+ struct platform_device *pdev = ethqos->pdev;
+ struct device *dev = &pdev->dev;
+ int ret;
+
+ ret = devm_pm_domain_attach_list(dev, &pd_data, &ethqos->pds);
+ if (ret < 0)
+ return dev_err_probe(dev, ret,
+ "Failed to attach power domains\n");
+
+ /*
+ * The SerDes lane, its clocks and the MAC AXI/AHB clocks are owned by
+ * firmware and brought up through the SCMI power domains above. The
+ * MAC wrapper itself, however, is in the kernel's register space: the
+ * mux that feeds the SerDes recovered RX clock into the MAC's clk_rx_i
+ * is not configured by firmware. Without it, clk_rx_i never toggles
+ * and the DMA SW-reset polled in dwmac4_dma_reset() never completes.
+ *
+ * Map the wrapper and program the same loopback/functional clock bits
+ * the non-firmware platforms rely on (see ethqos_clks_config) so the
+ * RX clock is present by the time the DMA engine is reset.
+ */
+ ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii");
+ if (IS_ERR(ethqos->rgmii_base))
+ return dev_err_probe(dev, PTR_ERR(ethqos->rgmii_base),
+ "Failed to map rgmii resource\n");
+
+ /*
+ * Run on every runtime resume, which stmmac performs after the power
+ * domains are on but before serdes_powerup() and the DMA reset, so the
+ * wrapper is always configured ahead of the reset.
+ */
+ plat_dat->clks_config = ethqos_clks_config;
+
+ switch (ethqos->phy_mode) {
+ case PHY_INTERFACE_MODE_2500BASEX:
+ case PHY_INTERFACE_MODE_SGMII:
+ plat_dat->fix_mac_speed = ethqos_fix_mac_speed_sgmii;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static int qcom_ethqos_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
@@ -836,6 +918,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
static const struct of_device_id qcom_ethqos_match[] = {
{ .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_data},
+ { .compatible = "qcom,sa8255p-ethqos", .data = &emac_v4_0_0_scmi_data},
{ .compatible = "qcom,sa8775p-ethqos", .data = &emac_v4_0_0_data},
{ .compatible = "qcom,sc8280xp-ethqos", .data = &emac_v3_0_0_data},
{ .compatible = "qcom,sm8150-ethqos", .data = &emac_v2_1_0_data},
--
2.47.3
^ permalink raw reply related
* Re: [PATCH bpf v2 1/4] bpf, sockmap: Reject unhashed UDP sockets on sockmap update
From: Jakub Sitnicki @ 2026-06-29 11:38 UTC (permalink / raw)
To: Michal Luczaj
Cc: Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
John Fastabend, Jiayuan Chen, David S. Miller, Jakub Kicinski,
Simon Horman, Alexei Starovoitov, Cong Wang, Daniel Borkmann,
Andrii Nakryiko, Eduard Zingerman, Kumar Kartikeya Dwivedi,
Martin KaFai Lau, Song Liu, Yonghong Song, Jiri Olsa,
Emil Tsalapatis, Shuah Khan, netdev, bpf, linux-kernel,
linux-kselftest
In-Reply-To: <20260626-sockmap-lookup-udp-leak-v2-1-7e7e201c951a@rbox.co>
On Fri, Jun 26, 2026 at 10:36 PM +02, Michal Luczaj wrote:
> UDP sockets get SOCK_RCU_FREE set when (auto-)bound. This means
> sk_is_refcounted(unbound) = true, while sk_is_refcounted(bound) = false.
>
> Because sockmap accepts unbound UDP sockets, a BPF program can increment a
> socket's refcount via lookup. If the socket is subsequently bound, the
> transition from unbound to bound causes bpf_sk_release() to skip the
> decrement of the refcount, causing a memory leak.
>
> unreferenced object 0xffff88810bc2eb40 (size 1984):
> comm "test_progs", pid 2451, jiffies 4295320596
> hex dump (first 32 bytes):
> 7f 00 00 01 7f 00 00 01 d2 04 1b b7 04 d2 00 00 ................
> 02 00 01 40 00 00 00 00 00 00 00 00 00 00 00 00 ...@............
> backtrace (crc bdee079d):
> kmem_cache_alloc_noprof+0x557/0x660
> sk_prot_alloc+0x69/0x240
> sk_alloc+0x30/0x460
> inet_create+0x2ce/0xf80
> __sock_create+0x25b/0x5c0
> __sys_socket+0x119/0x1d0
> __x64_sys_socket+0x72/0xd0
> do_syscall_64+0xa1/0x5f0
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> Instead of special-casing for refcounted sockets, reject unhashed UDP
> sockets during sockmap updates, as there is no benefit to supporting those.
> This effectively reverts the commit under Fixes, with two exceptions:
>
> 1. sock_map_sk_state_allowed() maintains a fall-through `return true`.
> 2. In the spirit of commit b8b8315e39ff ("bpf, sockmap: Remove unhash
> handler for BPF sockmap usage"), the proto::unhash BPF handler is not
> reintroduced.
>
> Historical note: this issue is related to commit 67312adc96b5 ("bpf: reject
> unhashed sockets in bpf_sk_assign").
>
> Fixes: 0c48eefae712 ("sock_map: Lift socket state restriction for datagram sockets")
> Suggested-by: Kuniyuki Iwashima <kuniyu@google.com>
> Signed-off-by: Michal Luczaj <mhal@rbox.co>
> ---
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
^ permalink raw reply
* [PATCH net-next 0/2] net: do not warn on best-effort skb allocation failures
From: Breno Leitao @ 2026-06-29 11:45 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: netdev, linux-kernel, asantostc, gustavold, vlad.wing,
Breno Leitao, kernel-team
Both netconsole and netpoll keep a small preallocated pool of skbs
(skb_pool) so they can still get a buffer under memory pressure.
On the hot path they first attempt a normal GFP_ATOMIC allocation and only
fall back to the pool when that fails, keeping the pool as a last resort.
This is where the problem happens. If alloc_skb() fails, we now have
more than 100 messages coming from the order:0 page allocation failure,
which consume the scarce pool of skbs, making the real issue disappear.
So the noise (the memory allocation failure report) depletes the skb pool
and crowds out the real message we were trying to deliver.
This is happening on the Meta fleet. The stack trace looks like:
pr/netcon_ext0: page allocation failure: order:0, mode:0x40820(GFP_ATOMIC|__GFP_COMP), nodemask=(null),cpuset=/,mems_allowed=0
...
dump_stack_lvl
warn_alloc
__alloc_pages_slowpath
__alloc_frozen_pages_noprof
alloc_pages_mpol
alloc_slab_page
allocate_slab
kmem_cache_alloc_node_noprof
__alloc_skb
send_udp
netconsole_write
nbcon_emit_next_record
nbcon_emit_one
nbcon_kthread_func
kthread
Solution: Do not warn if netconsole/netpoll fails to allocate these SKBs. Pass
__GFP_NOWARN on these best-effort allocations -- both the hot-path attempt in
netconsole's find_skb() and the pool refill in netpoll's refill_skbs() -- and
let the existing fallback paths do their job quietly. The allocation will
happen on the SKB refill workqueue.
Given I am touching this code, if alloc_skb() fails, reschedule the
workqueue to try later.
Signed-off-by: Breno Leitao <leitao@debian.org>
---
Breno Leitao (2):
netconsole: do not warn when the best-effort skb allocation fails
netpoll: do not warn when the best-effort pool refill fails
drivers/net/netconsole.c | 2 +-
net/core/netpoll.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
---
base-commit: 3d5670d672ae08b8c534b7beed6f57c8b44e7b43
change-id: 20260629-netpoll_no_warn-20174d15bcd3
Best regards,
--
Breno Leitao <leitao@debian.org>
^ permalink raw reply
* [PATCH net-next 1/2] netconsole: do not warn when the best-effort skb allocation fails
From: Breno Leitao @ 2026-06-29 11:45 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: netdev, linux-kernel, asantostc, gustavold, vlad.wing,
Breno Leitao, kernel-team
In-Reply-To: <20260629-netpoll_no_warn-v1-0-f380f0b2cd0c@debian.org>
find_skb() allocates the skb with GFP_ATOMIC as a best-effort attempt:
on failure it falls back to the preallocated skb pool and, failing that,
polls the device and retries. The allocation failing is therefore an
expected and fully handled condition, but without __GFP_NOWARN the page
allocator still emits a warn_alloc() splat with a full stack trace on
every miss, which then consumes the whole SKB pool that would otherwise be
used for printing the real issue rather than the memory failure.
Pass __GFP_NOWARN so the best-effort allocation stays quiet and lets the
existing fallback path do its job.
Signed-off-by: Breno Leitao <leitao@debian.org>
---
drivers/net/netconsole.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 862001d09aa84..c1812a98365b7 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -1737,7 +1737,7 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
netpoll_zap_completion_queue();
repeat:
- skb = alloc_skb(len, GFP_ATOMIC);
+ skb = alloc_skb(len, GFP_ATOMIC | __GFP_NOWARN);
if (!skb)
skb = netcons_skb_pop(np, len);
--
2.53.0-Meta
^ permalink raw reply related
* [PATCH net-next 2/2] netpoll: do not warn when the best-effort pool refill fails
From: Breno Leitao @ 2026-06-29 11:45 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: netdev, linux-kernel, asantostc, gustavold, vlad.wing,
Breno Leitao, kernel-team
In-Reply-To: <20260629-netpoll_no_warn-v1-0-f380f0b2cd0c@debian.org>
refill_skbs() tops up the per-netpoll skb pool with GFP_ATOMIC and
simply stops on the first allocation failure, leaving the pool partially
filled; a later refill tops it up once memory frees up. The allocation
failing is therefore an expected and fully handled condition, but
without __GFP_NOWARN the page allocator emits a warn_alloc() splat with
a full stack trace on every miss.
Pass __GFP_NOWARN so the best-effort refill stays quiet, mirroring the
same change in netconsole's find_skb().
Signed-off-by: Breno Leitao <leitao@debian.org>
---
net/core/netpoll.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 229dde818ab33..85aa513508811 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -221,7 +221,7 @@ static void refill_skbs(struct netpoll *np)
skb_pool = &np->skb_pool;
while (READ_ONCE(skb_pool->qlen) < MAX_SKBS) {
- skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
+ skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC | __GFP_NOWARN);
if (!skb)
break;
--
2.53.0-Meta
^ permalink raw reply related
* Re: [PATCH iwl-net v2 1/2] ice: skip per-VLAN promisc rules when default VSI Rx rule is set
From: Petr Oros @ 2026-06-29 12:06 UTC (permalink / raw)
To: Simon Horman; +Cc: netdev, aleksandr.loktionov
In-Reply-To: <20260623162526.1099096-2-horms@kernel.org>
On 6/23/26 18:25, Simon Horman wrote:
> This is an AI-generated review of your patch. The human sending this
> email has considered the AI review valid, or at least plausible.
> Full review at: https://sashiko.dev
> ---
> Subject: ice: skip per-VLAN promisc rules when default VSI Rx rule is set
>
> This patch optimizes the number of hardware switch rules used when an ice
> port is part of a vlan-filtering bridge. It avoids installing per-VLAN
> promiscuous rules if the broad default VSI Rx rule is already catching
> all traffic.
>
>> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
>> index e2fbe111f8496..efb1f536de6c7 100644
>> --- a/drivers/net/ethernet/intel/ice/ice_main.c
>> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> [ ... ]
>> @@ -442,17 +496,12 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
> [Severity: High]
> This is a pre-existing issue, but does MAC filter overflow fail to force
> promiscuous mode, resulting in silent packet drops?
Pre-existing; the promisc_forced_on fallback is not changed by this patch.
>
> When the hardware MAC filter limit is reached (LIBIE_AQ_RC_ENOSPC), the driver
> attempts to fall back to promiscuous mode by setting promisc_forced_on = true.
>
> However, the execution flow then reaches a check for IFF_PROMISC:
>
> if (vsi->current_netdev_flags & IFF_PROMISC) {
> /* Apply Rx filter rule to get traffic from wire */
> ...
> } else {
> /* Clear Rx filter to remove traffic from wire */
>
> Since the user never explicitly requested IFF_PROMISC, this evaluates to false,
> causing the driver to take the else branch. The else branch actively disables
> the default VSI rule and clears promiscuous mode instead of enabling it. The
> fallback feature appears to be non-functional, leading to lost traffic for
> newly added MACs.
>
> [Severity: High]
> This is a pre-existing issue, but could this function prevent retries of
> incomplete configurations and break multicast promiscuity if any default
> VSI is already active?
v3 will address the multicast half: ice_set_promisc(ICE_MCAST_PROMISC_BITS)
will be issued whenever the netdev is promiscuous, not only inside the
!ice_is_dflt_vsi_in_use() branch.
>
> In ice_vsi_sync_fltr(), the setup of multicast promiscuity and disabling of
> Rx filtering is gated:
>
> if (!ice_is_dflt_vsi_in_use(vsi->port_info)) {
> err = ice_set_dflt_vsi(vsi);
> ...
> err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
> if (err)
> goto out_promisc;
> }
>
> If ice_set_dflt_vsi() succeeds but the subsequent ice_set_promisc() fails,
> the function jumps to out_promisc to trigger a retry. On the next cycle,
> ice_is_dflt_vsi_in_use() returns true because this VSI is now the default,
> causing the entire block to be skipped and leaving the VSI misconfigured.
>
> Furthermore, if another VSI (such as a VF) is already the default VSI,
> the PF VSI entirely skips setting multicast promiscuity, breaking multicast
> reception in promiscuous mode.
>
>> } else {
>> /* Clear Rx filter to remove traffic from wire */
>> if (ice_is_vsi_dflt_vsi(vsi)) {
>> - err = ice_clear_dflt_vsi(vsi);
>> + err = ice_vsi_exit_dflt_promisc(vsi);
>> if (err) {
>> - netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
>> - err, vsi->vsi_num);
>> vsi->current_netdev_flags |=
>> IFF_PROMISC;
>> goto out_promisc;
>> }
>> - if (vsi->netdev->features &
>> - NETIF_F_HW_VLAN_CTAG_FILTER)
>> - vlan_ops->ena_rx_filtering(vsi);
>> }
> [ ... ]
>> @@ -3675,10 +3724,9 @@ int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
>> while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
>> usleep_range(1000, 2000);
> [Severity: High]
> This is a pre-existing issue, but isn't there a risk of TOCTOU races with
> this ad-hoc lock pattern lacking memory barriers?
Pre-existing; the ICE_CFG_BUSY locking is not changed by this patch.
>
> The driver spins on ICE_CFG_BUSY using test_and_set_bit() and usleep_range()
> to guard VSI filter synchronization. The lock is subsequently dropped using
> clear_bit(ICE_CFG_BUSY, vsi->state) at the exit label.
>
> Because clear_bit() lacks a memory release barrier, the CPU or compiler can
> reorder memory stores from the critical section to occur after the flag is
> cleared, leading to data races and corruption of shared filter state on
> weakly ordered architectures.
>
>> - /* Add multicast promisc rule for the VLAN ID to be added if
>> - * all-multicast is currently enabled.
>> - */
>> - if (vsi->current_netdev_flags & IFF_ALLMULTI) {
>> + /* skip the per-VID rule when the DFLT Rx rule already covers this VID */
>> + if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
>> + !ice_is_vsi_dflt_vsi(vsi)) {
>> ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
>> ICE_MCAST_VLAN_PROMISC_BITS,
>> vid);
>> @@ -3696,11 +3744,12 @@ int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
>> if (ret)
>> goto finish;
> [Severity: High]
> This is a pre-existing issue, but what happens if vlan_ops->add_vlan()
> fails here? Is the hardware switch rule leaked?
Pre-existing; the add_vlan() error unwind is not changed by this patch.
>
> When adding a VLAN to an interface with IFF_ALLMULTI enabled, this function
> first allocates a promiscuous switch rule in hardware via
> ice_fltr_set_vsi_promisc().
>
> If this succeeds but the subsequent call to vlan_ops->add_vlan() fails
> (e.g., due to ENOSPC or memory allocation failure), the function returns
> early without rolling back the promiscuous rule. Because the network stack
> considers the VLAN addition to have failed, ice_vlan_rx_kill_vid() will
> never be called for this VID, permanently leaking a Flow Lookup Unit (FLU).
>
>> - /* If all-multicast is currently enabled and this VLAN ID is only one
>> - * besides VLAN-0 we have to update look-up type of multicast promisc
>> - * rule for VLAN-0 from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN.
>> + /* On the first non-zero VLAN, promote the VLAN-0 multicast promisc
>> + * rule from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN. Skip when
>> + * the DFLT Rx rule is installed; it already covers every VID.
>> */
>> if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
>> + !ice_is_vsi_dflt_vsi(vsi) &&
>> ice_vsi_num_non_zero_vlans(vsi) == 1) {
> [ ... ]
>> @@ -3763,11 +3812,12 @@ int ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
>> ICE_MCAST_VLAN_PROMISC_BITS, vid);
>>
>> if (!ice_vsi_has_non_zero_vlans(vsi)) {
>> - /* Update look-up type of multicast promisc rule for VLAN 0
>> - * from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when
>> - * all-multicast is enabled and VLAN 0 is the only VLAN rule.
>> + /* Last non-zero VLAN gone: demote the VLAN-0 multicast promisc
>> + * rule back to ICE_SW_LKUP_PROMISC. Skip when the DFLT Rx rule
>> + * is installed; no recipe swap is needed.
>> */
>> - if (vsi->current_netdev_flags & IFF_ALLMULTI) {
>> + if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
>> + !ice_is_vsi_dflt_vsi(vsi)) {
> [Severity: High]
> Will this modification cause a stale switch rule (ICE_SW_LKUP_PROMISC_VLAN for
> vid=0) to be permanently leaked if the last VLAN is removed while the DFLT VSI
> rule is active?
Real, and introduced by v1/v2. v3 will drop the two vid=0 recipe-swap
guards, so the rule is demoted back to ICE_SW_LKUP_PROMISC when the last
VLAN is removed and never stranded.
Petr
>
> This patch skips demoting the vid=0 multicast promiscuous rule from
> ICE_SW_LKUP_PROMISC_VLAN back to ICE_SW_LKUP_PROMISC when the last VLAN is
> removed, if ice_is_vsi_dflt_vsi(vsi) is true.
>
> However, if the interface later drops IFF_PROMISC (removing the DFLT VSI rule)
> and then drops IFF_ALLMULTI, ice_clear_promisc() is called. Because the
> interface now has zero VLANs, ice_clear_promisc() executes its else branch:
>
> } else {
> status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
> promisc_m, 0);
> }
>
> This only searches and clears rules from the ICE_SW_LKUP_PROMISC recipe. The
> stranded rule in the ICE_SW_LKUP_PROMISC_VLAN recipe is never cleared,
> resulting in a permanent hardware switch rule leak.
>
>> ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
>> ICE_MCAST_VLAN_PROMISC_BITS,
>> 0);
^ permalink raw reply
* [PATCH RFC] net: enforce net sysctl registration
From: Joel Granados @ 2026-06-29 12:22 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: netdev, linux-kernel, Joel Granados
Replace the warning and file permission change with an error when an
"unsafe" net sysctl registration is detected.
One of the barriers preventing the const qualification of the ctl_tables
in the net directory is the permission (->mode) change in
ensure_safe_net_sysctl. This prep commit removes that barrier allowing
the const qualification of net ctl_tables.
Signed-off-by: Joel Granados <joel.granados@kernel.org>
---
What?
=====
Replace warning and file permission change with an error (reject
registration) when an "unsafe" net sysctl registration is detected.
Why?
====
The main motivation for this is to continue with the const qualification
of the ctl_table arrays [1]. The permission change inside
ensure_safe_net_sysctl disallows const qualification as it basically
modifies the entries before running the sysctl registration.
ent->mode &= ~0222;
Analysis
========
* I believe that there is currently no way that the permission change
gets executed [2]
* I found one case where the warning message was posted to lore
(vsock_sysctl_register) [3], but it made it to mainline as part of
the second case in [2].
* We should error anyway because writing to the global sysctl value
through a child netns is indicative of a bug [4].
RFC
===
I'm sending it out as an RFC as I would like to discuss the change to
ensure_safe_net_sysctl in isolation, but my idea is to send out a series
that actually const qualifies the ctl_tables in the net directory. I
would be very thankful if you point me to anything that I have missed in
my analysis that shows that this cannot/shouldn't be done.
Best
[1]
https://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl.git/commit/?h=constfy-sysctl-6.14-rc1&id=1751f872cc97f992ed5c4c72c55588db1f0021e1
[2]
I have identified 4 contexts relevant to the ensure_safe_net_sysctl call
inside the network sysctl registration.
1. When the (struct net) == &init_net (like in iw_cm_init): In this case
ensure_safe_net_sysctl is not executed and permission modification
never happens.
2. When the ctl_table data (->data) gets "manually" assigned to
something other than init_net (like in vsock_sysctl_register): In this
case ensure_safe_net_sysctl *is* executed but the data that is passed
is neither a module address (!is_module_address) nor a kernel core
address (!is_kernel_core_data); so the permission modification never
happens.
3. When the permissions are explicitly changed on a kmemdup'ed ctl_table
array (like in sysctl_core_net_init): in this case
ensure_safe_net_sysctl *is* executed but the permission modification
never happens as the mode is not writable.
4. When ctl have custom proc_handlers (like in nf_lwtunnel_net_init): In
this case ->data is NULL so it is not a module address
(!is_module_address) nor a kernel core address
(!is_kernel_core_data), so permission modification never happens.
It seems like there is no way of executing the permission change in
ensure_safe_net_sysctl. Please correct me if this is inaccurate and help
me find the case that I missed.
[3]
https://lore.kernel.org/all/20260302194926.90378-1-graf@amazon.com/
[4]
The ensure_safe_net_sysctl function was introduced in Commit:
31c4d2f160eb7b17cbead24dc6efed06505a3fee ("net: Ensure net namespace
isolation of sysctls") which states that it is trying to prevent a
leak (indicative of a bug).
---
net/sysctl_net.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 19e8048241bacb18de853d3b904d0f97fd2fe78a..c1630a266f8436d8962ceb87dc629964b2d71260 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -114,16 +114,16 @@ __init int net_sysctl_init(void)
goto out;
}
-/* Verify that sysctls for non-init netns are safe by either:
+/* Return error when sysctls for non-init netns are unsafe by verifying:
* 1) being read-only, or
* 2) having a data pointer which points outside of the global kernel/module
* data segment, and rather into the heap where a per-net object was
* allocated.
*/
-static void ensure_safe_net_sysctl(struct net *net, const char *path,
- struct ctl_table *table, size_t table_size)
+static int ensure_safe_net_sysctl(struct net *net, const char *path,
+ const struct ctl_table *table, size_t table_size)
{
- struct ctl_table *ent;
+ const struct ctl_table *ent;
pr_debug("Registering net sysctl (net %p): %s\n", net, path);
ent = table;
@@ -149,15 +149,14 @@ static void ensure_safe_net_sysctl(struct net *net, const char *path,
else
continue;
- /* If it is writable and points to kernel/module global
- * data, then it's probably a netns leak.
- */
+ /* Warn on netns leak. */
WARN(1, "sysctl %s/%s: data points to %s global data: %ps\n",
- path, ent->procname, where, ent->data);
+ path, ent->procname, where, ent->data);
- /* Make it "safe" by dropping writable perms */
- ent->mode &= ~0222;
+ return -EACCES;
}
+
+ return 0;
}
struct ctl_table_header *register_net_sysctl_sz(struct net *net,
@@ -166,7 +165,8 @@ struct ctl_table_header *register_net_sysctl_sz(struct net *net,
size_t table_size)
{
if (!net_eq(net, &init_net))
- ensure_safe_net_sysctl(net, path, table, table_size);
+ if (ensure_safe_net_sysctl(net, path, table, table_size))
+ return NULL;
return __register_sysctl_table(&net->sysctls, path, table, table_size);
}
---
base-commit: 8cd9520d35a6c38db6567e97dd93b1f11f185dc6
change-id: 20260629-jag-net_const_qualify-f4e09759dac7
Best regards,
--
Joel Granados <joel.granados@kernel.org>
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox