* [PATCH net-next v3 01/12] dpaa2-switch: add LAG configuration API
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 02/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (10 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
Add the necessary APIs to configure and control the LAG support on the
DPAA2 switch object.
- The dpsw_lag_set() function will be used to either verify that a LAG
configuration can be supported or to actually apply it in HW.
- The dpsw_if_set_lag_state() will get used in the next patches to
change the per port LAG state of a specific DPSW interface.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- Add a check in dpsw_lag_set() for cfg->num_ifs against
DPSW_MAX_LAG_IFS
- Add kerneldoc for the dpsw_lag_cfg structure.
Changes in v2:
- none
---
.../net/ethernet/freescale/dpaa2/dpsw-cmd.h | 18 +++++-
drivers/net/ethernet/freescale/dpaa2/dpsw.c | 60 +++++++++++++++++++
drivers/net/ethernet/freescale/dpaa2/dpsw.h | 30 ++++++++++
3 files changed, 107 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index 397d55f2bd99..9a2055c64983 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -12,7 +12,7 @@
/* DPSW Version */
#define DPSW_VER_MAJOR 8
-#define DPSW_VER_MINOR 9
+#define DPSW_VER_MINOR 13
#define DPSW_CMD_BASE_VERSION 1
#define DPSW_CMD_VERSION_2 2
@@ -92,11 +92,14 @@
#define DPSW_CMDID_CTRL_IF_SET_POOLS DPSW_CMD_ID(0x0A1)
#define DPSW_CMDID_CTRL_IF_ENABLE DPSW_CMD_ID(0x0A2)
#define DPSW_CMDID_CTRL_IF_DISABLE DPSW_CMD_ID(0x0A3)
+#define DPSW_CMDID_SET_LAG DPSW_CMD_V2(0x0A4)
#define DPSW_CMDID_CTRL_IF_SET_QUEUE DPSW_CMD_ID(0x0A6)
#define DPSW_CMDID_SET_EGRESS_FLOOD DPSW_CMD_ID(0x0AC)
#define DPSW_CMDID_IF_SET_LEARNING_MODE DPSW_CMD_ID(0x0AD)
+#define DPSW_CMDID_IF_SET_LAG_STATE DPSW_CMD_ID(0x0B0)
+
/* Macros for accessing command fields smaller than 1byte */
#define DPSW_MASK(field) \
GENMASK(DPSW_##field##_SHIFT + DPSW_##field##_SIZE - 1, \
@@ -552,5 +555,18 @@ struct dpsw_cmd_if_reflection {
/* only 2 bits from the LSB */
u8 filter;
};
+
+struct dpsw_cmd_lag {
+ u8 group_id;
+ u8 num_ifs;
+ u8 pad[6];
+ u8 if_id[DPSW_MAX_LAG_IFS];
+ u8 phase;
+};
+
+struct dpsw_cmd_if_set_lag_state {
+ __le16 if_id;
+ u8 tx_enabled;
+};
#pragma pack(pop)
#endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index ab921d75deb2..f75cbdce42ba 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -1659,3 +1659,63 @@ int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
return mc_send_command(mc_io, &cmd);
}
+
+/**
+ * dpsw_lag_set() - Set LAG configuration
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPSW object
+ * @cfg: pointer to LAG configuration
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpsw_lag_set(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ const struct dpsw_lag_cfg *cfg)
+{
+ struct fsl_mc_command cmd = { 0 };
+ struct dpsw_cmd_lag *cmd_params;
+ int i = 0;
+
+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_LAG, cmd_flags, token);
+
+ if (cfg->num_ifs > DPSW_MAX_LAG_IFS)
+ return -EOPNOTSUPP;
+
+ cmd_params = (struct dpsw_cmd_lag *)cmd.params;
+ cmd_params->group_id = cfg->group_id;
+ cmd_params->num_ifs = cfg->num_ifs;
+ cmd_params->phase = cfg->phase;
+
+ for (i = 0; i < cfg->num_ifs; i++)
+ cmd_params->if_id[i] = cfg->if_id[i];
+
+ return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_set_lag_state() - Change per port LAG state
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPSW object
+ * @if_id: ID of the switch interface
+ * @tx_enabled: Value of the per port LAG state
+ * - 0 if the interface will not be active as part of the LAG group
+ * - 1 if the interface will be active in the LAG group
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpsw_if_set_lag_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id, u8 tx_enabled)
+{
+ struct dpsw_cmd_if_set_lag_state *cmd_params;
+ struct fsl_mc_command cmd = { 0 };
+
+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LAG_STATE,
+ cmd_flags, token);
+
+ cmd_params = (struct dpsw_cmd_if_set_lag_state *)cmd.params;
+ cmd_params->if_id = cpu_to_le16(if_id);
+ cmd_params->tx_enabled = tx_enabled;
+
+ return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index b90bd363f47a..89f0267de8e9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -20,6 +20,8 @@ struct fsl_mc_io;
#define DPSW_MAX_IF 64
+#define DPSW_MAX_LAG_IFS 8
+
int dpsw_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpsw_id, u16 *token);
int dpsw_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
@@ -788,4 +790,32 @@ int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
u16 if_id, const struct dpsw_reflection_cfg *cfg);
+
+/* Link Aggregation Group configuration */
+
+#define DPSW_LAG_SET_PHASE_APPLY 0
+#define DPSW_LAG_SET_PHASE_CHECK 1
+
+/**
+ * struct dpsw_lag_cfg - Configuration structure for a LAG group
+ * @group_id: Link aggregation group ID. Valid values are in the
+ * [1, DPSW_MAX_LAG_IFS] range.
+ * @num_ifs: Number of interfaces in this LAG group, valid range is
+ * [0, DPSW_MAX_LAG_IFS].
+ * @if_id: Array containing the interface IDs of the ports part of a LAG group
+ * @phase: Use DPSW_LAG_SET_PHASE_APPLY for LAG configuration processing or
+ * DPSW_LAG_SET_PHASE_CHECK for LAG configuration validation.
+ */
+struct dpsw_lag_cfg {
+ u8 group_id;
+ u8 num_ifs;
+ u8 if_id[DPSW_MAX_LAG_IFS];
+ u8 phase;
+};
+
+int dpsw_lag_set(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ const struct dpsw_lag_cfg *cfg);
+
+int dpsw_if_set_lag_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u16 if_id, u8 tx_enabled);
#endif /* __FSL_DPSW_H */
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH net-next v3 02/12] dpaa2-switch: add support for LAG offload
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 01/12] dpaa2-switch: add LAG configuration API Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-06 2:10 ` Jakub Kicinski
2026-06-03 14:36 ` [PATCH net-next v3 03/12] dpaa2-switch: change dpaa2_switch_port_set_fdb() function prototype Ioana Ciornei
` (9 subsequent siblings)
11 siblings, 1 reply; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
This patch adds the bulk of the changes needed in order to support
offloading of an upper bond device.
First of all, handling of the NETDEV_CHANGEUPPER and
NETDEV_PRECHANGEUPPER events is extended so that the driver is capable
of handling joining or leaving an upper bond device.
All the restrictions around the LAG offload support are added in the
newly added dpaa2_switch_pre_lag_join() function.
The same events are extended to also detect if one of our upper bond
devices changes its own upper device. In this case, on each lower device
that is DPAA2 the corresponding dpaa2_switch_port_[pre]changeupper()
function will be called. This will start the process of joining the same
FDB as the one used by the bridge device.
Setting the 'offload_fwd_mark' field on the skbs is also extended so that
it is set not only when the port is under a bridge but also when it is
under a bond device that is offloaded.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- Fix logic in prechangeupper callback in order to not call
dpaa2_switch_prechangeupper_sanity_checks() on !info->linking
- Fixed up the logic in dpaa2_switch_port_bond_join()'s error path
so that the FDBs are cleaned up properly and we do not end up with leaked
FDBs, meaning FDBs that are marked as in-use while no port is actually
using them.
- Mark the port_priv->lag field as __rcu and use the proper accessors for
it. This will eventually become useful in a later patch when the lag
field will be accessed concurrently from the NAPI context and the
join/leave paths
Changes in v2:
- Extend dpaa2_switch_prechangeupper_sanity_checks() with
netdev_walk_all_lower_dev() so that checks are done on all lower devices
of a bridge, even for the lowers of a bridged bond.
- Manage better the default VLAN on bond join
- Clean-up the error path in dpaa2_switch_port_bond_join()
- Call dpaa2_switch_port_bridge_leave() in case a port is leaving a bond
which is also a bridged port
- Update dpaa2_switch_port_bond_leave() so that in case of any failure
the driver tries to cleanup the LAG offload configuration.
- Call switchdev_bridge_port_unoffload() in case a switch port is leaving a
bridged bond device.
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 497 +++++++++++++++++-
.../ethernet/freescale/dpaa2/dpaa2-switch.h | 14 +-
2 files changed, 504 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index a0bf5b50aae5..d082ecf9b125 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -51,6 +51,17 @@ dpaa2_switch_filter_block_get_unused(struct ethsw_core *ethsw)
return NULL;
}
+static struct dpaa2_switch_lag *
+dpaa2_switch_lag_get_unused(struct ethsw_core *ethsw)
+{
+ int i;
+
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+ if (!ethsw->lags[i].in_use)
+ return &ethsw->lags[i];
+ return NULL;
+}
+
static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
struct net_device *bridge_dev)
{
@@ -2180,15 +2191,30 @@ static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *net
return 0;
}
+static int dpaa2_switch_check_dpsw_instance(struct net_device *dev,
+ struct netdev_nested_priv *priv)
+{
+ struct ethsw_port_priv *port_priv = (struct ethsw_port_priv *)priv->data;
+ struct ethsw_port_priv *other_priv = netdev_priv(dev);
+
+ if (!dpaa2_switch_port_dev_check(dev))
+ return 0;
+
+ if (other_priv->ethsw_data == port_priv->ethsw_data)
+ return 0;
+
+ return 1;
+}
+
static int
dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev,
struct net_device *upper_dev,
struct netlink_ext_ack *extack)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
- struct ethsw_port_priv *other_port_priv;
- struct net_device *other_dev;
- struct list_head *iter;
+ struct netdev_nested_priv data = {
+ .data = (void *)port_priv,
+ };
int err;
if (!br_vlan_enabled(upper_dev)) {
@@ -2203,6 +2229,70 @@ dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev,
return 0;
}
+ err = netdev_walk_all_lower_dev(upper_dev,
+ dpaa2_switch_check_dpsw_instance,
+ &data);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Interface from a different DPSW is in the bridge already");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dpaa2_switch_pre_lag_join(struct net_device *netdev,
+ struct net_device *upper_dev,
+ struct netdev_lag_upper_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct ethsw_port_priv *other_port_priv;
+ struct dpaa2_switch_lag *lag = NULL;
+ struct dpsw_lag_cfg cfg = {0};
+ struct net_device *other_dev;
+ int i, num_ifs = 0, err;
+ struct list_head *iter;
+
+ if (!(ethsw->features & ETHSW_FEATURE_LAG_OFFLOAD)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "LAG offload is supported only for DPSW >= v8.13");
+ return -EOPNOTSUPP;
+ }
+
+ if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can only offload LAG using hash TX type");
+ return -EOPNOTSUPP;
+ }
+
+ if (info->hash_type != NETDEV_LAG_HASH_L23) {
+ NL_SET_ERR_MSG_MOD(extack, "Can only offload L2+L3 Tx hash");
+ return -EOPNOTSUPP;
+ }
+
+ if (!dpaa2_switch_port_has_mac(port_priv)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only switch interfaces connected to MACs can be under a LAG");
+ return -EINVAL;
+ }
+
+ if (vlan_uses_dev(upper_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot join a LAG upper that has a VLAN");
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ if (!ethsw->lags[i].in_use)
+ continue;
+ if (ethsw->lags[i].bond_dev != upper_dev)
+ continue;
+ lag = &ethsw->lags[i];
+ break;
+ }
+
netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
if (!dpaa2_switch_port_dev_check(other_dev))
continue;
@@ -2210,19 +2300,275 @@ dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev,
other_port_priv = netdev_priv(other_dev);
if (other_port_priv->ethsw_data != port_priv->ethsw_data) {
NL_SET_ERR_MSG_MOD(extack,
- "Interface from a different DPSW is in the bridge already");
+ "Interface from a different DPSW is in the bond already");
+ return -EINVAL;
+ }
+
+ cfg.if_id[num_ifs++] = other_port_priv->idx;
+
+ if (num_ifs >= DPSW_MAX_LAG_IFS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot add more than 8 DPAA2 switch ports under the same bond");
return -EINVAL;
}
}
+ if (lag) {
+ cfg.group_id = lag->id;
+ cfg.if_id[num_ifs++] = port_priv->idx;
+ cfg.num_ifs = num_ifs;
+ cfg.phase = DPSW_LAG_SET_PHASE_CHECK;
+
+ err = dpsw_lag_set(ethsw->mc_io, 0, ethsw->dpsw_handle, &cfg);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload LAG configuration");
+ return -EOPNOTSUPP;
+ }
+ }
+
return 0;
}
+static void dpaa2_switch_port_set_lag_group(struct ethsw_port_priv *port_priv,
+ struct net_device *bond_dev)
+{
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct ethsw_port_priv *other_port_priv = NULL;
+ struct dpaa2_switch_lag *lag = NULL;
+ struct dpaa2_switch_lag *other_lag;
+ struct net_device *other_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(bond_dev, other_dev, iter) {
+ if (!dpaa2_switch_port_dev_check(other_dev))
+ continue;
+
+ other_port_priv = netdev_priv(other_dev);
+ other_lag = rtnl_dereference(other_port_priv->lag);
+ if (!other_lag)
+ continue;
+
+ if (other_lag->bond_dev == bond_dev) {
+ rcu_assign_pointer(port_priv->lag, other_lag);
+ return;
+ }
+ }
+
+ /* This is the first interface to be added under a bond device. Find an
+ * unused LAG group. No need to check for NULL since there are the same
+ * amount of DPSW ports as LAG groups, meaning that each port can have
+ * its own LAG group.
+ */
+ lag = dpaa2_switch_lag_get_unused(ethsw);
+ lag->in_use = true;
+ lag->bond_dev = bond_dev;
+ rcu_assign_pointer(port_priv->lag, lag);
+}
+
+static int dpaa2_switch_set_lag_cfg(struct net_device *bond_dev, u8 lag_id,
+ struct ethsw_core *ethsw)
+{
+ struct dpaa2_switch_lag *lag = &ethsw->lags[lag_id - 1];
+ struct ethsw_port_priv *other_port_priv = NULL;
+ struct dpaa2_switch_lag *other_lag;
+ struct dpsw_lag_cfg cfg = {0};
+ u8 num_ifs = 0;
+ int i;
+
+ cfg.group_id = lag_id;
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ other_port_priv = ethsw->ports[i];
+
+ if (!other_port_priv)
+ continue;
+ other_lag = rtnl_dereference(other_port_priv->lag);
+ if (!other_lag)
+ continue;
+ if (other_lag->bond_dev != bond_dev)
+ continue;
+
+ /* No need to check against DPSW_MAX_LAG_IFS since this
+ * was done in the prechangeupper stage. The flow will
+ * not reach this point in case there are more DPAA2
+ * switch ports under the same bond than we can accept.
+ */
+ cfg.if_id[num_ifs++] = other_port_priv->idx;
+ }
+
+ cfg.num_ifs = num_ifs;
+
+ /* No more interfaces under this LAG group, mark it as not in use */
+ if (!num_ifs) {
+ /* We wait here for a grace-period so that we make sure any
+ * readers of the lag structure finished.
+ */
+ synchronize_net();
+
+ lag->bond_dev = NULL;
+ lag->in_use = false;
+ }
+
+ return dpsw_lag_set(ethsw->mc_io, 0, ethsw->dpsw_handle, &cfg);
+}
+
+static int dpaa2_switch_port_bond_join(struct net_device *netdev,
+ struct net_device *bond_dev,
+ struct netdev_lag_upper_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct dpaa2_switch_fdb *old_fdb = port_priv->fdb;
+ struct dpaa2_switch_fdb *new_fdb;
+ struct net_device *bridge_dev;
+ struct dpaa2_switch_lag *lag;
+ int err = 0;
+ u8 lag_id;
+
+ /* Delete the default VLAN, we might change our FDB in this operation */
+ err = dpaa2_switch_port_del_vlan(port_priv, DEFAULT_VLAN_ID);
+ if (err)
+ return err;
+
+ /* Setup the egress flood policy (broadcast, unknown unicast) */
+ dpaa2_switch_port_set_fdb(port_priv, bond_dev);
+ err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+ if (err)
+ goto err_egress_flood;
+
+ /* Recreate the egress flood domain of the FDB that we just left. */
+ err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id);
+ if (err)
+ goto err_egress_flood;
+
+ /* Setup the port_priv->lag pointer for this switch port */
+ dpaa2_switch_port_set_lag_group(port_priv, bond_dev);
+
+ /* Create the LAG configuration and apply it in MC */
+ lag = rtnl_dereference(port_priv->lag);
+ lag_id = lag->id;
+ err = dpaa2_switch_set_lag_cfg(bond_dev, lag_id, ethsw);
+ if (err)
+ goto err_lag_cfg;
+
+ err = dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID,
+ BRIDGE_VLAN_INFO_UNTAGGED |
+ BRIDGE_VLAN_INFO_PVID,
+ false);
+ if (err)
+ goto err_vlan_add;
+
+ /* If the bond device is a switch port, join the bridge as well */
+ bridge_dev = netdev_master_upper_dev_get(bond_dev);
+ if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+ return 0;
+
+ err = dpaa2_switch_port_bridge_join(netdev, bridge_dev, extack);
+ if (err)
+ goto err_bridge_join;
+
+ return err;
+
+err_bridge_join:
+ dpaa2_switch_port_del_vlan(port_priv, DEFAULT_VLAN_ID);
+err_vlan_add:
+err_lag_cfg:
+ rcu_assign_pointer(port_priv->lag, NULL);
+ dpaa2_switch_set_lag_cfg(bond_dev, lag_id, ethsw);
+err_egress_flood:
+ if (port_priv->fdb != old_fdb) {
+ new_fdb = port_priv->fdb;
+
+ /* Explicitly move the port back into the old private FDB */
+ old_fdb->in_use = true;
+ old_fdb->bridge_dev = NULL;
+ port_priv->fdb = old_fdb;
+
+ /* Reprogram the bond FDB to no longer include this port */
+ dpaa2_switch_fdb_set_egress_flood(ethsw, new_fdb->fdb_id);
+ } else {
+ /* Same FDB means that this was the first port joining the
+ * bond. Keep using the same FDB.
+ */
+ old_fdb->bridge_dev = NULL;
+ }
+ /* Make sure that the egress flood of the FDB that we reverted to is as
+ * it should be now, for a standalone port.
+ */
+ dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+
+ dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID,
+ BRIDGE_VLAN_INFO_UNTAGGED |
+ BRIDGE_VLAN_INFO_PVID,
+ false);
+ return err;
+}
+
+static int dpaa2_switch_port_bond_leave(struct net_device *netdev,
+ struct net_device *bond_dev)
+{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct dpaa2_switch_lag *lag = rtnl_dereference(port_priv->lag);
+ struct dpaa2_switch_fdb *old_fdb = port_priv->fdb;
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct net_device *bridge_dev;
+ int err = 0;
+
+ /* In case the bond is a bridge port, leave the upper bridge as well */
+ bridge_dev = netdev_master_upper_dev_get(bond_dev);
+ if (bridge_dev && netif_is_bridge_master(bridge_dev)) {
+ err = dpaa2_switch_port_bridge_leave(netdev);
+ if (err)
+ goto lag_cleanup;
+ }
+
+ /* Delete the default VLAN, we might change our FDB in this operation */
+ err = dpaa2_switch_port_del_vlan(port_priv, DEFAULT_VLAN_ID);
+ if (err)
+ goto lag_cleanup;
+
+ /* Setup the FDB for this port which is now standalone */
+ dpaa2_switch_port_set_fdb(port_priv, NULL);
+
+ /* Setup the egress flood policy (broadcast, unknown unicast).
+ * When the port is not under a bond, only the CTRL interface is part
+ * of the flooding domain besides the actual port.
+ */
+ err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+ if (err)
+ goto lag_cleanup;
+
+ /* Recreate the egress flood domain of the FDB that we just left. */
+ err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id);
+ if (err)
+ goto lag_cleanup;
+
+ /* Add the VLAN 1 as PVID when not under a bond. We need this since
+ * the dpaa2 switch interfaces are not capable to be VLAN unaware
+ */
+ err = dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID,
+ BRIDGE_VLAN_INFO_UNTAGGED |
+ BRIDGE_VLAN_INFO_PVID,
+ false);
+ if (err)
+ goto lag_cleanup;
+
+lag_cleanup:
+ /* Recreate the LAG configuration for the LAG group that we left. In
+ * case any step failed, at least we free up a LAG resource.
+ */
+ rcu_assign_pointer(port_priv->lag, NULL);
+ dpaa2_switch_set_lag_cfg(bond_dev, lag->id, ethsw);
+
+ return err;
+}
+
static int dpaa2_switch_port_prechangeupper(struct net_device *netdev,
struct netdev_notifier_changeupper_info *info)
{
+ struct net_device *upper_dev, *br;
struct netlink_ext_ack *extack;
- struct net_device *upper_dev;
int err;
if (!dpaa2_switch_port_dev_check(netdev))
@@ -2239,6 +2585,24 @@ static int dpaa2_switch_port_prechangeupper(struct net_device *netdev,
if (!info->linking)
dpaa2_switch_port_pre_bridge_leave(netdev);
+ } else if (netif_is_lag_master(upper_dev)) {
+ if (!info->linking) {
+ if (netif_is_bridge_port(upper_dev))
+ dpaa2_switch_port_pre_bridge_leave(netdev);
+ return 0;
+ }
+
+ if (netif_is_bridge_port(upper_dev)) {
+ br = netdev_master_upper_dev_get(upper_dev);
+ err = dpaa2_switch_prechangeupper_sanity_checks(netdev,
+ br,
+ extack);
+ if (err)
+ return err;
+ }
+
+ return dpaa2_switch_pre_lag_join(netdev, upper_dev,
+ info->upper_info, extack);
}
return 0;
@@ -2263,6 +2627,80 @@ static int dpaa2_switch_port_changeupper(struct net_device *netdev,
extack);
else
return dpaa2_switch_port_bridge_leave(netdev);
+ } else if (netif_is_lag_master(upper_dev)) {
+ if (info->linking)
+ return dpaa2_switch_port_bond_join(netdev, upper_dev,
+ info->upper_info,
+ extack);
+ else
+ return dpaa2_switch_port_bond_leave(netdev, upper_dev);
+ }
+
+ return 0;
+}
+
+static int
+dpaa2_switch_lag_prechangeupper(struct net_device *netdev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+ int err = 0;
+
+ if (!netif_is_lag_master(netdev))
+ return 0;
+
+ netdev_for_each_lower_dev(netdev, lower, iter) {
+ if (!dpaa2_switch_port_dev_check(lower))
+ continue;
+
+ err = dpaa2_switch_port_prechangeupper(lower, info);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static int
+dpaa2_switch_lag_changeupper(struct net_device *netdev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+ int err = 0;
+
+ if (!netif_is_lag_master(netdev))
+ return 0;
+
+ netdev_for_each_lower_dev(netdev, lower, iter) {
+ if (!dpaa2_switch_port_dev_check(lower))
+ continue;
+
+ err = dpaa2_switch_port_changeupper(lower, info);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+dpaa2_switch_port_changelowerstate(struct net_device *netdev,
+ struct netdev_lag_lower_state_info *linfo)
+{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ int err;
+
+ if (!rtnl_dereference(port_priv->lag))
+ return 0;
+
+ err = dpsw_if_set_lag_state(ethsw->mc_io, 0, ethsw->dpsw_handle,
+ port_priv->idx, linfo->tx_enabled ? 1 : 0);
+ if (err) {
+ netdev_err(netdev, "dpsw_if_set_lag_state() = %d\n", err);
+ return err;
}
return 0;
@@ -2272,6 +2710,7 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changelowerstate_info *info;
int err = 0;
switch (event) {
@@ -2280,13 +2719,29 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
if (err)
return notifier_from_errno(err);
+ err = dpaa2_switch_lag_prechangeupper(netdev, ptr);
+ if (err)
+ return notifier_from_errno(err);
+
break;
case NETDEV_CHANGEUPPER:
err = dpaa2_switch_port_changeupper(netdev, ptr);
if (err)
return notifier_from_errno(err);
+ err = dpaa2_switch_lag_changeupper(netdev, ptr);
+ if (err)
+ return notifier_from_errno(err);
+
break;
+ case NETDEV_CHANGELOWERSTATE:
+ info = ptr;
+ if (!dpaa2_switch_port_dev_check(netdev))
+ break;
+
+ err = dpaa2_switch_port_changelowerstate(netdev,
+ info->lower_state_info);
+ return notifier_from_errno(err);
}
return NOTIFY_DONE;
@@ -2470,6 +2925,7 @@ static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
dma_addr_t addr = dpaa2_fd_get_addr(fd);
struct ethsw_core *ethsw = fq->ethsw;
struct ethsw_port_priv *port_priv;
+ struct dpaa2_switch_lag *lag;
struct net_device *netdev;
struct vlan_ethhdr *hdr;
struct sk_buff *skb;
@@ -2521,11 +2977,20 @@ static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
}
}
+ rcu_read_lock();
+
+ lag = rcu_dereference(port_priv->lag);
+
skb->dev = netdev;
skb->protocol = eth_type_trans(skb, skb->dev);
- /* Setup the offload_fwd_mark only if the port is under a bridge */
+ /* Setup the offload_fwd_mark only if the port is under a bridge
+ * or under a bond device that is offloaded.
+ */
skb->offload_fwd_mark = !!(port_priv->fdb->bridge_dev);
+ skb->offload_fwd_mark |= !!(lag);
+
+ rcu_read_unlock();
netif_receive_skb(skb);
@@ -2541,6 +3006,9 @@ static void dpaa2_switch_detect_features(struct ethsw_core *ethsw)
if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 6))
ethsw->features |= ETHSW_FEATURE_MAC_ADDR;
+
+ if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 13))
+ ethsw->features |= ETHSW_FEATURE_LAG_OFFLOAD;
}
static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw)
@@ -3330,6 +3798,7 @@ static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
kfree(ethsw->fdbs);
kfree(ethsw->filter_blocks);
kfree(ethsw->ports);
+ kfree(ethsw->lags);
dpaa2_switch_teardown(sw_dev);
@@ -3357,6 +3826,7 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
port_priv = netdev_priv(port_netdev);
port_priv->netdev = port_netdev;
port_priv->ethsw_data = ethsw;
+ rcu_assign_pointer(port_priv->lag, NULL);
mutex_init(&port_priv->mac_lock);
@@ -3464,6 +3934,19 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
goto err_free_fdbs;
}
+ ethsw->lags = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->lags),
+ GFP_KERNEL);
+ if (!ethsw->lags) {
+ err = -ENOMEM;
+ goto err_free_filter;
+ }
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ ethsw->lags[i].bond_dev = NULL;
+ ethsw->lags[i].ethsw = ethsw;
+ ethsw->lags[i].id = i + 1;
+ ethsw->lags[i].in_use = 0;
+ }
+
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
err = dpaa2_switch_probe_port(ethsw, i);
if (err)
@@ -3510,6 +3993,8 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
err_free_netdev:
for (i--; i >= 0; i--)
dpaa2_switch_remove_port(ethsw, i);
+ kfree(ethsw->lags);
+err_free_filter:
kfree(ethsw->filter_blocks);
err_free_fdbs:
kfree(ethsw->fdbs);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 42b3ca73f55d..07301885763c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -41,7 +41,8 @@
#define ETHSW_MAX_FRAME_LENGTH (DPAA2_MFL - VLAN_ETH_HLEN - ETH_FCS_LEN)
#define ETHSW_L2_MAX_FRM(mtu) ((mtu) + VLAN_ETH_HLEN + ETH_FCS_LEN)
-#define ETHSW_FEATURE_MAC_ADDR BIT(0)
+#define ETHSW_FEATURE_MAC_ADDR BIT(0)
+#define ETHSW_FEATURE_LAG_OFFLOAD BIT(1)
/* Number of receive queues (one RX and one TX_CONF) */
#define DPAA2_SWITCH_RX_NUM_FQS 2
@@ -105,6 +106,13 @@ struct dpaa2_switch_fdb {
bool in_use;
};
+struct dpaa2_switch_lag {
+ struct ethsw_core *ethsw;
+ struct net_device *bond_dev;
+ bool in_use;
+ u8 id;
+};
+
struct dpaa2_switch_acl_entry {
struct list_head list;
u16 prio;
@@ -163,6 +171,8 @@ struct ethsw_port_priv {
struct dpaa2_mac *mac;
/* Protects against changes to port_priv->mac */
struct mutex mac_lock;
+
+ struct dpaa2_switch_lag __rcu *lag;
};
/* Switch data */
@@ -190,6 +200,8 @@ struct ethsw_core {
struct dpaa2_switch_fdb *fdbs;
struct dpaa2_switch_filter_block *filter_blocks;
u16 mirror_port;
+
+ struct dpaa2_switch_lag *lags;
};
static inline int dpaa2_switch_get_index(struct ethsw_core *ethsw,
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCH net-next v3 02/12] dpaa2-switch: add support for LAG offload
2026-06-03 14:36 ` [PATCH net-next v3 02/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
@ 2026-06-06 2:10 ` Jakub Kicinski
0 siblings, 0 replies; 18+ messages in thread
From: Jakub Kicinski @ 2026-06-06 2:10 UTC (permalink / raw)
To: ioana.ciornei
Cc: Jakub Kicinski, andrew+netdev, davem, edumazet, pabeni, netdev,
linux-kernel
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
---
dpaa2-switch: add support for LAG offload
This patch adds the bulk of the changes needed in order to support
offloading of an upper bond device.
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> index a0bf5b50aae5f..d082ecf9b1252 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
[ ... ]
> @@ -2210,19 +2300,275 @@ dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev,
> +static int dpaa2_switch_port_bond_join(struct net_device *netdev,
> + struct net_device *bond_dev,
> + struct netdev_lag_upper_info *info,
> + struct netlink_ext_ack *extack)
> +{
[ ... ]
> + /* If the bond device is a switch port, join the bridge as well */
> + bridge_dev = netdev_master_upper_dev_get(bond_dev);
> + if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
> + return 0;
> +
> + err = dpaa2_switch_port_bridge_join(netdev, bridge_dev, extack);
[Severity: High]
Should switchdev_bridge_port_offload() inside dpaa2_switch_port_bridge_join()
receive the actual bridge port (the bond device) rather than the lower
physical port?
For ports enslaved to a LAG, passing the lower physical port might cause
switchdev to immediately abort the offload with -EINVAL, since the netdev
is not directly enslaved to the bridge. I see this was addressed in a later
commit adding a dpaa2_switch_port_to_bridge_port() helper.
[Severity: High]
When a port joins a LAG that is already enslaved to a bridge, is there a risk
of dpaa2_switch_port_set_fdb() corrupting the in_use flag of active FDBs?
Since the port first acquires the bond's shared FDB, and then
dpaa2_switch_port_bridge_join() finds a port already using that same FDB,
the code in dpaa2_switch_port_set_fdb() seems to lack a self-assignment
check. This could mark the shared FDB as unused before reassigning it,
allowing another standalone port to reallocate it. I note this was fixed in
a later commit covering bond scenarios.
[Severity: High]
Will dpaa2_switch_port_set_fdb() skip bonds when iterating through lower
devices, breaking cross-LAG forwarding?
When a port joins a bridge, dpaa2_switch_port_set_fdb() uses a non-recursive
iterator (netdev_for_each_lower_dev). If the other lower device is a bond,
dpaa2_switch_port_dev_check() will return false, skipping nested ports and
failing to share the hardware FDB. I note this was also fixed in a later
commit.
> + if (err)
> + goto err_bridge_join;
> +
> + return err;
> +
> +err_bridge_join:
> + dpaa2_switch_port_del_vlan(port_priv, DEFAULT_VLAN_ID);
> +err_vlan_add:
> +err_lag_cfg:
> + rcu_assign_pointer(port_priv->lag, NULL);
> + dpaa2_switch_set_lag_cfg(bond_dev, lag_id, ethsw);
> +err_egress_flood:
> + if (port_priv->fdb != old_fdb) {
> + new_fdb = port_priv->fdb;
> +
> + /* Explicitly move the port back into the old private FDB */
> + old_fdb->in_use = true;
> + old_fdb->bridge_dev = NULL;
> + port_priv->fdb = old_fdb;
> +
> + /* Reprogram the bond FDB to no longer include this port */
> + dpaa2_switch_fdb_set_egress_flood(ethsw, new_fdb->fdb_id);
[Severity: High]
Are we permanently leaking a hardware FDB resource in this error path?
If dpaa2_switch_port_bridge_join() fails, it will have allocated a new
intermediate FDB and marked it in_use. The cleanup block here restores
the original old_fdb, but does not appear to set new_fdb->in_use back to
false. Repeated failures might exhaust the fixed number of FDBs.
> + } else {
> + /* Same FDB means that this was the first port joining the
> + * bond. Keep using the same FDB.
> + */
> + old_fdb->bridge_dev = NULL;
> + }
[ ... ]
> +static int dpaa2_switch_port_bond_leave(struct net_device *netdev,
> + struct net_device *bond_dev)
> +{
[ ... ]
> +lag_cleanup:
> + /* Recreate the LAG configuration for the LAG group that we left. In
> + * case any step failed, at least we free up a LAG resource.
> + */
> + rcu_assign_pointer(port_priv->lag, NULL);
> + dpaa2_switch_set_lag_cfg(bond_dev, lag->id, ethsw);
[Severity: Critical]
Could this trigger a kernel panic if lag is NULL on the rollback path?
If dpaa2_switch_port_bond_join() fails and sets port_priv->lag to NULL, a
subsequent NETDEV_CHANGEUPPER rollback calls this function. The lag variable
is initialized via rtnl_dereference(port_priv->lag), which would be NULL,
leading to a dereference when accessing lag->id here.
> +
> + return err;
> +}
> +
> static int dpaa2_switch_port_prechangeupper(struct net_device *netdev,
> struct netdev_notifier_changeupper_info *info)
> {
> + struct net_device *upper_dev, *br;
> struct netlink_ext_ack *extack;
> - struct net_device *upper_dev;
> int err;
>
> if (!dpaa2_switch_port_dev_check(netdev))
> @@ -2239,6 +2585,24 @@ static int dpaa2_switch_port_prechangeupper(struct net_device *netdev,
>
> if (!info->linking)
> dpaa2_switch_port_pre_bridge_leave(netdev);
> + } else if (netif_is_lag_master(upper_dev)) {
> + if (!info->linking) {
> + if (netif_is_bridge_port(upper_dev))
> + dpaa2_switch_port_pre_bridge_leave(netdev);
> + return 0;
> + }
> +
> + if (netif_is_bridge_port(upper_dev)) {
> + br = netdev_master_upper_dev_get(upper_dev);
> + err = dpaa2_switch_prechangeupper_sanity_checks(netdev,
> + br,
> + extack);
> + if (err)
> + return err;
> + }
> +
> + return dpaa2_switch_pre_lag_join(netdev, upper_dev,
> + info->upper_info, extack);
> }
>
> return 0;
[Severity: High]
Might this bypass topological checks and allow VLAN uppers on offloaded LAGs?
If a port joins a LAG first, and then a VLAN is added to the LAG, the
event propagates here with the VLAN device as upper_dev. Since it is
neither a bridge nor a LAG master, this function returns 0 without checking
vlan_uses_dev(), inadvertently allowing an unsupported hardware configuration.
> @@ -2263,6 +2627,80 @@ static int dpaa2_switch_port_changeupper(struct net_device *netdev,
> extack);
> else
> return dpaa2_switch_port_bridge_leave(netdev);
[Severity: High]
Does calling dpaa2_switch_port_bridge_leave() wrongly split LAG member ports
into isolated private FDBs?
When a LAG leaves a bridge, this is invoked for each member port. It will
unconditionally call dpaa2_switch_port_set_fdb(port_priv, NULL), which forces
the port to drop the shared FDB and isolates it from the other LAG members.
I note this was also fixed in a later commit covering bond scenarios.
> + } else if (netif_is_lag_master(upper_dev)) {
> + if (info->linking)
> + return dpaa2_switch_port_bond_join(netdev, upper_dev,
> + info->upper_info,
> + extack);
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH net-next v3 03/12] dpaa2-switch: change dpaa2_switch_port_set_fdb() function prototype
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 01/12] dpaa2-switch: add LAG configuration API Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 02/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 04/12] dpaa2-switch: extend dpaa2_switch_port_set_fdb() to cover bond scenarios Ioana Ciornei
` (8 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
Since dpaa2_switch_port_set_fdb() never fails and its return value
was never checked, change its prototype to return void.
Also, instead of determining if the DPAA2 port is joining or leaving an
upper based on the value of the 'bridge_dev' parameter, add the
'linking' parameter to explicitly specify the action. This will enable
us to pass the upper device that we are joining/leaving in all possible
cases. This will get used in the next patches to determine what kind of
device the upper is: a bridge or a bond.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- none
Changes in v2:
- none
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 33 +++++++++----------
1 file changed, 15 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index d082ecf9b125..8026a5014105 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -62,8 +62,9 @@ dpaa2_switch_lag_get_unused(struct ethsw_core *ethsw)
return NULL;
}
-static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
- struct net_device *bridge_dev)
+static void dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
+ struct net_device *upper_dev,
+ bool linking)
{
struct ethsw_core *ethsw = port_priv->ethsw_data;
struct ethsw_port_priv *other_port_priv = NULL;
@@ -73,10 +74,8 @@ static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
struct list_head *iter;
int i;
- /* If we leave a bridge (bridge_dev is NULL), find an unused
- * FDB and use that.
- */
- if (!bridge_dev) {
+ /* If we leave a bridge, find an unused FDB and use that. */
+ if (!linking) {
/* First verify if this is the last port to leave this bridge */
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
if (!ethsw->ports[i] || ethsw->ports[i] == port_priv)
@@ -90,7 +89,7 @@ static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
/* If this is the last user of the FDB, just keep using it. */
if (last_fdb_user) {
port_priv->fdb->bridge_dev = NULL;
- return 0;
+ return;
}
/* Since we are not the last port which leaves a bridge,
@@ -101,12 +100,12 @@ static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
*/
fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
if (WARN_ON(!fdb))
- return 0;
+ return;
port_priv->fdb = fdb;
port_priv->fdb->in_use = true;
port_priv->fdb->bridge_dev = NULL;
- return 0;
+ return;
}
/* The below call to netdev_for_each_lower_dev() demands the RTNL lock
@@ -118,7 +117,7 @@ static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
/* If part of a bridge, use the FDB of the first dpaa2 switch interface
* to be present in that bridge
*/
- netdev_for_each_lower_dev(bridge_dev, other_dev, iter) {
+ netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
if (!dpaa2_switch_port_dev_check(other_dev))
continue;
@@ -144,9 +143,7 @@ static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
}
/* Keep track of the new upper bridge device */
- port_priv->fdb->bridge_dev = bridge_dev;
-
- return 0;
+ port_priv->fdb->bridge_dev = upper_dev;
}
static void dpaa2_switch_fdb_get_flood_cfg(struct ethsw_core *ethsw, u16 fdb_id,
@@ -2062,7 +2059,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
if (err)
return err;
- dpaa2_switch_port_set_fdb(port_priv, upper_dev);
+ dpaa2_switch_port_set_fdb(port_priv, upper_dev, true);
/* Inherit the initial bridge port learning state */
learn_ena = br_port_flag_is_set(netdev, BR_LEARNING);
@@ -2088,7 +2085,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
err_switchdev_offload:
err_egress_flood:
- dpaa2_switch_port_set_fdb(port_priv, NULL);
+ dpaa2_switch_port_set_fdb(port_priv, upper_dev, false);
return err;
}
@@ -2135,7 +2132,7 @@ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
if (err)
netdev_err(netdev, "Unable to clear RX VLANs from old FDB table, err (%d)\n", err);
- dpaa2_switch_port_set_fdb(port_priv, NULL);
+ dpaa2_switch_port_set_fdb(port_priv, port_priv->fdb->bridge_dev, false);
/* Restore all RX VLANs into the new FDB table that we just joined */
err = vlan_for_each(netdev, dpaa2_switch_port_restore_rxvlan, netdev);
@@ -2432,7 +2429,7 @@ static int dpaa2_switch_port_bond_join(struct net_device *netdev,
return err;
/* Setup the egress flood policy (broadcast, unknown unicast) */
- dpaa2_switch_port_set_fdb(port_priv, bond_dev);
+ dpaa2_switch_port_set_fdb(port_priv, bond_dev, true);
err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
if (err)
goto err_egress_flood;
@@ -2529,7 +2526,7 @@ static int dpaa2_switch_port_bond_leave(struct net_device *netdev,
goto lag_cleanup;
/* Setup the FDB for this port which is now standalone */
- dpaa2_switch_port_set_fdb(port_priv, NULL);
+ dpaa2_switch_port_set_fdb(port_priv, bond_dev, false);
/* Setup the egress flood policy (broadcast, unknown unicast).
* When the port is not under a bond, only the CTRL interface is part
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH net-next v3 04/12] dpaa2-switch: extend dpaa2_switch_port_set_fdb() to cover bond scenarios
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (2 preceding siblings ...)
2026-06-03 14:36 ` [PATCH net-next v3 03/12] dpaa2-switch: change dpaa2_switch_port_set_fdb() function prototype Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-06 2:10 ` Jakub Kicinski
2026-06-03 14:36 ` [PATCH net-next v3 05/12] dpaa2-switch: add dpaa2_switch_port_to_bridge_port() helper Ioana Ciornei
` (7 subsequent siblings)
11 siblings, 1 reply; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
The dpaa2_switch_port_set_fdb() function is responsible for determining
what FDB should be used by a port as a consequence of changing its upper
device. This patch extends the function to also cover the circumstances
in which a DPAA2 switch port offloads a bond device.
This will allow us, for example, to setup the same FDB on all DPAA2
switch ports which are under the same bridge, even though not directly
but rather through an upper bond device which is bridged. How the
function does this is by first determining a DPAA2 port is already under
the same bridge and if so, choosing its FDB. To cover the entire
hierarchy in depth, we add an extra walk through all the lowers of a
bridged bond device.
When leaving an upper device, the DPAA2 switch port must find a new FDB
to use. If before it just searched for an unused FDB to go along with
its new standalone status, now it first checks if the port is still part
of a LAG and then uses the FDB of any port that already left the same
bridge.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- none
Changes in v2:
- none
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 121 +++++++++++++-----
1 file changed, 91 insertions(+), 30 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 8026a5014105..b851376b8e1b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -68,15 +68,45 @@ static void dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
{
struct ethsw_core *ethsw = port_priv->ethsw_data;
struct ethsw_port_priv *other_port_priv = NULL;
- struct dpaa2_switch_fdb *fdb;
- struct net_device *other_dev;
+ struct net_device *other_dev, *other_dev2;
+ u16 fdb_id_old = port_priv->fdb->fdb_id;
+ struct dpaa2_switch_fdb *fdb = NULL;
+ struct list_head *iter, *iter2;
bool last_fdb_user = true;
- struct list_head *iter;
int i;
- /* If we leave a bridge, find an unused FDB and use that. */
+ /* If we leave an upper device, be it a bond or a bridge, find an
+ * unused FDB and use that.
+ */
if (!linking) {
- /* First verify if this is the last port to leave this bridge */
+ /* This port leaves a bridge, but it's still under a bond.
+ * Search for the first port under the same bond which already
+ * left the bridge.
+ */
+ if (netif_is_bridge_master(upper_dev) &&
+ rtnl_dereference(port_priv->lag)) {
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ other_port_priv = ethsw->ports[i];
+ if (!other_port_priv)
+ continue;
+
+ if (other_port_priv == port_priv)
+ continue;
+
+ /* Found a port which is under the same bond
+ * device but already left the bridge. Use
+ * this port's FDB.
+ */
+ if (rtnl_dereference(other_port_priv->lag) ==
+ rtnl_dereference(port_priv->lag) &&
+ other_port_priv->fdb->fdb_id != fdb_id_old) {
+ fdb = other_port_priv->fdb;
+ break;
+ }
+ }
+ }
+
+ /* Verify if any other port references our FDB */
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
if (!ethsw->ports[i] || ethsw->ports[i] == port_priv)
continue;
@@ -86,25 +116,32 @@ static void dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
}
}
- /* If this is the last user of the FDB, just keep using it. */
- if (last_fdb_user) {
- port_priv->fdb->bridge_dev = NULL;
- return;
+ if (fdb) {
+ /* Switching to the FDB of another port which is under
+ * the same bond device. Release the previous FDB in
+ * case we were the last to use it.
+ */
+ if (last_fdb_user) {
+ port_priv->fdb->in_use = false;
+ port_priv->fdb->bridge_dev = NULL;
+ }
+ port_priv->fdb = fdb;
+ } else if (last_fdb_user) {
+ /* No other bond lowers to share the FDB with and we
+ * are its last user, just keep it.
+ */
+ } else {
+ fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
+ if (WARN_ON(!fdb))
+ return;
+
+ port_priv->fdb = fdb;
+ port_priv->fdb->in_use = true;
}
- /* Since we are not the last port which leaves a bridge,
- * acquire a new FDB and use it. The number of FDBs is sized to
- * accommodate all switch ports as standalone, each with its
- * private FDB, which means that dpaa2_switch_fdb_get_unused()
- * must succeed here. WARN if not.
- */
- fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
- if (WARN_ON(!fdb))
- return;
+ if (netif_is_bridge_master(upper_dev))
+ port_priv->fdb->bridge_dev = NULL;
- port_priv->fdb = fdb;
- port_priv->fdb->in_use = true;
- port_priv->fdb->bridge_dev = NULL;
return;
}
@@ -114,18 +151,41 @@ static void dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
*/
ASSERT_RTNL();
- /* If part of a bridge, use the FDB of the first dpaa2 switch interface
- * to be present in that bridge
+ /* In case we are joining an upper device, be it a bridge device or a
+ * bond device, we will use the FDB of the first DPAA2 switch interface
+ * that is already present under the same upper device. For this to
+ * happen we have to extend our search so that we can find any DPAA2
+ * interface that is a lower of a bond bridged port
*/
+ other_port_priv = NULL;
netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
- if (!dpaa2_switch_port_dev_check(other_dev))
- continue;
+ if (netif_is_lag_master(other_dev)) {
+ /* Search through all the lowers of the bridged lag */
+ netdev_for_each_lower_dev(other_dev, other_dev2, iter2) {
+ if (!dpaa2_switch_port_dev_check(other_dev2))
+ continue;
+ if (other_dev2 == port_priv->netdev)
+ continue;
+
+ /* Skip the port if it's the same upper */
+ other_port_priv = netdev_priv(other_dev2);
+ if (rtnl_dereference(other_port_priv->lag) ==
+ rtnl_dereference(port_priv->lag)) {
+ other_port_priv = NULL;
+ continue;
+ }
+ break;
+ }
- if (other_dev == port_priv->netdev)
- continue;
+ if (other_port_priv)
+ break;
+ } else if (dpaa2_switch_port_dev_check(other_dev)) {
+ if (other_dev == port_priv->netdev)
+ continue;
- other_port_priv = netdev_priv(other_dev);
- break;
+ other_port_priv = netdev_priv(other_dev);
+ break;
+ }
}
/* The current port is about to change its FDB to the one used by the
@@ -143,7 +203,8 @@ static void dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
}
/* Keep track of the new upper bridge device */
- port_priv->fdb->bridge_dev = upper_dev;
+ if (netif_is_bridge_master(upper_dev))
+ port_priv->fdb->bridge_dev = upper_dev;
}
static void dpaa2_switch_fdb_get_flood_cfg(struct ethsw_core *ethsw, u16 fdb_id,
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH net-next v3 04/12] dpaa2-switch: extend dpaa2_switch_port_set_fdb() to cover bond scenarios
2026-06-03 14:36 ` [PATCH net-next v3 04/12] dpaa2-switch: extend dpaa2_switch_port_set_fdb() to cover bond scenarios Ioana Ciornei
@ 2026-06-06 2:10 ` Jakub Kicinski
0 siblings, 0 replies; 18+ messages in thread
From: Jakub Kicinski @ 2026-06-06 2:10 UTC (permalink / raw)
To: ioana.ciornei
Cc: Jakub Kicinski, andrew+netdev, davem, edumazet, pabeni, netdev,
linux-kernel
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
---
dpaa2-switch: extend dpaa2_switch_port_set_fdb() to cover bond scenarios
The dpaa2_switch_port_set_fdb() function is responsible for determining
what FDB should be used by a port as a consequence of changing its upper
device. This patch extends the function to also cover the circumstances
in which a DPAA2 switch port offloads a bond device.
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> index 8026a50141055..b851376b8e1b8 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> @@ -68,15 +68,45 @@ static void dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
[ ... ]
> - /* Since we are not the last port which leaves a bridge,
> - * acquire a new FDB and use it. The number of FDBs is sized to
> - * accommodate all switch ports as standalone, each with its
> - * private FDB, which means that dpaa2_switch_fdb_get_unused()
> - * must succeed here. WARN if not.
> - */
> - fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
> - if (WARN_ON(!fdb))
> - return;
> + if (fdb) {
> + /* Switching to the FDB of another port which is under
> + * the same bond device. Release the previous FDB in
> + * case we were the last to use it.
> + */
> + if (last_fdb_user) {
> + port_priv->fdb->in_use = false;
> + port_priv->fdb->bridge_dev = NULL;
> + }
> + port_priv->fdb = fdb;
> + } else if (last_fdb_user) {
> + /* No other bond lowers to share the FDB with and we
> + * are its last user, just keep it.
> + */
> + } else {
> + fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
> + if (WARN_ON(!fdb))
> + return;
>
> - port_priv->fdb = fdb;
> - port_priv->fdb->in_use = true;
> - port_priv->fdb->bridge_dev = NULL;
> + port_priv->fdb = fdb;
> + port_priv->fdb->in_use = true;
> }
[Severity: High]
Does this code leak a hardware FDB structure on the error recovery path of
dpaa2_switch_port_bond_join()?
If dpaa2_switch_port_bridge_join() fails inside dpaa2_switch_port_bond_join(),
the error recovery calls dpaa2_switch_port_set_fdb(port_priv, upper_dev, false).
Since the port is not the last user of the shared bridge FDB, this dynamically
allocates a new, unused FDB via dpaa2_switch_fdb_get_unused() and marks it
in_use = true.
Control then returns to dpaa2_switch_port_bond_join(), which continues its own
error recovery and explicitly reverts port_priv->fdb back to its original
pre-bond private FDB (old_fdb):
if (port_priv->fdb != old_fdb) {
new_fdb = port_priv->fdb;
old_fdb->in_use = true;
old_fdb->bridge_dev = NULL;
port_priv->fdb = old_fdb;
...
}
However, the newly allocated FDB (new_fdb) from the bridge_join failure
path is ignored and left marked as in_use = true. Because this FDB is no
longer referenced by any port, it seems to be permanently leaked.
Could this deplete the highly limited pool of hardware FDB slots, eventually
preventing any new standalone ports from functioning?
[ ... ]
> /* The current port is about to change its FDB to the one used by the
> @@ -143,7 +203,8 @@ static void dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
> }
>
> /* Keep track of the new upper bridge device */
> - port_priv->fdb->bridge_dev = upper_dev;
> + if (netif_is_bridge_master(upper_dev))
> + port_priv->fdb->bridge_dev = upper_dev;
> }
[Severity: Critical]
Does this prematurely mark actively shared FDBs as unused?
When a port joins a bond that is already in a bridge alongside another
standalone port:
1. dpaa2_switch_port_bond_join() first gives the port the bond's shared FDB
(which is the bridge FDB).
2. Then it calls dpaa2_switch_port_bridge_join(), which calls
dpaa2_switch_port_set_fdb(..., true).
3. This searches the bridge's lowers and finds the standalone port, skipping
the bond's ports.
Because both the bond and the standalone port are in the same bridge, they
share the exact same FDB.
However, the existing code just above this diff:
if (other_port_priv) {
port_priv->fdb->in_use = false;
port_priv->fdb->bridge_dev = NULL;
port_priv->fdb = other_port_priv->fdb;
}
executes without checking if port_priv->fdb == other_port_priv->fdb. Since
port_priv->fdb was already the bridge's FDB, this unconditionally marks the
actively shared bridge FDB as unused.
Once the RTNL lock is released, could subsequent calls to
dpaa2_switch_fdb_get_unused() by unrelated ports allocate this actively used
bridge FDB, causing L2 isolation breakdown and silent data corruption across
the entire bridge?
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH net-next v3 05/12] dpaa2-switch: add dpaa2_switch_port_to_bridge_port() helper
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (3 preceding siblings ...)
2026-06-03 14:36 ` [PATCH net-next v3 04/12] dpaa2-switch: extend dpaa2_switch_port_set_fdb() to cover bond scenarios Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 06/12] dpaa2-switch: create a separate dpaa2_switch_port_fdb_event() function Ioana Ciornei
` (6 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
With the addition of offloading support for upper bond devices we have
to let the switchdev framework know if a specific bridge port is
offloaded or not, even if that port is a bond device.
For this to happen, create the dpaa2_switch_port_to_bridge_port function
which will determine the bridge port corresponding to a particular DPAA2
switch interface and use it in the switchdev_bridge_port_offload call.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- Access lag field through rtnl_dereference() so that we adapt to the
__rcu change.
- Check that the brport is non-NULL before calling
switchdev_bridge_port_unoffload() on it.
Changes in v2:
- none
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 31 +++++++++++++++++--
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index b851376b8e1b..87cb8270f30e 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2105,6 +2105,21 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
return notifier_from_errno(err);
}
+static struct net_device *
+dpaa2_switch_port_to_bridge_port(struct ethsw_port_priv *port_priv)
+{
+ struct dpaa2_switch_lag *lag;
+
+ if (!port_priv->fdb->bridge_dev)
+ return NULL;
+
+ lag = rtnl_dereference(port_priv->lag);
+ if (lag)
+ return lag->bond_dev;
+
+ return port_priv->netdev;
+}
+
static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
struct net_device *upper_dev,
struct netlink_ext_ack *extack)
@@ -2112,6 +2127,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
struct dpaa2_switch_fdb *old_fdb = port_priv->fdb;
struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct net_device *brport_dev;
bool learn_ena;
int err;
@@ -2123,7 +2139,8 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
dpaa2_switch_port_set_fdb(port_priv, upper_dev, true);
/* Inherit the initial bridge port learning state */
- learn_ena = br_port_flag_is_set(netdev, BR_LEARNING);
+ brport_dev = dpaa2_switch_port_to_bridge_port(port_priv);
+ learn_ena = br_port_flag_is_set(brport_dev, BR_LEARNING);
err = dpaa2_switch_port_set_learning(port_priv, learn_ena);
port_priv->learn_ena = learn_ena;
@@ -2137,7 +2154,8 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
if (err)
goto err_egress_flood;
- err = switchdev_bridge_port_offload(netdev, netdev, NULL,
+ brport_dev = dpaa2_switch_port_to_bridge_port(port_priv);
+ err = switchdev_bridge_port_offload(brport_dev, netdev, NULL,
NULL, NULL, false, extack);
if (err)
goto err_switchdev_offload;
@@ -2172,7 +2190,14 @@ static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, vo
static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev)
{
- switchdev_bridge_port_unoffload(netdev, NULL, NULL, NULL);
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct net_device *brport_dev;
+
+ brport_dev = dpaa2_switch_port_to_bridge_port(port_priv);
+ if (!brport_dev)
+ return;
+
+ switchdev_bridge_port_unoffload(brport_dev, NULL, NULL, NULL);
}
static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH net-next v3 06/12] dpaa2-switch: create a separate dpaa2_switch_port_fdb_event() function
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (4 preceding siblings ...)
2026-06-03 14:36 ` [PATCH net-next v3 05/12] dpaa2-switch: add dpaa2_switch_port_to_bridge_port() helper Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 07/12] dpaa2-switch: check early if an FDB entry should be added Ioana Ciornei
` (5 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
Create a separate dpaa2_switch_port_fdb_event() function that will only
handle the FDB related events. With this change, the
dpaa2_switch_port_event() notifier handler can be written in a way that
it's easier to follow.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- Get hold of port_priv->ethsw_data only after we know the device is a
dpaa2-switch one
Changes in v2:
- none
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 28 ++++++++++++++-----
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 87cb8270f30e..8e109b54aec5 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2880,21 +2880,18 @@ static void dpaa2_switch_event_work(struct work_struct *work)
dev_put(dev);
}
-/* Called under rcu_read_lock() */
-static int dpaa2_switch_port_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
+static int dpaa2_switch_port_fdb_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct ethsw_port_priv *port_priv = netdev_priv(dev);
struct ethsw_switchdev_event_work *switchdev_work;
struct switchdev_notifier_fdb_info *fdb_info = ptr;
- struct ethsw_core *ethsw = port_priv->ethsw_data;
-
- if (event == SWITCHDEV_PORT_ATTR_SET)
- return dpaa2_switch_port_attr_set_event(dev, ptr);
+ struct ethsw_core *ethsw;
if (!dpaa2_switch_port_dev_check(dev))
return NOTIFY_DONE;
+ ethsw = port_priv->ethsw_data;
switchdev_work = kzalloc_obj(*switchdev_work, GFP_ATOMIC);
if (!switchdev_work)
@@ -2933,6 +2930,23 @@ static int dpaa2_switch_port_event(struct notifier_block *nb,
return NOTIFY_BAD;
}
+/* Called under rcu_read_lock() */
+static int dpaa2_switch_port_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case SWITCHDEV_PORT_ATTR_SET:
+ return dpaa2_switch_port_attr_set_event(dev, ptr);
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ return dpaa2_switch_port_fdb_event(nb, event, ptr);
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
static int dpaa2_switch_port_obj_event(unsigned long event,
struct net_device *netdev,
struct switchdev_notifier_port_obj_info *port_obj_info)
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH net-next v3 07/12] dpaa2-switch: check early if an FDB entry should be added
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (5 preceding siblings ...)
2026-06-03 14:36 ` [PATCH net-next v3 06/12] dpaa2-switch: create a separate dpaa2_switch_port_fdb_event() function Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 08/12] dpaa2-switch: consolidate unicast and multicast management Ioana Ciornei
` (4 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
Instead of waiting until the last moment to check if an FDB entry should
be added to HW, move the check earlier (before even scheduling the work
item) so that we don't just waste time.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- none
Changes in v2:
- none
---
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 8e109b54aec5..1b3dc7b4a741 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2850,8 +2850,6 @@ static void dpaa2_switch_event_work(struct work_struct *work)
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
- if (!fdb_info->added_by_user || fdb_info->is_local)
- break;
if (is_unicast_ether_addr(fdb_info->addr))
err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
fdb_info->addr);
@@ -2865,8 +2863,6 @@ static void dpaa2_switch_event_work(struct work_struct *work)
&fdb_info->info, NULL);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- if (!fdb_info->added_by_user || fdb_info->is_local)
- break;
if (is_unicast_ether_addr(fdb_info->addr))
dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr);
else
@@ -2893,6 +2889,9 @@ static int dpaa2_switch_port_fdb_event(struct notifier_block *nb,
return NOTIFY_DONE;
ethsw = port_priv->ethsw_data;
+ if (!fdb_info->added_by_user || fdb_info->is_local)
+ return NOTIFY_DONE;
+
switchdev_work = kzalloc_obj(*switchdev_work, GFP_ATOMIC);
if (!switchdev_work)
return NOTIFY_BAD;
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH net-next v3 08/12] dpaa2-switch: consolidate unicast and multicast management
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (6 preceding siblings ...)
2026-06-03 14:36 ` [PATCH net-next v3 07/12] dpaa2-switch: check early if an FDB entry should be added Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 09/12] dpaa2-switch: offload FDBs added on an upper bond device Ioana Ciornei
` (3 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
This patch consolidates the unicast and multicast management by creating
two new functions - dpaa2_switch_port_fdb_[add|del]() - which can be
used for either uc or mc addresses. Having this common entrypoint for
both types of addresses will help us in the next patches to streamline
the handling of the same addresses on LAG ports.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- none
Changes in v2:
- The rollback in dpaa2_switch_port_mdb_add() uses the newly introduced
dpaa2_switch_port_fdb_del() helper instead of the _mc counterpart.
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 41 +++++++++++++------
1 file changed, 28 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 1b3dc7b4a741..fb754dae8069 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -594,6 +594,28 @@ static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv,
return err;
}
+static int dpaa2_switch_port_fdb_add(struct ethsw_port_priv *port_priv,
+ const unsigned char *addr)
+{
+ int err;
+
+ if (is_unicast_ether_addr(addr))
+ err = dpaa2_switch_port_fdb_add_uc(port_priv, addr);
+ else
+ err = dpaa2_switch_port_fdb_add_mc(port_priv, addr);
+
+ return err;
+}
+
+static int dpaa2_switch_port_fdb_del(struct ethsw_port_priv *port_priv,
+ const unsigned char *addr)
+{
+ if (is_unicast_ether_addr(addr))
+ return dpaa2_switch_port_fdb_del_uc(port_priv, addr);
+ else
+ return dpaa2_switch_port_fdb_del_mc(port_priv, addr);
+}
+
static void dpaa2_switch_port_get_stats(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
@@ -1944,14 +1966,14 @@ static int dpaa2_switch_port_mdb_add(struct net_device *netdev,
if (dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
return -EEXIST;
- err = dpaa2_switch_port_fdb_add_mc(port_priv, mdb->addr);
+ err = dpaa2_switch_port_fdb_add(port_priv, mdb->addr);
if (err)
return err;
err = dev_mc_add(netdev, mdb->addr);
if (err) {
netdev_err(netdev, "dev_mc_add err %d\n", err);
- dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
+ dpaa2_switch_port_fdb_del(port_priv, mdb->addr);
}
return err;
@@ -2062,7 +2084,7 @@ static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
if (!dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
return -ENOENT;
- err = dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
+ err = dpaa2_switch_port_fdb_del(port_priv, mdb->addr);
if (err)
return err;
@@ -2850,12 +2872,8 @@ static void dpaa2_switch_event_work(struct work_struct *work)
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
- if (is_unicast_ether_addr(fdb_info->addr))
- err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
- fdb_info->addr);
- else
- err = dpaa2_switch_port_fdb_add_mc(netdev_priv(dev),
- fdb_info->addr);
+ err = dpaa2_switch_port_fdb_add(netdev_priv(dev),
+ fdb_info->addr);
if (err)
break;
fdb_info->offloaded = true;
@@ -2863,10 +2881,7 @@ static void dpaa2_switch_event_work(struct work_struct *work)
&fdb_info->info, NULL);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- if (is_unicast_ether_addr(fdb_info->addr))
- dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr);
- else
- dpaa2_switch_port_fdb_del_mc(netdev_priv(dev), fdb_info->addr);
+ dpaa2_switch_port_fdb_del(netdev_priv(dev), fdb_info->addr);
break;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH net-next v3 09/12] dpaa2-switch: offload FDBs added on an upper bond device
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (7 preceding siblings ...)
2026-06-03 14:36 ` [PATCH net-next v3 08/12] dpaa2-switch: consolidate unicast and multicast management Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-06 2:10 ` Jakub Kicinski
2026-06-03 14:36 ` [PATCH net-next v3 10/12] dpaa2-switch: offload port objects " Ioana Ciornei
` (2 subsequent siblings)
11 siblings, 1 reply; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
This patch adds support for offloading FDB entries added on upper bond
devices.
First of all, the call to switchdev_bridge_port_offload() is updated so
that the notifier blocks needed for FDB events replay are available to
the bridge core.
Using switchdev_handle_*() helpers is also necessary because each FDB
event needs to be fanned out to any DPAA2 switch lower device. This
triggers another change in the return type used by the
dpaa2_switch_port_fdb_event() - from notifier types to regular errno
types.
Handling of the SWITCHDEV_FDB_ADD_TO_DEVICE/SWITCHDEV_FDB_DEL_TO_DEVICE
events is updated so that the newly added dpaa2_switch_lag_fdb_add() /
dpaa2_switch_lag_fdb_del() functions are called anytime a port is under
a bond device. This will allow us to manage refcounting on FDB entries
which are added on the upper bond devices.
The DPAA2 switch uses shared-VLAN learning which means that the vid
parameter is not used when adding an FDB entry to HW. The current
behavior when dealing with FDB entries with the same MAC address but
different VLANs is to add the entry to HW every time while removal will
get done on the first 'bridge fdb del' command issued by the user.
The same behavior is kept also for FDBs added on bond devices by keeping
the refcount on the {vid, addr} pair while the HW operation disregards
entirely the vid parameter.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- Update dpaa2_switch_foreign_dev_check() so that we check if there is
any port in the same switch as dev which offloads foreign_dev in case
this is a bridge port.
- Add mutex_destroy on the per LAG fdb_lock
- Make sure that all FDB events were processed on the workqueue on the
.remove() path.
- Delete the refcounted entry in dpaa2_switch_lag_fdb_del() as soon as
possible, even if the HW deletion would fail
- Access the port_priv->lag field only through the proper rcu accessors.
Changes in v2:
- Update dpaa2_switch_foreign_dev_check() so that we check whether there
is an offloaded path between the switch port and the foreign net_device.
Before this change we also checked if the foreign_dev was offloaded or
not by the switch port.
- Update the switchdev_bridge_port_unoffload() by passing it the proper
context and the notifier blocks.
- Add dev_hold() and dev_put() calls for orig_dev
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 222 ++++++++++++++++--
.../ethernet/freescale/dpaa2/dpaa2-switch.h | 24 ++
2 files changed, 223 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index fb754dae8069..fadebf6a758d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -25,6 +25,9 @@
#define DEFAULT_VLAN_ID 1
+static struct notifier_block dpaa2_switch_port_switchdev_nb;
+static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb;
+
static u16 dpaa2_switch_port_get_fdb_id(struct ethsw_port_priv *port_priv)
{
return port_priv->fdb->fdb_id;
@@ -62,6 +65,27 @@ dpaa2_switch_lag_get_unused(struct ethsw_core *ethsw)
return NULL;
}
+static struct ethsw_port_priv *
+dpaa2_switch_lag_get_primary(struct dpaa2_switch_lag *lag)
+{
+ struct ethsw_core *ethsw = lag->ethsw;
+ struct ethsw_port_priv *port_priv;
+ struct dpaa2_switch_lag *port_lag;
+ int i;
+
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ port_priv = ethsw->ports[i];
+ if (!port_priv)
+ continue;
+
+ port_lag = rtnl_dereference(port_priv->lag);
+ if (port_lag == lag)
+ return port_priv;
+ }
+
+ return NULL;
+}
+
static void dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
struct net_device *upper_dev,
bool linking)
@@ -616,6 +640,91 @@ static int dpaa2_switch_port_fdb_del(struct ethsw_port_priv *port_priv,
return dpaa2_switch_port_fdb_del_mc(port_priv, addr);
}
+static struct dpaa2_mac_addr *
+dpaa2_switch_mac_addr_find(struct list_head *addr_list,
+ const unsigned char *addr, u16 vid)
+{
+ struct dpaa2_mac_addr *a;
+
+ list_for_each_entry(a, addr_list, list)
+ if (ether_addr_equal(a->addr, addr) && a->vid == vid)
+ return a;
+
+ return NULL;
+}
+
+static int dpaa2_switch_lag_fdb_add(struct dpaa2_switch_lag *lag,
+ const unsigned char *addr, u16 vid)
+{
+ struct ethsw_port_priv *port_priv;
+ struct dpaa2_mac_addr *a;
+ int err = 0;
+
+ mutex_lock(&lag->fdb_lock);
+
+ a = dpaa2_switch_mac_addr_find(&lag->fdbs, addr, vid);
+ if (a) {
+ refcount_inc(&a->refcount);
+ goto out;
+ }
+
+ port_priv = dpaa2_switch_lag_get_primary(lag);
+ if (!port_priv) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ a = kzalloc(sizeof(*a), GFP_KERNEL);
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = dpaa2_switch_port_fdb_add(port_priv, addr);
+ if (err) {
+ kfree(a);
+ goto out;
+ }
+
+ ether_addr_copy(a->addr, addr);
+ a->vid = vid;
+ refcount_set(&a->refcount, 1);
+ list_add_tail(&a->list, &lag->fdbs);
+
+out:
+ mutex_unlock(&lag->fdb_lock);
+
+ return err;
+}
+
+static void dpaa2_switch_lag_fdb_del(struct dpaa2_switch_lag *lag,
+ const unsigned char *addr, u16 vid)
+{
+ struct ethsw_port_priv *port_priv;
+ struct dpaa2_mac_addr *a;
+
+ mutex_lock(&lag->fdb_lock);
+
+ a = dpaa2_switch_mac_addr_find(&lag->fdbs, addr, vid);
+ if (!a)
+ goto out;
+
+ if (!refcount_dec_and_test(&a->refcount))
+ goto out;
+
+ list_del(&a->list);
+ kfree(a);
+
+ port_priv = dpaa2_switch_lag_get_primary(lag);
+ if (!port_priv)
+ goto out;
+
+ dpaa2_switch_port_fdb_del(port_priv, addr);
+
+out:
+ mutex_unlock(&lag->fdb_lock);
+}
+
static void dpaa2_switch_port_get_stats(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
@@ -1564,6 +1673,33 @@ bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
return netdev->netdev_ops == &dpaa2_switch_port_ops;
}
+static bool dpaa2_switch_foreign_dev_check(const struct net_device *dev,
+ const struct net_device *foreign_dev)
+{
+ struct ethsw_port_priv *port_priv = netdev_priv(dev);
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
+ struct ethsw_port_priv *other_port;
+ int i;
+
+ if (netif_is_bridge_master(foreign_dev))
+ if (port_priv->fdb->bridge_dev == foreign_dev)
+ return false;
+
+ if (netif_is_bridge_port(foreign_dev)) {
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+ other_port = ethsw->ports[i];
+
+ if (!other_port)
+ continue;
+ if (dpaa2_switch_port_offloads_bridge_port(other_port,
+ foreign_dev))
+ return false;
+ }
+ }
+
+ return true;
+}
+
static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv)
{
struct fsl_mc_device *dpsw_port_dev, *dpmac_dev;
@@ -2177,8 +2313,10 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
goto err_egress_flood;
brport_dev = dpaa2_switch_port_to_bridge_port(port_priv);
- err = switchdev_bridge_port_offload(brport_dev, netdev, NULL,
- NULL, NULL, false, extack);
+ err = switchdev_bridge_port_offload(brport_dev, netdev, port_priv,
+ &dpaa2_switch_port_switchdev_nb,
+ &dpaa2_switch_port_switchdev_blocking_nb,
+ false, extack);
if (err)
goto err_switchdev_offload;
@@ -2219,7 +2357,9 @@ static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev)
if (!brport_dev)
return;
- switchdev_bridge_port_unoffload(brport_dev, NULL, NULL, NULL);
+ switchdev_bridge_port_unoffload(brport_dev, port_priv,
+ &dpaa2_switch_port_switchdev_nb,
+ &dpaa2_switch_port_switchdev_blocking_nb);
}
static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
@@ -2856,32 +2996,46 @@ struct ethsw_switchdev_event_work {
struct work_struct work;
struct switchdev_notifier_fdb_info fdb_info;
struct net_device *dev;
+ struct net_device *orig_dev;
unsigned long event;
+ u16 vid;
};
static void dpaa2_switch_event_work(struct work_struct *work)
{
struct ethsw_switchdev_event_work *switchdev_work =
container_of(work, struct ethsw_switchdev_event_work, work);
+ struct net_device *orig_dev = switchdev_work->orig_dev;
struct net_device *dev = switchdev_work->dev;
+ struct ethsw_port_priv *port_priv = netdev_priv(dev);
struct switchdev_notifier_fdb_info *fdb_info;
+ struct dpaa2_switch_lag *lag;
int err;
rtnl_lock();
fdb_info = &switchdev_work->fdb_info;
+ lag = rtnl_dereference(port_priv->lag);
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
- err = dpaa2_switch_port_fdb_add(netdev_priv(dev),
- fdb_info->addr);
+ if (lag)
+ err = dpaa2_switch_lag_fdb_add(lag, fdb_info->addr,
+ switchdev_work->vid);
+ else
+ err = dpaa2_switch_port_fdb_add(netdev_priv(dev),
+ fdb_info->addr);
if (err)
break;
fdb_info->offloaded = true;
- call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
+ call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, orig_dev,
&fdb_info->info, NULL);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- dpaa2_switch_port_fdb_del(netdev_priv(dev), fdb_info->addr);
+ if (lag)
+ dpaa2_switch_lag_fdb_del(lag, fdb_info->addr,
+ switchdev_work->vid);
+ else
+ dpaa2_switch_port_fdb_del(port_priv, fdb_info->addr);
break;
}
@@ -2889,36 +3043,43 @@ static void dpaa2_switch_event_work(struct work_struct *work)
kfree(switchdev_work->fdb_info.addr);
kfree(switchdev_work);
dev_put(dev);
+ dev_put(orig_dev);
}
-static int dpaa2_switch_port_fdb_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
+static int
+dpaa2_switch_port_fdb_event(struct net_device *dev,
+ struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info)
{
- struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct ethsw_port_priv *port_priv = netdev_priv(dev);
struct ethsw_switchdev_event_work *switchdev_work;
- struct switchdev_notifier_fdb_info *fdb_info = ptr;
- struct ethsw_core *ethsw;
+ struct ethsw_core *ethsw = port_priv->ethsw_data;
- if (!dpaa2_switch_port_dev_check(dev))
- return NOTIFY_DONE;
- ethsw = port_priv->ethsw_data;
+ if (ctx && ctx != port_priv)
+ return 0;
+
+ /* For the moment, do nothing with entries towards foreign devices */
+ if (dpaa2_switch_foreign_dev_check(dev, orig_dev))
+ return 0;
if (!fdb_info->added_by_user || fdb_info->is_local)
- return NOTIFY_DONE;
+ return 0;
switchdev_work = kzalloc_obj(*switchdev_work, GFP_ATOMIC);
if (!switchdev_work)
- return NOTIFY_BAD;
+ return -ENOMEM;
INIT_WORK(&switchdev_work->work, dpaa2_switch_event_work);
switchdev_work->dev = dev;
switchdev_work->event = event;
+ switchdev_work->orig_dev = orig_dev;
+ switchdev_work->vid = fdb_info->vid;
switch (event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- memcpy(&switchdev_work->fdb_info, ptr,
+ memcpy(&switchdev_work->fdb_info, fdb_info,
sizeof(switchdev_work->fdb_info));
switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
if (!switchdev_work->fdb_info.addr)
@@ -2929,19 +3090,20 @@ static int dpaa2_switch_port_fdb_event(struct notifier_block *nb,
/* Take a reference on the device to avoid being freed. */
dev_hold(dev);
+ dev_hold(orig_dev);
break;
default:
kfree(switchdev_work);
- return NOTIFY_DONE;
+ return 0;
}
queue_work(ethsw->workqueue, &switchdev_work->work);
- return NOTIFY_DONE;
+ return 0;
err_addr_alloc:
kfree(switchdev_work);
- return NOTIFY_BAD;
+ return -ENOMEM;
}
/* Called under rcu_read_lock() */
@@ -2949,13 +3111,18 @@ static int dpaa2_switch_port_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ int err;
switch (event) {
case SWITCHDEV_PORT_ATTR_SET:
return dpaa2_switch_port_attr_set_event(dev, ptr);
case SWITCHDEV_FDB_ADD_TO_DEVICE:
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- return dpaa2_switch_port_fdb_event(nb, event, ptr);
+ err = switchdev_handle_fdb_event_to_device(dev, event, ptr,
+ dpaa2_switch_port_dev_check,
+ dpaa2_switch_foreign_dev_check,
+ dpaa2_switch_port_fdb_event);
+ return notifier_from_errno(err);
default:
return NOTIFY_DONE;
}
@@ -3890,6 +4057,9 @@ static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
dev = &sw_dev->dev;
ethsw = dev_get_drvdata(dev);
+ /* Make sure that all events were handled before we kfree anything */
+ flush_workqueue(ethsw->workqueue);
+
dpaa2_switch_teardown_irqs(sw_dev);
dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
@@ -3903,8 +4073,10 @@ static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
netif_napi_del(&ethsw->fq[i].napi);
- for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
dpaa2_switch_remove_port(ethsw, i);
+ mutex_destroy(&ethsw->lags[i].fdb_lock);
+ }
kfree(ethsw->fdbs);
kfree(ethsw->filter_blocks);
@@ -4056,6 +4228,8 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
ethsw->lags[i].ethsw = ethsw;
ethsw->lags[i].id = i + 1;
ethsw->lags[i].in_use = 0;
+ mutex_init(&ethsw->lags[i].fdb_lock);
+ INIT_LIST_HEAD(&ethsw->lags[i].fdbs);
}
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
@@ -4104,6 +4278,8 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
err_free_netdev:
for (i--; i >= 0; i--)
dpaa2_switch_remove_port(ethsw, i);
+ for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+ mutex_destroy(&ethsw->lags[i].fdb_lock);
kfree(ethsw->lags);
err_free_filter:
kfree(ethsw->filter_blocks);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 07301885763c..106cc9d7f0ec 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -100,6 +100,13 @@ struct dpaa2_switch_fq {
u32 fqid;
};
+struct dpaa2_mac_addr {
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+ refcount_t refcount;
+ struct list_head list;
+};
+
struct dpaa2_switch_fdb {
struct net_device *bridge_dev;
u16 fdb_id;
@@ -111,6 +118,9 @@ struct dpaa2_switch_lag {
struct net_device *bond_dev;
bool in_use;
u8 id;
+ /* Protects the list of fdbs installed on this LAG */
+ struct mutex fdb_lock;
+ struct list_head fdbs;
};
struct dpaa2_switch_acl_entry {
@@ -286,4 +296,18 @@ int dpaa2_switch_block_offload_mirror(struct dpaa2_switch_filter_block *block,
int dpaa2_switch_block_unoffload_mirror(struct dpaa2_switch_filter_block *block,
struct ethsw_port_priv *port_priv);
+
+static inline bool
+dpaa2_switch_port_offloads_bridge_port(struct ethsw_port_priv *port_priv,
+ const struct net_device *dev)
+{
+ struct dpaa2_switch_lag *lag = rcu_dereference_rtnl(port_priv->lag);
+
+ if (lag && lag->bond_dev == dev)
+ return true;
+ if (port_priv->netdev == dev)
+ return true;
+ return false;
+}
+
#endif /* __ETHSW_H */
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCH net-next v3 09/12] dpaa2-switch: offload FDBs added on an upper bond device
2026-06-03 14:36 ` [PATCH net-next v3 09/12] dpaa2-switch: offload FDBs added on an upper bond device Ioana Ciornei
@ 2026-06-06 2:10 ` Jakub Kicinski
0 siblings, 0 replies; 18+ messages in thread
From: Jakub Kicinski @ 2026-06-06 2:10 UTC (permalink / raw)
To: ioana.ciornei
Cc: Jakub Kicinski, andrew+netdev, davem, edumazet, pabeni, netdev,
linux-kernel
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
---
dpaa2-switch: offload FDBs added on an upper bond device
This patch adds support for offloading FDB entries added on upper bond
devices.
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> index fb754dae80697..fadebf6a758d1 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
[ ... ]
> @@ -616,6 +640,91 @@ static int dpaa2_switch_port_fdb_del(struct ethsw_port_priv *port_priv,
> return dpaa2_switch_port_fdb_del_mc(port_priv, addr);
> }
>
> +static struct dpaa2_mac_addr *
> +dpaa2_switch_mac_addr_find(struct list_head *addr_list,
> + const unsigned char *addr, u16 vid)
> +{
> + struct dpaa2_mac_addr *a;
> +
> + list_for_each_entry(a, addr_list, list)
> + if (ether_addr_equal(a->addr, addr) && a->vid == vid)
> + return a;
> +
> + return NULL;
> +}
> +
> +static int dpaa2_switch_lag_fdb_add(struct dpaa2_switch_lag *lag,
> + const unsigned char *addr, u16 vid)
> +{
> + struct ethsw_port_priv *port_priv;
> + struct dpaa2_mac_addr *a;
> + int err = 0;
> +
> + mutex_lock(&lag->fdb_lock);
> +
> + a = dpaa2_switch_mac_addr_find(&lag->fdbs, addr, vid);
> + if (a) {
> + refcount_inc(&a->refcount);
[Severity: High]
If a port leaves the LAG, the fan-out for the FDB delete event will only
reach the remaining ports. Could this result in the refcount decrementing
fewer times than it incremented, preventing it from reaching zero and
permanently leaking the struct dpaa2_mac_addr?
> + goto out;
> + }
> +
> + port_priv = dpaa2_switch_lag_get_primary(lag);
[Severity: High]
The FDB entry is programmed into hardware using this primary port's physical
interface ID. If this primary port leaves the LAG, is there a mechanism to
migrate the existing FDB entries to one of the remaining ports? Without
migration, could traffic be misrouted, and would subsequent deletions fail
because they target the new primary port's ID?
> + if (!port_priv) {
> + err = -ENOENT;
> + goto out;
> + }
> +
> + a = kzalloc(sizeof(*a), GFP_KERNEL);
[ ... ]
> @@ -2856,32 +2996,46 @@ struct ethsw_switchdev_event_work {
> struct work_struct work;
> struct switchdev_notifier_fdb_info fdb_info;
> struct net_device *dev;
> + struct net_device *orig_dev;
> unsigned long event;
> + u16 vid;
> };
>
> static void dpaa2_switch_event_work(struct work_struct *work)
> {
> struct ethsw_switchdev_event_work *switchdev_work =
> container_of(work, struct ethsw_switchdev_event_work, work);
> + struct net_device *orig_dev = switchdev_work->orig_dev;
> struct net_device *dev = switchdev_work->dev;
> + struct ethsw_port_priv *port_priv = netdev_priv(dev);
> struct switchdev_notifier_fdb_info *fdb_info;
> + struct dpaa2_switch_lag *lag;
> int err;
>
> rtnl_lock();
> fdb_info = &switchdev_work->fdb_info;
> + lag = rtnl_dereference(port_priv->lag);
>
> switch (switchdev_work->event) {
> case SWITCHDEV_FDB_ADD_TO_DEVICE:
> - err = dpaa2_switch_port_fdb_add(netdev_priv(dev),
> - fdb_info->addr);
> + if (lag)
> + err = dpaa2_switch_lag_fdb_add(lag, fdb_info->addr,
> + switchdev_work->vid);
> + else
> + err = dpaa2_switch_port_fdb_add(netdev_priv(dev),
> + fdb_info->addr);
> if (err)
> break;
> fdb_info->offloaded = true;
> - call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
> + call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, orig_dev,
> &fdb_info->info, NULL);
> break;
> case SWITCHDEV_FDB_DEL_TO_DEVICE:
> - dpaa2_switch_port_fdb_del(netdev_priv(dev), fdb_info->addr);
> + if (lag)
> + dpaa2_switch_lag_fdb_del(lag, fdb_info->addr,
> + switchdev_work->vid);
> + else
> + dpaa2_switch_port_fdb_del(port_priv, fdb_info->addr);
[Severity: High]
Is it possible for a port to be removed from a bond between the queuing of
this event and its execution? If port_priv->lag becomes NULL during this
window, the worker would route the deletion to the standalone path instead
of the LAG path. Could this bypass the LAG refcount decrement and leak the
MAC address?
> break;
> }
[ ... ]
> @@ -3903,8 +4073,10 @@ static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
> for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
> netif_napi_del(&ethsw->fq[i].napi);
>
> - for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
> + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
> dpaa2_switch_remove_port(ethsw, i);
> + mutex_destroy(&ethsw->lags[i].fdb_lock);
> + }
[Severity: High]
Are the dynamically allocated struct dpaa2_mac_addr entries in the lag->fdbs
list freed during driver teardown? It appears ethsw->lags is freed shortly
after this without iterating through the fdbs lists, which might cause a
memory leak of all active LAG FDB entries. Could this also happen if all
ports leave a LAG and it becomes unused, inheriting stale entries upon reuse?
>
> kfree(ethsw->fdbs);
> kfree(ethsw->filter_blocks);
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH net-next v3 10/12] dpaa2-switch: offload port objects on an upper bond device
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (8 preceding siblings ...)
2026-06-03 14:36 ` [PATCH net-next v3 09/12] dpaa2-switch: offload FDBs added on an upper bond device Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-06 2:10 ` Jakub Kicinski
2026-06-03 14:36 ` [PATCH net-next v3 11/12] dpaa2-switch: trap all link local reserved addresses to the CPU Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 12/12] dpaa2-switch: add support for imprecise source port Ioana Ciornei
11 siblings, 1 reply; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
This patch adds support for offloading port objects, VLANs and MDBs,
added on upper bond devices.
First of all, the use of the switchdev_handle_*() replication helpers
is introduced for the SWITCHDEV_PORT_OBJ_ADD/SWITCHDEV_PORT_OBJ_DEL
events. With this change, setting up the 'port_obj_info->handled = true'
is not needed anymore since it's now handled by the new helpers.
In the DPAA2 architecture, there is no difference between adding an FDB
or an MDB which points towards a LAG port. Unlike other architectures, we
do not need to populate all the possible destinations which are under the
LAG; we only have to specify a single queueing destination (QDID) which
represents the LAG. This all means that handling of MDBs on bond devices
needs the same refcount mechanism as the FDBs.
This mechanism is triggered by calling the dpaa2_switch_lag_fdb_add() /
dpaa2_switch_lag_fdb_del() functions which were added in the previous
patch.
Also change how dpaa2_switch_port_mdb_del() behaves in case the
underlying HW operation failed. Since the delete operations cannot be
stopped from a switchdev standpoint, go ahead and ignore the return code
from the dpaa2_switch_*_fdb_del() calls and call dev_mc_del() to at
least keep the SW state consistent.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- Access the port_priv->lag field only through the proper rcu accessors.
Changes in v2:
- In case dev_mc_add() fails, remove the MDB address from HW with the
proper function, dpaa2_switch_lag_fdb_del() or
dpaa2_switch_port_fdb_del(), depending on the LAG offload state.
---
.../ethernet/freescale/dpaa2/dpaa2-switch.c | 74 +++++++++++--------
1 file changed, 43 insertions(+), 31 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index fadebf6a758d..9aba5f348abc 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2096,30 +2096,46 @@ static int dpaa2_switch_port_mdb_add(struct net_device *netdev,
const struct switchdev_obj_port_mdb *mdb)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct dpaa2_switch_lag *lag;
int err;
/* Check if address is already set on this port */
if (dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
return -EEXIST;
- err = dpaa2_switch_port_fdb_add(port_priv, mdb->addr);
+ lag = rtnl_dereference(port_priv->lag);
+ if (lag)
+ err = dpaa2_switch_lag_fdb_add(lag, mdb->addr, mdb->vid);
+ else
+ err = dpaa2_switch_port_fdb_add(port_priv, mdb->addr);
if (err)
return err;
err = dev_mc_add(netdev, mdb->addr);
if (err) {
netdev_err(netdev, "dev_mc_add err %d\n", err);
- dpaa2_switch_port_fdb_del(port_priv, mdb->addr);
+ if (lag)
+ dpaa2_switch_lag_fdb_del(lag, mdb->addr, mdb->vid);
+ else
+ dpaa2_switch_port_fdb_del(port_priv, mdb->addr);
}
return err;
}
-static int dpaa2_switch_port_obj_add(struct net_device *netdev,
- const struct switchdev_obj *obj)
+static int dpaa2_switch_port_obj_add(struct net_device *netdev, const void *ctx,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
int err;
+ if (ctx && ctx != port_priv)
+ return 0;
+
+ if (!dpaa2_switch_port_offloads_bridge_port(port_priv, obj->orig_dev))
+ return -EOPNOTSUPP;
+
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dpaa2_switch_port_vlans_add(netdev,
@@ -2215,14 +2231,17 @@ static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
const struct switchdev_obj_port_mdb *mdb)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ struct dpaa2_switch_lag *lag;
int err;
if (!dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
return -ENOENT;
- err = dpaa2_switch_port_fdb_del(port_priv, mdb->addr);
- if (err)
- return err;
+ lag = rtnl_dereference(port_priv->lag);
+ if (lag)
+ dpaa2_switch_lag_fdb_del(lag, mdb->addr, mdb->vid);
+ else
+ dpaa2_switch_port_fdb_del(port_priv, mdb->addr);
err = dev_mc_del(netdev, mdb->addr);
if (err) {
@@ -2233,11 +2252,18 @@ static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
return err;
}
-static int dpaa2_switch_port_obj_del(struct net_device *netdev,
+static int dpaa2_switch_port_obj_del(struct net_device *netdev, const void *ctx,
const struct switchdev_obj *obj)
{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
int err;
+ if (ctx && ctx != port_priv)
+ return 0;
+
+ if (!dpaa2_switch_port_offloads_bridge_port(port_priv, obj->orig_dev))
+ return -EOPNOTSUPP;
+
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dpaa2_switch_port_vlans_del(netdev, SWITCHDEV_OBJ_PORT_VLAN(obj));
@@ -3128,37 +3154,23 @@ static int dpaa2_switch_port_event(struct notifier_block *nb,
}
}
-static int dpaa2_switch_port_obj_event(unsigned long event,
- struct net_device *netdev,
- struct switchdev_notifier_port_obj_info *port_obj_info)
-{
- int err = -EOPNOTSUPP;
-
- if (!dpaa2_switch_port_dev_check(netdev))
- return NOTIFY_DONE;
-
- switch (event) {
- case SWITCHDEV_PORT_OBJ_ADD:
- err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj);
- break;
- case SWITCHDEV_PORT_OBJ_DEL:
- err = dpaa2_switch_port_obj_del(netdev, port_obj_info->obj);
- break;
- }
-
- port_obj_info->handled = true;
- return notifier_from_errno(err);
-}
-
static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ int err;
switch (event) {
case SWITCHDEV_PORT_OBJ_ADD:
+ err = switchdev_handle_port_obj_add(dev, ptr,
+ dpaa2_switch_port_dev_check,
+ dpaa2_switch_port_obj_add);
+ return notifier_from_errno(err);
case SWITCHDEV_PORT_OBJ_DEL:
- return dpaa2_switch_port_obj_event(event, dev, ptr);
+ err = switchdev_handle_port_obj_del(dev, ptr,
+ dpaa2_switch_port_dev_check,
+ dpaa2_switch_port_obj_del);
+ return notifier_from_errno(err);
case SWITCHDEV_PORT_ATTR_SET:
return dpaa2_switch_port_attr_set_event(dev, ptr);
}
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH net-next v3 10/12] dpaa2-switch: offload port objects on an upper bond device
2026-06-03 14:36 ` [PATCH net-next v3 10/12] dpaa2-switch: offload port objects " Ioana Ciornei
@ 2026-06-06 2:10 ` Jakub Kicinski
0 siblings, 0 replies; 18+ messages in thread
From: Jakub Kicinski @ 2026-06-06 2:10 UTC (permalink / raw)
To: ioana.ciornei
Cc: Jakub Kicinski, andrew+netdev, davem, edumazet, pabeni, netdev,
linux-kernel
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
---
dpaa2-switch: offload port objects on an upper bond device
This patch adds support for offloading port objects, VLANs and MDBs,
added on upper bond devices. It introduces switchdev_handle_*() replication
helpers for port objects and a refcount mechanism for handling MDBs in bond
devices.
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> index fadebf6a758d1..9aba5f348abc0 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> @@ -2096,30 +2096,46 @@ static int dpaa2_switch_port_mdb_add(struct net_device *netdev,
> const struct switchdev_obj_port_mdb *mdb)
> {
> struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + struct dpaa2_switch_lag *lag;
> int err;
>
> /* Check if address is already set on this port */
> if (dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
> return -EEXIST;
[Severity: High]
Does this incorrectly block valid MDB additions for the same MAC address but
different VLAN IDs?
Looking at dpaa2_switch_port_lookup_address(), it checks the VLAN-unaware
netdev->mc list. If an MDB with the same MAC but different VID is added,
it returns -EEXIST.
Later, if the user attempts to delete the un-added MDB,
dpaa2_switch_port_mdb_del() finds the MAC from the first MDB, skips hardware
removal due to VID mismatch, and successfully calls dev_mc_del().
When the legitimate MDB is later deleted, the lookup fails with -ENOENT,
skipping dpaa2_switch_lag_fdb_del() and permanently leaking the dynamically
allocated struct dpaa2_mac_addr and the hardware FDB entry.
[ ... ]
> @@ -2215,14 +2231,17 @@ static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
> const struct switchdev_obj_port_mdb *mdb)
> {
> struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + struct dpaa2_switch_lag *lag;
> int err;
>
> if (!dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
> return -ENOENT;
[Severity: High]
Does returning an error here abort the switchdev_handle_port_obj_del()
replication loop for the remaining bond slaves?
This patch uses switchdev_handle_port_obj_del() to replicate object
deletions across all slave devices. If a driver callback returns an error
other than -EOPNOTSUPP on one of the slaves, the replication loop aborts,
skipping all remaining slaves.
For example, dpaa2_switch_port_mdb_del() returns -ENOENT if the MAC is not
found, and dpaa2_switch_port_vlans_del() propagates hardware deletion errors.
Will this leave the remaining slave ports with stale VLANs and MDBs fully
active in software and hardware, causing silent data leaks and inconsistent
state?
[ ... ]
> @@ -3128,37 +3154,23 @@ static int dpaa2_switch_port_event(struct notifier_block *nb,
> }
> }
>
> -static int dpaa2_switch_port_obj_event(unsigned long event,
[ ... ]
> static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
> unsigned long event, void *ptr)
> {
> struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
> + int err;
>
> switch (event) {
> case SWITCHDEV_PORT_OBJ_ADD:
> + err = switchdev_handle_port_obj_add(dev, ptr,
> + dpaa2_switch_port_dev_check,
> + dpaa2_switch_port_obj_add);
> + return notifier_from_errno(err);
[Severity: High]
Can this code leak memory and hardware MDBs when a slave leaves a bond?
When switchdev_handle_port_obj_add() replicates MDB additions across all
bond slaves, it increments the a->refcount in lag->fdbs for each slave.
If a slave leaves the bond (e.g., via ip link set eth1 nomaster), shouldn't
dpaa2_switch_port_bond_leave() be updated to decrement the slave's share
of the MDB refcounts?
Without this, when the bond is later removed from the bridge,
SWITCHDEV_PORT_OBJ_DEL replicates only over the remaining slaves. The MDB
refcounts will not reach zero because the decrements from the departed
slaves are permanently lost. This would leak struct dpaa2_mac_addr memory
and leave the hardware MDB permanently programmed on the LAG.
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH net-next v3 11/12] dpaa2-switch: trap all link local reserved addresses to the CPU
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (9 preceding siblings ...)
2026-06-03 14:36 ` [PATCH net-next v3 10/12] dpaa2-switch: offload port objects " Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-03 14:36 ` [PATCH net-next v3 12/12] dpaa2-switch: add support for imprecise source port Ioana Ciornei
11 siblings, 0 replies; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
Do not trap only STP frames to the control interface but rather trap all
link local reserved addresses (01:80:c2:00:00:00 to 01:80:c2:00:00:0F).
This is still done by matching on the destination MAC address, but with a
mask that ignores the lower four bits of the last byte.
This change will benefit LACP frames which now will reach the control
interface.
While at it, change the prototype of the
dpaa2_switch_port_trap_mac_addr() function to take the address and the
mask as 'const u8 *', matching the ether_addr_copy() calls it uses.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- Change the mask so that we restrict the trap only to the link local
addresses (01:80:c2:00:00:00 to 01:80:c2:00:00:0F) instead of the entire
reserved bridge block of addresses
Changes in v2:
- none
---
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 9aba5f348abc..7aef34778e22 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -3933,17 +3933,15 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
return err;
}
-/* Add an ACL to redirect frames with specific destination MAC address to
- * control interface
- */
+/* Add an ACL to redirect frames to control interface based on the dst MAC */
static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
- const char *mac)
+ const u8 *mac, const u8 *mask)
{
struct dpaa2_switch_acl_entry acl_entry = {0};
/* Match on the destination MAC address */
ether_addr_copy(acl_entry.key.match.l2_dest_mac, mac);
- eth_broadcast_addr(acl_entry.key.mask.l2_dest_mac);
+ ether_addr_copy(acl_entry.key.mask.l2_dest_mac, mask);
/* Trap to CPU */
acl_entry.cfg.precedence = 0;
@@ -3954,7 +3952,8 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
{
- const char stpa[ETH_ALEN] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
+ const u8 ll_mac[ETH_ALEN] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
+ const u8 ll_mask[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xf0};
struct switchdev_obj_port_vlan vlan = {
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.vid = DEFAULT_VLAN_ID,
@@ -4029,7 +4028,7 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
if (err)
return err;
- err = dpaa2_switch_port_trap_mac_addr(port_priv, stpa);
+ err = dpaa2_switch_port_trap_mac_addr(port_priv, ll_mac, ll_mask);
if (err)
return err;
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH net-next v3 12/12] dpaa2-switch: add support for imprecise source port
2026-06-03 14:36 [PATCH net-next v3 00/12] dpaa2-switch: add support for LAG offload Ioana Ciornei
` (10 preceding siblings ...)
2026-06-03 14:36 ` [PATCH net-next v3 11/12] dpaa2-switch: trap all link local reserved addresses to the CPU Ioana Ciornei
@ 2026-06-03 14:36 ` Ioana Ciornei
2026-06-06 2:11 ` Jakub Kicinski
11 siblings, 1 reply; 18+ messages in thread
From: Ioana Ciornei @ 2026-06-03 14:36 UTC (permalink / raw)
To: andrew+netdev, davem, edumazet, kuba, pabeni, netdev; +Cc: linux-kernel
Switch ports configured as part of a LAG group are not able to provide
a precise source port for all packets which reach the control interface.
The only frames which will have a precise source port are those that are
explicitly trapped, for example STP and LACP frames. For any other
frames (for example, those which are flooded) we can only know the
ingress LAG group.
Take into account the DPAA2_ETHSW_FLC_IMPRECISE_IF_ID bit and based on
its value target the bond device or the specific source netdevice.
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
Changes in v3:
- None
Changes in v2:
- Fix 32bit build by using BIT_ULL
- Take a reference to port_priv->lag instead of reading it multiple
times.
---
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 10 +++++++---
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h | 3 +++
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 7aef34778e22..163f6c51c160 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -3222,13 +3222,15 @@ static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
u16 vlan_tci, vid;
int if_id, err;
void *vaddr;
+ u64 flc;
vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, addr);
dma_unmap_page(ethsw->dev, addr, DPAA2_SWITCH_RX_BUF_SIZE,
DMA_FROM_DEVICE);
/* get switch ingress interface ID */
- if_id = upper_32_bits(dpaa2_fd_get_flc(fd)) & 0x0000FFFF;
+ flc = dpaa2_fd_get_flc(fd);
+ if_id = DPAA2_ETHSW_FLC_IF_ID(flc);
if (if_id >= ethsw->sw_attr.num_ifs) {
dev_err(ethsw->dev, "Frame received from unknown interface!\n");
goto err_free_fd;
@@ -3270,8 +3272,10 @@ static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
rcu_read_lock();
lag = rcu_dereference(port_priv->lag);
-
- skb->dev = netdev;
+ if (DPAA2_ETHSW_FLC_IMPRECISE_IF_ID(flc) && lag)
+ skb->dev = lag->bond_dev;
+ else
+ skb->dev = netdev;
skb->protocol = eth_type_trans(skb, skb->dev);
/* Setup the offload_fwd_mark only if the port is under a bridge
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 106cc9d7f0ec..8cbaea1449fa 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -87,6 +87,9 @@
#define DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE 256
+#define DPAA2_ETHSW_FLC_IF_ID(flc) (((flc) >> 32) & GENMASK(15, 0))
+#define DPAA2_ETHSW_FLC_IMPRECISE_IF_ID(flc) ((flc) & BIT_ULL(63))
+
extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
struct ethsw_core;
--
2.25.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH net-next v3 12/12] dpaa2-switch: add support for imprecise source port
2026-06-03 14:36 ` [PATCH net-next v3 12/12] dpaa2-switch: add support for imprecise source port Ioana Ciornei
@ 2026-06-06 2:11 ` Jakub Kicinski
0 siblings, 0 replies; 18+ messages in thread
From: Jakub Kicinski @ 2026-06-06 2:11 UTC (permalink / raw)
To: ioana.ciornei
Cc: Jakub Kicinski, andrew+netdev, davem, edumazet, pabeni, netdev,
linux-kernel
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
---
dpaa2-switch: add support for imprecise source port
Switch ports configured as part of a LAG group are not able to provide
a precise source port for all packets which reach the control interface.
The only frames which will have a precise source port are those that are
explicitly trapped, for example STP and LACP frames. For any other
frames (for example, those which are flooded) we can only know the
ingress LAG group.
Take into account the DPAA2_ETHSW_FLC_IMPRECISE_IF_ID bit and based on
its value target the bond device or the specific source netdevice.
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> index 7aef34778e220..163f6c51c1606 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
> @@ -3270,8 +3272,10 @@ static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
> rcu_read_lock();
>
> lag = rcu_dereference(port_priv->lag);
> -
> - skb->dev = netdev;
> + if (DPAA2_ETHSW_FLC_IMPRECISE_IF_ID(flc) && lag)
> + skb->dev = lag->bond_dev;
[Severity: High]
By assigning skb->dev = lag->bond_dev here, isn't there a risk of a
use-after-free on PREEMPT_RT?
The explicit rcu_read_lock() is dropped before handing the SKB over to
netif_receive_skb(). While NAPI's local_bh_disable() provides RCU protection
on standard kernels, softirqs are threaded on PREEMPT_RT and do not provide
an implicit RCU read-side critical section.
Between the rcu_read_unlock() and the internal rcu_read_lock() taken by
netif_receive_skb_core(), the threaded softirq can be preempted. If the bond
device is concurrently unregistered, synchronize_net() will not wait for the
NAPI thread, allowing bond_dev to be freed before netif_receive_skb()
dereferences it.
> + else
> + skb->dev = netdev;
> skb->protocol = eth_type_trans(skb, skb->dev);
>
> /* Setup the offload_fwd_mark only if the port is under a bridge
^ permalink raw reply [flat|nested] 18+ messages in thread