Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop()
From: Jakub Kicinski @ 2026-06-13 18:21 UTC (permalink / raw)
  To: menglong8.dong
  Cc: jasowang, mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet,
	pabeni, magnus.karlsson, maciej.fijalkowski, sdf, horms, ast,
	daniel, hawk, john.fastabend, bjorn, kerneljasonxing, netdev,
	virtualization, linux-kernel, bpf
In-Reply-To: <20260611071242.2485058-2-dongml2@chinatelecom.cn>

On Thu, 11 Jun 2026 15:12:40 +0800 menglong8.dong@gmail.com wrote:
> For now, we use sk_busy_loop() for both rx and tx path. The sk_busy_loop()
> will call napi_busy_loop() for the specified napi_id. However, some
> nic drivers have tx napi, such as virtio-net. In this case, sk_busy_loop()
> doesn't work, as it can only schedule the NAPI for the rx queue.
> 
> Therefore, introduce sk_tx_busy_loop() for the nic drivers that support tx
> napi, which will schedule the tx napi if available.

First, I thought the only difference with Tx NAPI is that it can't be
busy polled. So if you want to poll an instance don't register it as 
a Tx one instead of adding all this "tx polling" stuff in the core?

Second, can this problem happen for any other NIC or is it purely 
an artifact of virtio's delayed Tx completion handling?

Third, this series does not apply.

^ permalink raw reply

* [PATCH net-next v5 4/4] net: dsa: initial support for MT7628 embedded switch
From: Joris Vaisvila @ 2026-06-13 18:18 UTC (permalink / raw)
  To: netdev
  Cc: horms, pabeni, kuba, edumazet, davem, olteanv, Andrew Lunn,
	devicetree, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Arınç ÜNAL, Landen Chao, DENG Qingfang, Sean Wang,
	Daniel Golle, Joris Vaisvila
In-Reply-To: <20260613181845.111877-1-joey@tinyisr.com>

Add support for the MT7628 embedded switch.

The switch has 5 built-in 100Mbps user ports (ports 0-4) and one 1Gbps
port that is internally attached to the SoC's CPU MAC and serves as the
CPU port.

The switch hardware has a very limited 16-entry VLAN table. Configuring
VLANs is the only way to control switch forwarding. Currently 6 entries
are used by tag_8021q to isolate the ports. The double tag feature is
enabled to force the switch to append the VLAN tag even if the incoming
packet is already tagged; this simulates VLAN-unaware operation and
simplifies the tagger implementation.

Signed-off-by: Joris Vaisvila <joey@tinyisr.com>
---
 drivers/net/dsa/Kconfig  |   8 +
 drivers/net/dsa/Makefile |   1 +
 drivers/net/dsa/mt7628.c | 649 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 658 insertions(+)
 create mode 100644 drivers/net/dsa/mt7628.c

diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 4ab567c5bbaf..daa1d3d4ba60 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -63,6 +63,14 @@ config NET_DSA_MT7530_MMIO
 	  are directly mapped into the SoCs register space rather than being
 	  accessible via MDIO.
 
+config NET_DSA_MT7628
+	tristate "MediaTek MT7628 Embedded Ethernet switch support"
+	select NET_DSA_TAG_MT7628
+	select MEDIATEK_FE_SOC_PHY
+	help
+	  This enables support for the built-in Ethernet switch found
+	  in the MT7628 SoC.
+
 config NET_DSA_MV88E6060
 	tristate "Marvell 88E6060 ethernet switch chip support"
 	select NET_DSA_TAG_TRAILER
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index d2975badffc0..6ceb78a755d7 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_NET_DSA_KS8995) 	+= ks8995.o
 obj-$(CONFIG_NET_DSA_MT7530)	+= mt7530.o
 obj-$(CONFIG_NET_DSA_MT7530_MDIO) += mt7530-mdio.o
 obj-$(CONFIG_NET_DSA_MT7530_MMIO) += mt7530-mmio.o
+obj-$(CONFIG_NET_DSA_MT7628) += mt7628.o
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_RZN1_A5PSW) += rzn1_a5psw.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o
diff --git a/drivers/net/dsa/mt7628.c b/drivers/net/dsa/mt7628.c
new file mode 100644
index 000000000000..cedf063ad749
--- /dev/null
+++ b/drivers/net/dsa/mt7628.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mediatek MT7628 Embedded Switch (ESW) DSA driver
+ * Copyright (C) 2026 Joris Vaisvila <joey@tinyisr.com>
+ *
+ * Portions derived from OpenWRT esw_rt3050 driver:
+ * Copyright (C) 2009-2015 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2009-2015 Felix Fietkau <nbd@nbd.name>
+ * Copyright (C) 2013-2015 Michael Lee <igvtee@gmail.com>
+ * Copyright (C) 2016 Vittorio Gambaletta <openwrt@vittgam.net>
+ */
+
+#include <linux/platform_device.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/dsa/8021q.h>
+#include <linux/if_bridge.h>
+#include <linux/module.h>
+#include <linux/mdio.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/kernel.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <net/dsa.h>
+
+#define MT7628_ESW_REG_IMR 0x04
+#define MT7628_ESW_REG_FCT0 0x08
+#define MT7628_ESW_REG_PFC1 0x14
+#define MT7628_ESW_REG_PVIDC(port) (0x40 + 4 * ((port) / 2))
+#define MT7628_ESW_REG_VLANI(vlan) (0x50 + 4 * ((vlan) / 2))
+#define MT7628_ESW_REG_VMSC(vlan) (0x70 + 4 * ((vlan) / 4))
+#define MT7628_ESW_REG_VUB(vlan) (0x100 + 4 * ((vlan) / 4))
+#define MT7628_ESW_REG_SOCPC 0x8c
+#define MT7628_ESW_REG_POC0 0x90
+#define MT7628_ESW_REG_POC2 0x98
+#define MT7628_ESW_REG_SGC 0x9c
+#define MT7628_ESW_REG_PCR0 0xc0
+#define MT7628_ESW_REG_PCR1 0xc4
+#define MT7628_ESW_REG_FPA2 0xc8
+#define MT7628_ESW_REG_FCT2 0xcc
+#define MT7628_ESW_REG_SGC2 0xe4
+
+#define MT7628_ESW_FCT0_DROP_SET_TH GENMASK(7, 0)
+#define MT7628_ESW_FCT0_DROP_RLS_TH GENMASK(15, 8)
+#define MT7628_ESW_FCT0_FC_SET_TH GENMASK(23, 16)
+#define MT7628_ESW_FCT0_FC_RLS_TH GENMASK(31, 24)
+
+#define MT7628_ESW_PFC1_EN_VLAN GENMASK(22, 16)
+
+#define MT7628_ESW_PVID_S 12
+#define MT7628_ESW_PVID_M GENMASK(11, 0)
+#define MT7628_ESW_PVID_SHIFT(port) \
+	(MT7628_ESW_PVID_S * ((port) % 2))
+#define MT7628_ESW_PVID_MASK(port) \
+	(MT7628_ESW_PVID_M << MT7628_ESW_PVID_SHIFT(port))
+#define MT7628_ESW_PVID_PREP(port, pvid) \
+	(((pvid) & MT7628_ESW_PVID_M) << MT7628_ESW_PVID_SHIFT(port))
+
+#define MT7628_ESW_VID_S 12
+#define MT7628_ESW_VID_M GENMASK(11, 0)
+#define MT7628_ESW_VID_SHIFT(vlan) \
+	(MT7628_ESW_VID_S * ((vlan) % 2))
+#define MT7628_ESW_VID_MASK(vlan) \
+	(MT7628_ESW_VID_M << MT7628_ESW_VID_SHIFT(vlan))
+#define MT7628_ESW_VID_PREP(vlan, vid) \
+	(((vid) & MT7628_ESW_VID_M) << MT7628_ESW_VID_SHIFT(vlan))
+
+#define MT7628_ESW_VMSC_S 8
+#define MT7628_ESW_VMSC_M GENMASK(7, 0)
+#define MT7628_ESW_VMSC_SHIFT(vlan) \
+	(MT7628_ESW_VMSC_S * ((vlan) % 4))
+#define MT7628_ESW_VMSC_MASK(vlan) \
+	(MT7628_ESW_VMSC_M << MT7628_ESW_VMSC_SHIFT(vlan))
+#define MT7628_ESW_VMSC_PREP(vlan, vmsc) \
+	(((vmsc) & MT7628_ESW_VMSC_M) << MT7628_ESW_VMSC_SHIFT(vlan))
+
+#define MT7628_ESW_VUB_S 7
+#define MT7628_ESW_VUB_M GENMASK(6, 0)
+#define MT7628_ESW_VUB_SHIFT(vlan) \
+	(MT7628_ESW_VUB_S * ((vlan) % 4))
+#define MT7628_ESW_VUB_MASK(vlan) \
+	(MT7628_ESW_VUB_M << MT7628_ESW_VUB_SHIFT(vlan))
+#define MT7628_ESW_VUB_PREP(vlan, vub) \
+	(((vub) & MT7628_ESW_VUB_M) << MT7628_ESW_VUB_SHIFT(vlan))
+
+#define MT7628_ESW_SOCPC_CRC_PADDING BIT(25)
+#define MT7628_ESW_SOCPC_DISBC2CPU GENMASK(22, 16)
+#define MT7628_ESW_SOCPC_DISMC2CPU GENMASK(14, 8)
+#define MT7628_ESW_SOCPC_DISUN2CPU GENMASK(6, 0)
+
+#define MT7628_ESW_POC0_PORT_DISABLE GENMASK(29, 23)
+
+#define MT7628_ESW_POC2_PER_VLAN_UNTAG_EN BIT(15)
+
+#define MT7628_ESW_SGC_AGING_INTERVAL GENMASK(3, 0)
+#define MT7628_ESW_BC_STORM_PROT GENMASK(5, 4)
+#define MT7628_ESW_PKT_MAX_LEN GENMASK(7, 6)
+#define MT7628_ESW_DIS_PKT_ABORT BIT(8)
+#define MT7628_ESW_ADDRESS_HASH_ALG GENMASK(10, 9)
+#define MT7628_ESW_DISABLE_TX_BACKOFF BIT(11)
+#define MT7628_ESW_BP_JAM_CNT GENMASK(15, 12)
+#define MT7628_ESW_DISMIIPORT_WASTX GENMASK(17, 16)
+#define MT7628_ESW_BP_MODE GENMASK(19, 18)
+#define MT7628_ESW_BISH_DIS BIT(20)
+#define MT7628_ESW_BISH_TH GENMASK(22, 21)
+#define MT7628_ESW_LED_FLASH_TIME GENMASK(24, 23)
+#define MT7628_ESW_RMC_RULE GENMASK(26, 25)
+#define MT7628_ESW_IP_MULT_RULE GENMASK(28, 27)
+#define MT7628_ESW_LEN_ERR_CHK BIT(29)
+#define MT7628_ESW_BKOFF_ALG BIT(30)
+
+#define MT7628_ESW_PCR0_WT_NWAY_DATA GENMASK(31, 16)
+#define MT7628_ESW_PCR0_RD_PHY_CMD BIT(14)
+#define MT7628_ESW_PCR0_WT_PHY_CMD BIT(13)
+#define MT7628_ESW_PCR0_CPU_PHY_REG GENMASK(12, 8)
+#define MT7628_ESW_PCR0_CPU_PHY_ADDR GENMASK(4, 0)
+
+#define MT7628_ESW_PCR1_RD_DATA GENMASK(31, 16)
+#define MT7628_ESW_PCR1_RD_DONE BIT(1)
+#define MT7628_ESW_PCR1_WT_DONE BIT(0)
+
+#define MT7628_ESW_FPA2_AP_EN BIT(29)
+#define MT7628_ESW_FPA2_EXT_PHY_ADDR_BASE GENMASK(28, 24)
+#define MT7628_ESW_FPA2_FORCE_RGMII_LINK1 BIT(13)
+#define MT7628_ESW_FPA2_FORCE_RGMII_EN1 BIT(11)
+
+#define MT7628_ESW_FCT2_MUST_DROP_RLS_TH GENMASK(17, 13)
+#define MT7628_ESW_FCT2_MUST_DROP_SET_TH GENMASK(12, 8)
+#define MT7628_ESW_FCT2_MC_PER_PORT_TH GENMASK(5, 0)
+
+#define MT7628_ESW_SGC2_SPECIAL_TAG_EN BIT(23)
+#define MT7628_ESW_SGC2_TX_CPU_TPID_BIT_MAP GENMASK(22, 16)
+#define MT7628_ESW_SGC2_DOUBLE_TAG_EN GENMASK(6, 0)
+
+#define MT7628_ESW_PORTS_NOCPU GENMASK(5, 0)
+#define MT7628_ESW_PORTS_CPU BIT(6)
+#define MT7628_ESW_PORTS_ALL GENMASK(6, 0)
+
+#define MT7628_ESW_NUM_PORTS 7
+#define MT7628_NUM_VLANS 16
+
+static const struct regmap_config mt7628_esw_regmap_cfg = {
+	.name = "mt7628-esw",
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,	/* one 32-bit register every 4 bytes */
+	.fast_io = true,
+	.reg_format_endian = REGMAP_ENDIAN_LITTLE,
+	.val_format_endian = REGMAP_ENDIAN_LITTLE,
+};
+
+struct mt7628_vlan {
+	bool active;	/* slot currently holds a VLAN */
+	u8 members;	/* member ports, one BIT(port) each */
+	u8 untag;	/* ports added with BRIDGE_VLAN_INFO_UNTAGGED */
+	u16 vid;	/* VLAN ID programmed for this slot */
+};
+
+struct mt7628_esw {
+	void __iomem *base;	/* mapped platform resource 0 */
+	struct reset_control *rst_ephy;	/* "ephy" reset line */
+	struct reset_control *rst_esw;	/* "esw" reset line */
+	struct regmap *regmap;	/* MMIO regmap on top of base */
+	struct dsa_switch *ds;
+	u16 tag_8021q_pvid[MT7628_ESW_NUM_PORTS];	/* PVID per port */
+	struct mt7628_vlan vlans[MT7628_NUM_VLANS];	/* written by vlan_sync */
+	struct device *dev;
+};
+
+static int mt7628_mii_read(struct mii_bus *bus, int port, int regnum)
+{
+	struct mt7628_esw *priv = bus->priv;
+	u32 cmd, pcr1;
+	int err;
+
+	/*
+	 * Reading PCR1 clears RD_DONE, so do one throwaway read to drop a
+	 * completion flag left behind by an earlier transaction.
+	 */
+	err = regmap_read(priv->regmap, MT7628_ESW_REG_PCR1, &pcr1);
+	if (err)
+		goto fail;
+
+	/* Writing PCR0 issues the read; completion is then polled in PCR1. */
+	cmd = MT7628_ESW_PCR0_RD_PHY_CMD |
+	      FIELD_PREP(MT7628_ESW_PCR0_CPU_PHY_ADDR, port) |
+	      FIELD_PREP(MT7628_ESW_PCR0_CPU_PHY_REG, regnum);
+	err = regmap_write(priv->regmap, MT7628_ESW_REG_PCR0, cmd);
+	if (err)
+		goto fail;
+
+	err = regmap_read_poll_timeout(priv->regmap, MT7628_ESW_REG_PCR1,
+				       pcr1, pcr1 & MT7628_ESW_PCR1_RD_DONE,
+				       10, 5000);
+	if (err)
+		goto fail;
+
+	return FIELD_GET(MT7628_ESW_PCR1_RD_DATA, pcr1);
+
+fail:
+	dev_err(&bus->dev, "read failed. MDIO timeout?\n");
+	return err;
+}
+
+static int mt7628_mii_write(struct mii_bus *bus, int port, int regnum, u16 dat)
+{
+	struct mt7628_esw *priv = bus->priv;
+	u32 cmd, pcr1;
+	int err;
+
+	/*
+	 * Reading PCR1 clears WT_DONE, so do one throwaway read to drop a
+	 * completion flag left behind by an earlier transaction.
+	 */
+	err = regmap_read(priv->regmap, MT7628_ESW_REG_PCR1, &pcr1);
+	if (err)
+		goto fail;
+
+	/* Writing PCR0 issues the write; completion is then polled in PCR1. */
+	cmd = MT7628_ESW_PCR0_WT_PHY_CMD |
+	      FIELD_PREP(MT7628_ESW_PCR0_CPU_PHY_ADDR, port) |
+	      FIELD_PREP(MT7628_ESW_PCR0_CPU_PHY_REG, regnum) |
+	      FIELD_PREP(MT7628_ESW_PCR0_WT_NWAY_DATA, dat);
+	err = regmap_write(priv->regmap, MT7628_ESW_REG_PCR0, cmd);
+	if (err)
+		goto fail;
+
+	err = regmap_read_poll_timeout(priv->regmap, MT7628_ESW_REG_PCR1,
+				       pcr1, pcr1 & MT7628_ESW_PCR1_WT_DONE,
+				       10, 5000);
+	if (err)
+		goto fail;
+
+	return 0;
+
+fail:
+	dev_err(&bus->dev, "write failed. MDIO timeout?\n");
+	return err;
+}
+
+static int mt7628_setup_internal_mdio(struct dsa_switch *ds)
+{
+	struct device *parent = ds->dev;
+	struct mii_bus *mii;
+
+	mii = devm_mdiobus_alloc(parent);
+	if (!mii)
+		return -ENOMEM;
+
+	/* The read/write callbacks find the switch state through mii->priv. */
+	mii->priv = ds->priv;
+	mii->parent = parent;
+	mii->read = mt7628_mii_read;
+	mii->write = mt7628_mii_write;
+	mii->name = "MT7628 internal MDIO bus";
+	snprintf(mii->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(parent));
+	mii->phy_mask = ~ds->phys_mii_mask;
+
+	ds->user_mii_bus = mii;
+
+	return devm_mdiobus_register(parent, mii);
+}
+
+static void mt7628_switch_init(struct dsa_switch *ds)
+{
+	struct mt7628_esw *esw = ds->priv;
+
+	regmap_write(esw->regmap, MT7628_ESW_REG_FCT0,
+		     FIELD_PREP(MT7628_ESW_FCT0_DROP_SET_TH, 0x50) |
+		     FIELD_PREP(MT7628_ESW_FCT0_DROP_RLS_TH, 0x78) |
+		     FIELD_PREP(MT7628_ESW_FCT0_FC_SET_TH, 0xa0) |
+		     FIELD_PREP(MT7628_ESW_FCT0_FC_RLS_TH, 0xc8));
+
+	regmap_write(esw->regmap, MT7628_ESW_REG_FCT2,
+		     FIELD_PREP(MT7628_ESW_FCT2_MC_PER_PORT_TH, 0xc) |
+		     FIELD_PREP(MT7628_ESW_FCT2_MUST_DROP_SET_TH, 0x10) |
+		     FIELD_PREP(MT7628_ESW_FCT2_MUST_DROP_RLS_TH, 0x12));
+
+	/*
+	 * General switch configuration (SGC), in the order written below:
+	 * 300s aging interval
+	 * broadcast storm prevention disabled
+	 * max packet length 1536 bytes
+	 * disable collision 16 packet abort and late collision abort
+	 * use xor48 for address hashing
+	 * disable tx backoff
+	 * 10 packet back pressure jam
+	 * disable was_transmit
+	 * jam until BP condition released
+	 * 30ms LED flash
+	 * rmc tb fault to all ports
+	 * unmatched IGMP as broadcast
+	 */
+	regmap_write(esw->regmap, MT7628_ESW_REG_SGC,
+		     FIELD_PREP(MT7628_ESW_SGC_AGING_INTERVAL, 1) |
+		     FIELD_PREP(MT7628_ESW_BC_STORM_PROT, 0) |
+		     FIELD_PREP(MT7628_ESW_PKT_MAX_LEN, 0) |
+		     MT7628_ESW_DIS_PKT_ABORT |
+		     FIELD_PREP(MT7628_ESW_ADDRESS_HASH_ALG, 1) |
+		     MT7628_ESW_DISABLE_TX_BACKOFF |
+		     FIELD_PREP(MT7628_ESW_BP_JAM_CNT, 10) |
+		     FIELD_PREP(MT7628_ESW_DISMIIPORT_WASTX, 0) |
+		     FIELD_PREP(MT7628_ESW_BP_MODE, 0b10) |
+		     FIELD_PREP(MT7628_ESW_LED_FLASH_TIME, 0) |
+		     FIELD_PREP(MT7628_ESW_RMC_RULE, 0) |
+		     FIELD_PREP(MT7628_ESW_IP_MULT_RULE, 0));
+
+	regmap_write(esw->regmap, MT7628_ESW_REG_SOCPC,
+		     MT7628_ESW_SOCPC_CRC_PADDING |
+		     FIELD_PREP(MT7628_ESW_SOCPC_DISUN2CPU,
+				MT7628_ESW_PORTS_CPU) |
+		     FIELD_PREP(MT7628_ESW_SOCPC_DISMC2CPU,
+				MT7628_ESW_PORTS_CPU) |
+		     FIELD_PREP(MT7628_ESW_SOCPC_DISBC2CPU,
+				MT7628_ESW_PORTS_CPU));
+
+	regmap_set_bits(esw->regmap, MT7628_ESW_REG_FPA2,
+			MT7628_ESW_FPA2_FORCE_RGMII_EN1 |
+			MT7628_ESW_FPA2_FORCE_RGMII_LINK1 |
+			MT7628_ESW_FPA2_AP_EN);
+
+	regmap_update_bits(esw->regmap, MT7628_ESW_REG_FPA2,
+			   MT7628_ESW_FPA2_EXT_PHY_ADDR_BASE,
+			   FIELD_PREP(MT7628_ESW_FPA2_EXT_PHY_ADDR_BASE, 31));
+
+	/* disable all interrupts */
+	regmap_write(esw->regmap, MT7628_ESW_REG_IMR, 0);
+
+	/* enable MT7628 DSA tag on CPU port */
+	regmap_write(esw->regmap, MT7628_ESW_REG_SGC2,
+		     MT7628_ESW_SGC2_SPECIAL_TAG_EN |
+		     FIELD_PREP(MT7628_ESW_SGC2_TX_CPU_TPID_BIT_MAP,
+				MT7628_ESW_PORTS_CPU));
+
+	/*
+	 * The double tag feature makes the switch always append the port PVID
+	 * VLAN tag, whether or not the incoming packet already carries one.
+	 * This is enabled to simulate VLAN unawareness.
+	 */
+	regmap_set_bits(esw->regmap, MT7628_ESW_REG_SGC2,
+			FIELD_PREP(MT7628_ESW_SGC2_DOUBLE_TAG_EN,
+				   MT7628_ESW_PORTS_NOCPU));
+
+	regmap_set_bits(esw->regmap, MT7628_ESW_REG_POC2,
+			MT7628_ESW_POC2_PER_VLAN_UNTAG_EN);
+
+	regmap_update_bits(esw->regmap, MT7628_ESW_REG_PFC1,
+			   MT7628_ESW_PFC1_EN_VLAN,
+			   FIELD_PREP(MT7628_ESW_PFC1_EN_VLAN,
+				      MT7628_ESW_PORTS_ALL));
+}
+
+static void mt7628_esw_set_pvid(struct mt7628_esw *esw, unsigned int port,
+				unsigned int pvid)
+{
+	/* Two 12-bit PVID fields share each PVIDC register. */
+	regmap_update_bits(esw->regmap, MT7628_ESW_REG_PVIDC(port),
+			   MT7628_ESW_PVID_MASK(port), MT7628_ESW_PVID_PREP(port, pvid));
+}
+
+static void mt7628_esw_set_vlan_id(struct mt7628_esw *esw, unsigned int vlan,
+				   unsigned int vid)
+{
+	/* Two 12-bit VID fields share each VLANI register. */
+	regmap_update_bits(esw->regmap, MT7628_ESW_REG_VLANI(vlan),
+			   MT7628_ESW_VID_MASK(vlan), MT7628_ESW_VID_PREP(vlan, vid));
+}
+
+static void mt7628_esw_set_vmsc(struct mt7628_esw *esw, unsigned int vlan,
+				unsigned int msc)
+{
+	/* Four 8-bit member masks share each VMSC register. */
+	regmap_update_bits(esw->regmap, MT7628_ESW_REG_VMSC(vlan),
+			   MT7628_ESW_VMSC_MASK(vlan), MT7628_ESW_VMSC_PREP(vlan, msc));
+}
+
+static void mt7628_esw_set_vub(struct mt7628_esw *esw, unsigned int vlan,
+			       unsigned int vub)
+{
+	/* Four 7-bit untag masks share each VUB register. */
+	regmap_update_bits(esw->regmap, MT7628_ESW_REG_VUB(vlan),
+			   MT7628_ESW_VUB_MASK(vlan), MT7628_ESW_VUB_PREP(vlan, vub));
+}
+
+static void mt7628_vlan_sync(struct dsa_switch *ds)
+{
+	struct mt7628_esw *priv = ds->priv;
+	const struct mt7628_vlan *entry;
+	int i;
+
+	/* Rewrite every VLAN slot, then every port PVID, from driver state. */
+	for (i = 0, entry = priv->vlans; i < MT7628_NUM_VLANS; i++, entry++) {
+		mt7628_esw_set_vmsc(priv, i, entry->members);
+		mt7628_esw_set_vlan_id(priv, i, entry->vid);
+		mt7628_esw_set_vub(priv, i, entry->untag);
+	}
+
+	for (i = 0; i < ds->num_ports; i++)
+		mt7628_esw_set_pvid(priv, i, priv->tag_8021q_pvid[i]);
+}
+
+static int mt7628_setup(struct dsa_switch *ds)
+{
+	struct mt7628_esw *priv = ds->priv;
+	int err;
+
+	/*
+	 * All MMIO reads hang while the ESW is still in reset, and the EPHY
+	 * ends up misconfigured unless it gets extra time after its reset.
+	 */
+	err = reset_control_reset(priv->rst_esw);
+	if (err)
+		return err;
+	usleep_range(1000, 2000);
+
+	err = reset_control_reset(priv->rst_ephy);
+	if (err)
+		return err;
+	usleep_range(1000, 2000);
+
+	mt7628_switch_init(ds);
+
+	err = mt7628_setup_internal_mdio(ds);
+	if (err)
+		return err;
+
+	rtnl_lock();
+	err = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q));
+	rtnl_unlock();
+
+	return err;
+}
+
+static int mt7628_port_enable(struct dsa_switch *ds, int port,
+			      struct phy_device *phy)
+{
+	struct mt7628_esw *priv = ds->priv;
+	u32 off = FIELD_PREP(MT7628_ESW_POC0_PORT_DISABLE, BIT(port));
+
+	regmap_clear_bits(priv->regmap, MT7628_ESW_REG_POC0, off);
+	return 0;
+}
+
+static void mt7628_port_disable(struct dsa_switch *ds, int port)
+{
+	struct mt7628_esw *priv = ds->priv;
+	u32 off = FIELD_PREP(MT7628_ESW_POC0_PORT_DISABLE, BIT(port));
+
+	regmap_set_bits(priv->regmap, MT7628_ESW_REG_POC0, off);
+}
+
+static enum dsa_tag_protocol
+mt7628_get_tag_proto(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp)
+{
+	return DSA_TAG_PROTO_MT7628;	/* same for every port; mp is unused */
+}
+
+static void mt7628_phylink_get_caps(struct dsa_switch *ds, int port,
+				    struct phylink_config *config)
+{
+	bool is_cpu = port == 6;
+
+	/* port 5 does not exist on MT7628 */
+	if (!is_cpu && (port < 0 || port > 4))
+		return;
+
+	if (is_cpu)
+		config->mac_capabilities |= MAC_1000;
+
+	config->mac_capabilities |= MAC_100 | MAC_10;
+	__set_bit(PHY_INTERFACE_MODE_INTERNAL,
+		  config->supported_interfaces);
+}
+
+static int mt7628_dsa_8021q_vlan_add(struct dsa_switch *ds, int port,
+				     u16 vid, u16 flags)
+{
+	struct mt7628_esw *priv = ds->priv;
+	struct mt7628_vlan *slot = NULL;
+	int idx;
+
+	/* Prefer the slot already holding vid, else the first unused one. */
+	for (idx = 0; idx < MT7628_NUM_VLANS; idx++) {
+		struct mt7628_vlan *entry = &priv->vlans[idx];
+
+		if (entry->active && entry->vid == vid) {
+			slot = entry;
+			break;
+		}
+
+		if (!slot && !entry->active)
+			slot = entry;
+	}
+
+	if (!slot)
+		return -ENOSPC;
+
+	slot->active = true;
+	slot->vid = vid;
+	slot->members |= BIT(port);
+	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
+		slot->untag |= BIT(port);
+
+	if (flags & BRIDGE_VLAN_INFO_PVID)
+		priv->tag_8021q_pvid[port] = vid;
+
+	mt7628_vlan_sync(ds);
+	return 0;
+}
+
+static int mt7628_dsa_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
+{
+	struct mt7628_esw *priv = ds->priv;
+	struct mt7628_vlan *slot = NULL;
+	int idx;
+
+	for (idx = 0; idx < MT7628_NUM_VLANS; idx++) {
+		struct mt7628_vlan *entry = &priv->vlans[idx];
+
+		if (entry->active && entry->vid == vid) {
+			slot = entry;
+			break;
+		}
+	}
+	if (!slot)
+		return -ENOENT;
+
+	slot->untag &= ~BIT(port);
+	slot->members &= ~BIT(port);
+	if (priv->tag_8021q_pvid[port] == vid)
+		priv->tag_8021q_pvid[port] = 0;
+
+	/* Release the slot once its last member port is gone. */
+	if (!slot->members) {
+		slot->vid = 0;
+		slot->active = false;
+	}
+
+	mt7628_vlan_sync(ds);
+	return 0;
+}
+
+static void mt7628_teardown(struct dsa_switch *ds)
+{
+	rtnl_lock();	/* as around the register call in mt7628_setup() */
+	dsa_tag_8021q_unregister(ds);
+	rtnl_unlock();
+}
+
+static const struct dsa_switch_ops mt7628_switch_ops = {
+	.get_tag_protocol = mt7628_get_tag_proto,
+	.setup = mt7628_setup,	/* resets, init, MDIO bus, tag_8021q */
+	.teardown = mt7628_teardown,	/* drops the tag_8021q registration */
+	.port_enable = mt7628_port_enable,
+	.port_disable = mt7628_port_disable,
+	.phylink_get_caps = mt7628_phylink_get_caps,
+	.tag_8021q_vlan_add = mt7628_dsa_8021q_vlan_add,
+	.tag_8021q_vlan_del = mt7628_dsa_8021q_vlan_del,
+};
+
+static int mt7628_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mt7628_esw *priv;
+	struct dsa_switch *ds;
+
+	ds = devm_kzalloc(dev, sizeof(*ds), GFP_KERNEL);
+	if (!ds)
+		return -ENOMEM;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	priv->regmap = devm_regmap_init_mmio(dev, priv->base,
+					     &mt7628_esw_regmap_cfg);
+	if (IS_ERR(priv->regmap))
+		return PTR_ERR(priv->regmap);
+
+	priv->rst_ephy = devm_reset_control_get_exclusive(dev, "ephy");
+	if (IS_ERR(priv->rst_ephy))
+		return dev_err_probe(dev, PTR_ERR(priv->rst_ephy),
+				     "failed to get EPHY reset\n");
+
+	priv->rst_esw = devm_reset_control_get_exclusive(dev, "esw");
+	if (IS_ERR(priv->rst_esw))
+		return dev_err_probe(dev, PTR_ERR(priv->rst_esw),
+				     "failed to get ESW reset\n");
+
+	priv->dev = dev;
+	priv->ds = ds;
+	ds->priv = priv;
+	ds->dev = dev;
+	ds->ops = &mt7628_switch_ops;
+	ds->num_ports = MT7628_ESW_NUM_PORTS;
+	dev_set_drvdata(dev, priv);
+
+	return dsa_register_switch(ds);
+}
+
+static void mt7628_remove(struct platform_device *pdev)
+{
+	struct mt7628_esw *priv = platform_get_drvdata(pdev);
+
+	/*
+	 * drvdata is NULL once mt7628_shutdown() has run; nothing to undo.
+	 */
+	if (priv)
+		dsa_unregister_switch(priv->ds);
+}
+
+static void mt7628_shutdown(struct platform_device *pdev)
+{
+	struct mt7628_esw *priv = platform_get_drvdata(pdev);
+
+	if (priv) {
+		dsa_switch_shutdown(priv->ds);
+		/* lets mt7628_remove() see that nothing is left to undo */
+		dev_set_drvdata(&pdev->dev, NULL);
+	}
+}
+
+static const struct of_device_id mt7628_of_match[] = {
+	{ .compatible = "mediatek,mt7628-esw" },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, mt7628_of_match);
+
+static struct platform_driver mt7628_driver = {
+	.probe = mt7628_probe,
+	.remove = mt7628_remove,
+	.shutdown = mt7628_shutdown,
+	.driver = {
+		.name = "mt7628-esw",
+		.of_match_table = mt7628_of_match,
+	},
+};
+
+module_platform_driver(mt7628_driver);
+
+MODULE_AUTHOR("Joris Vaisvila <joey@tinyisr.com>");
+MODULE_DESCRIPTION("Driver for Mediatek MT7628 embedded switch");
+MODULE_LICENSE("GPL");
-- 
2.54.0


^ permalink raw reply related

* [PATCH net-next v5 3/4] net: dsa: initial MT7628 tagging driver
From: Joris Vaisvila @ 2026-06-13 18:18 UTC (permalink / raw)
  To: netdev
  Cc: horms, pabeni, kuba, edumazet, davem, olteanv, Andrew Lunn,
	devicetree, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Arınç ÜNAL, Landen Chao, DENG Qingfang, Sean Wang,
	Daniel Golle, Joris Vaisvila
In-Reply-To: <20260613181845.111877-1-joey@tinyisr.com>

Add support for the MT7628 embedded switch's tag.

The MT7628 tag is merged with the VLAN TPID field when a VLAN is
appended by the switch hardware. It is not installed if the VLAN tag is
already there on ingress. Due to this hardware quirk the tag cannot be
trusted for port 0 if we don't know that the VLAN was added by the
hardware. As a workaround for this the switch is configured to always
append the port PVID tag even if the incoming packet is already tagged.
The tagging driver can then trust that the tag is always accurate and
the whole VLAN tag can be removed on ingress as it's only metadata for
the tagger.

On egress the MT7628 tag allows precise TX, but the correct VLAN tag
from tag_8021q is still appended or the switch will not forward the
packet.

Signed-off-by: Joris Vaisvila <joey@tinyisr.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
 include/net/dsa.h    |  2 +
 net/dsa/Kconfig      |  6 +++
 net/dsa/Makefile     |  1 +
 net/dsa/tag_mt7628.c | 89 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+)
 create mode 100644 net/dsa/tag_mt7628.c

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 8c16ef23cc10..913d1f71e3db 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -59,6 +59,7 @@ struct tc_action;
 #define DSA_TAG_PROTO_MXL_GSW1XX_VALUE		31
 #define DSA_TAG_PROTO_MXL862_VALUE		32
 #define DSA_TAG_PROTO_NETC_VALUE		33
+#define DSA_TAG_PROTO_MT7628_VALUE		34
 
 enum dsa_tag_protocol {
 	DSA_TAG_PROTO_NONE		= DSA_TAG_PROTO_NONE_VALUE,
@@ -95,6 +96,7 @@ enum dsa_tag_protocol {
 	DSA_TAG_PROTO_MXL_GSW1XX	= DSA_TAG_PROTO_MXL_GSW1XX_VALUE,
 	DSA_TAG_PROTO_MXL862		= DSA_TAG_PROTO_MXL862_VALUE,
 	DSA_TAG_PROTO_NETC		= DSA_TAG_PROTO_NETC_VALUE,
+	DSA_TAG_PROTO_MT7628		= DSA_TAG_PROTO_MT7628_VALUE,
 };
 
 struct dsa_switch;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index d5e725b90d78..23b4b74004ed 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -98,6 +98,12 @@ config NET_DSA_TAG_EDSA
 	  Say Y or M if you want to enable support for tagging frames for the
 	  Marvell switches which use EtherType DSA headers.
 
+config NET_DSA_TAG_MT7628
+	tristate "Tag driver for the MT7628 embedded switch"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  switch embedded in the MT7628 SoC.
+
 config NET_DSA_TAG_MTK
 	tristate "Tag driver for Mediatek switches"
 	help
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index b8c2667cd14a..d15bcf5c68f0 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
 obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o
 obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
 obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
+obj-$(CONFIG_NET_DSA_TAG_MT7628) += tag_mt7628.o
 obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
 obj-$(CONFIG_NET_DSA_TAG_MXL_862XX) += tag_mxl862xx.o
 obj-$(CONFIG_NET_DSA_TAG_MXL_GSW1XX) += tag_mxl-gsw1xx.o
diff --git a/net/dsa/tag_mt7628.c b/net/dsa/tag_mt7628.c
new file mode 100644
index 000000000000..f0e346595f30
--- /dev/null
+++ b/net/dsa/tag_mt7628.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026, Joris Vaisvila <joey@tinyisr.com>
+ * MT7628 switch tag support
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/dsa/8021q.h>
+#include <net/dsa.h>
+
+#include "tag.h"
+
+/*
+ * The MT7628 tag is encoded in the VLAN TPID field.
+ * On TX the lower 6 bits encode the destination port bitmask.
+ * On RX the lower 3 bits encode the source port number.
+ *
+ * The switch hardware will not modify the TPID of an incoming packet if it is
+ * already VLAN tagged. To work around this the switch is configured to always
+ * append a tag_8021q standalone VLAN tag for each port. That means we can
+ * safely strip the outer VLAN tag after parsing it.
+ *
+ * A VLAN tag is constructed on egress to target the standalone VLAN and
+ * destination port.
+ */
+
+#define MT7628_TAG_NAME "mt7628"
+
+#define MT7628_TAG_TX_PORT GENMASK(5, 0)
+#define MT7628_TAG_RX_PORT GENMASK(2, 0)
+#define MT7628_TAG_LEN 4
+
+static struct sk_buff *mt7628_tag_xmit(struct sk_buff *skb,
+				       struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_user_to_port(dev);
+	u16 vid = dsa_tag_8021q_standalone_vid(dp);
+	__be16 *hdr;
+	u16 tpid;
+
+	skb_push(skb, MT7628_TAG_LEN);
+	dsa_alloc_etype_header(skb, MT7628_TAG_LEN);
+	hdr = dsa_etype_header_pos_tx(skb);
+
+	/* The destination port mask goes in the low six bits of the TPID. */
+	tpid = ETH_P_8021Q |
+	       FIELD_PREP(MT7628_TAG_TX_PORT, dsa_xmit_port_mask(skb, dev));
+
+	hdr[0] = htons(tpid);
+	hdr[1] = htons(vid);
+
+	return skb;
+}
+
+static struct sk_buff *mt7628_tag_rcv(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	unsigned int src_port;
+	__be16 *hdr;
+
+	if (unlikely(!pskb_may_pull(skb, MT7628_TAG_LEN)))
+		return NULL;
+
+	hdr = dsa_etype_header_pos_rx(skb);
+	src_port = FIELD_GET(MT7628_TAG_RX_PORT, ntohs(*hdr));	/* from TPID */
+	skb->dev = dsa_conduit_find_user(dev, 0, src_port);
+	if (!skb->dev)
+		return NULL;
+
+	skb_pull_rcsum(skb, MT7628_TAG_LEN);
+	dsa_strip_etype_header(skb, MT7628_TAG_LEN);
+	dsa_default_offload_fwd_mark(skb);
+	return skb;
+}
+
+static const struct dsa_device_ops mt7628_tag_ops = {
+	.name = MT7628_TAG_NAME,
+	.proto = DSA_TAG_PROTO_MT7628,
+	.xmit = mt7628_tag_xmit,
+	.rcv = mt7628_tag_rcv,
+	.needed_headroom = MT7628_TAG_LEN,	/* bytes pushed by xmit */
+};
+
+module_dsa_tag_driver(mt7628_tag_ops);
+
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_MT7628, MT7628_TAG_NAME);
+MODULE_DESCRIPTION("DSA tag driver for MT7628 switch");
+MODULE_LICENSE("GPL");
-- 
2.54.0


^ permalink raw reply related

* [PATCH net-next v5 2/4] net: phy: mediatek: add phy driver for MT7628 built-in Fast Ethernet PHYs
From: Joris Vaisvila @ 2026-06-13 18:18 UTC (permalink / raw)
  To: netdev
  Cc: horms, pabeni, kuba, edumazet, davem, olteanv, Andrew Lunn,
	devicetree, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Arınç ÜNAL, Landen Chao, DENG Qingfang, Sean Wang,
	Daniel Golle, Joris Vaisvila
In-Reply-To: <20260613181845.111877-1-joey@tinyisr.com>

The Fast Ethernet PHYs present in the MT7628 SoCs require an
undocumented bit to be set before they can establish 100 Mbps links.

This commit adds the Kconfig option MEDIATEK_FE_SOC_PHY and the
corresponding driver mtk-fe-soc.c.

Signed-off-by: Joris Vaisvila <joey@tinyisr.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
 drivers/net/phy/mediatek/Kconfig      | 10 +++++-
 drivers/net/phy/mediatek/Makefile     |  1 +
 drivers/net/phy/mediatek/mtk-fe-soc.c | 50 +++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/phy/mediatek/mtk-fe-soc.c

diff --git a/drivers/net/phy/mediatek/Kconfig b/drivers/net/phy/mediatek/Kconfig
index bb7dc876271e..b6a51f38c358 100644
--- a/drivers/net/phy/mediatek/Kconfig
+++ b/drivers/net/phy/mediatek/Kconfig
@@ -21,8 +21,16 @@ config MEDIATEK_GE_PHY
 	  common operations with MediaTek SoC built-in Gigabit
 	  Ethernet PHYs.
 
+config MEDIATEK_FE_SOC_PHY
+	tristate "MediaTek SoC Fast Ethernet PHYs"
+	help
+	  Support for MediaTek MT7628 built-in Fast Ethernet PHYs.
+	  This driver only sets an initialization bit required for the PHY
+	  to establish 100 Mbps links. All other PHY operations are handled
+	  by the kernel's generic PHY code.
+
 config MEDIATEK_GE_SOC_PHY
-	tristate "MediaTek SoC Ethernet PHYs"
+	tristate "MediaTek SoC Gigabit Ethernet PHYs"
 	depends on ARM64 || COMPILE_TEST
 	depends on ARCH_AIROHA || (ARCH_MEDIATEK && NVMEM_MTK_EFUSE) || \
 		   COMPILE_TEST
diff --git a/drivers/net/phy/mediatek/Makefile b/drivers/net/phy/mediatek/Makefile
index ac57ecc799fc..6f9cacf7f906 100644
--- a/drivers/net/phy/mediatek/Makefile
+++ b/drivers/net/phy/mediatek/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_MEDIATEK_2P5GE_PHY)	+= mtk-2p5ge.o
+obj-$(CONFIG_MEDIATEK_FE_SOC_PHY)	+= mtk-fe-soc.o
 obj-$(CONFIG_MEDIATEK_GE_PHY)		+= mtk-ge.o
 obj-$(CONFIG_MEDIATEK_GE_SOC_PHY)	+= mtk-ge-soc.o
 obj-$(CONFIG_MTK_NET_PHYLIB)		+= mtk-phy-lib.o
diff --git a/drivers/net/phy/mediatek/mtk-fe-soc.c b/drivers/net/phy/mediatek/mtk-fe-soc.c
new file mode 100644
index 000000000000..9eb4960bcaad
--- /dev/null
+++ b/drivers/net/phy/mediatek/mtk-fe-soc.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for MT7628 Embedded Switch internal Fast Ethernet PHYs
+ */
+#include <linux/module.h>
+#include <linux/phy.h>
+
+#define MTK_FPHY_ID_MT7628	0x03a29410
+#define MTK_EXT_PAGE_ACCESS	0x1f
+
+static int mt7628_phy_read_page(struct phy_device *phydev)
+{
+	return __phy_read(phydev, MTK_EXT_PAGE_ACCESS);
+}
+
+static int mt7628_phy_write_page(struct phy_device *phydev, int page)
+{
+	return __phy_write(phydev, MTK_EXT_PAGE_ACCESS, page);
+}
+
+static int mt7628_phy_config_init(struct phy_device *phydev)
+{
+	/*
+	 * This undocumented bit is required for the PHYs to be able to
+	 * establish 100mbps links.
+	 */
+	return phy_modify_paged(phydev, 0x8000, 30, BIT(13), BIT(13));
+}
+
+static struct phy_driver mtk_soc_fe_phy_driver[] = {
+	{
+		PHY_ID_MATCH_EXACT(MTK_FPHY_ID_MT7628),
+		.name = "MediaTek MT7628 PHY",
+		.config_init = mt7628_phy_config_init,
+		.read_page = mt7628_phy_read_page,
+		.write_page = mt7628_phy_write_page,
+	},
+};
+
+module_phy_driver(mtk_soc_fe_phy_driver);
+static const struct mdio_device_id __maybe_unused mtk_soc_fe_phy_tbl[] = {
+	{ PHY_ID_MATCH_EXACT(MTK_FPHY_ID_MT7628) },
+	{ }
+};
+
+MODULE_DESCRIPTION("MediaTek SoC Fast Ethernet PHY driver");
+MODULE_AUTHOR("Joris Vaisvila <joey@tinyisr.com>");
+MODULE_LICENSE("GPL");
+
+MODULE_DEVICE_TABLE(mdio, mtk_soc_fe_phy_tbl);
-- 
2.54.0


^ permalink raw reply related

* [PATCH net-next v5 1/4] dt-bindings: net: dsa: add MT7628 ESW
From: Joris Vaisvila @ 2026-06-13 18:18 UTC (permalink / raw)
  To: netdev
  Cc: horms, pabeni, kuba, edumazet, davem, olteanv, Andrew Lunn,
	devicetree, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Arınç ÜNAL, Landen Chao, DENG Qingfang, Sean Wang,
	Daniel Golle, Joris Vaisvila, Krzysztof Kozlowski
In-Reply-To: <20260613181845.111877-1-joey@tinyisr.com>

Add device tree bindings for the MediaTek MT7628 embedded Ethernet
Switch.

The Switch provides 5 external user ports and 1 internal CPU port, with
integrated 10/100 PHYs and fixed port to PHY mapping.

The CPU port is internally connected and uses port index 6.

Signed-off-by: Joris Vaisvila <joey@tinyisr.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
---
 .../bindings/net/dsa/mediatek,mt7628-esw.yaml | 96 +++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/dsa/mediatek,mt7628-esw.yaml

diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7628-esw.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7628-esw.yaml
new file mode 100644
index 000000000000..e0e7ffef6648
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7628-esw.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/dsa/mediatek,mt7628-esw.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek MT7628 Embedded Ethernet Switch
+
+maintainers:
+  - Joris Vaisvila <joey@tinyisr.com>
+
+description:
+  The MT7628 SoC's built-in Ethernet Switch has five user ports and one
+  internally connected CPU port. The user ports are all connected to the SoC's
+  integrated Fast Ethernet PHYs. The switch registers are directly mapped in
+  the SoC's memory.
+
+allOf:
+  - $ref: dsa.yaml#/$defs/ethernet-ports
+
+properties:
+  compatible:
+    const: mediatek,mt7628-esw
+
+  reg:
+    maxItems: 1
+
+  resets:
+    items:
+      - description: internal switch block reset
+      - description: internal phy package reset
+
+  reset-names:
+    items:
+      - const: esw
+      - const: ephy
+
+required:
+  - compatible
+  - reg
+  - resets
+  - reset-names
+  - ethernet-ports
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    switch@10110000 {
+        compatible = "mediatek,mt7628-esw";
+        reg = <0x10110000 0x8000>;
+
+        resets = <&sysc 23>, <&sysc 24>;
+        reset-names = "esw", "ephy";
+
+        ethernet-ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ethernet-port@0 {
+                reg = <0>;
+                phy-mode = "internal";
+            };
+
+            ethernet-port@1 {
+                reg = <1>;
+                phy-mode = "internal";
+            };
+
+            ethernet-port@2 {
+                reg = <2>;
+                phy-mode = "internal";
+            };
+
+            ethernet-port@3 {
+                reg = <3>;
+                phy-mode = "internal";
+            };
+
+            ethernet-port@4 {
+                reg = <4>;
+                phy-mode = "internal";
+            };
+
+            ethernet-port@6 {
+                reg = <6>;
+                phy-mode = "internal";
+                ethernet = <&ethernet>;
+
+                fixed-link {
+                    speed = <1000>;
+                    full-duplex;
+                };
+            };
+        };
+    };
-- 
2.54.0


^ permalink raw reply related

* [PATCH net-next v5 0/4] net: dsa: mt7628 embedded switch initial support
From: Joris Vaisvila @ 2026-06-13 18:18 UTC (permalink / raw)
  To: netdev
  Cc: horms, pabeni, kuba, edumazet, davem, olteanv, Andrew Lunn,
	devicetree, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Arınç ÜNAL, Landen Chao, DENG Qingfang, Sean Wang,
	Daniel Golle, Joris Vaisvila

This patch series adds initial support for the MediaTek MT7628 Embedded
Switch.

The driver implements the basic functionality required to operate the
switch using DSA. The hardware provides five internal Fast Ethernet user
ports and one Gigabit port connected internally to the CPU MAC.

Bridge offloading is not yet supported.

Tested on an MT7628NN-based board.

Changes since v4:
	mt7628 dsa driver:
		- fixed mdiobus allocation tied to platform device while
		  being initialized in DSA switch setup
	mt7628 phy driver:
		- replaced phy_write() with phy_modify() when setting PHY init
		  bit (no functional change)
	mt7628 dt binding:
		- moved unevaluatedProperties after required block
		- removed blank line between compatible and reg in example
Link: https://lore.kernel.org/netdev/20260608192948.289745-1-joey@tinyisr.com/t/#u

Changes since v3:
	- rebased on latest net-next
	mt7628 dsa driver:
		- simplified vlan_add hardware vlan slot search
		- fixed vlan_del not removing vid from port pvid
		- separated mii_read/mii_write error handling from return
		  value parsing. Updated RD_DONE/WT_DONE bit checking
		  with clearer logic and a comment.
		- moved NET_DSA_MT7628 after NET_DSA_MT7530 in Kconfig
		- added missing reset return value checks in probe
		- fixed mt7628_switch_ops missing const specifier
		- removed mdio node parsing from OF, as there is nothing
		  to configure
	mt7628 dt binding:
		- updated description to be more clear about port count
		- dropped optional mdio subnode. the switch does not
		  expose an external MDIO bus and all integrated PHY
		  access is handled by the driver.
		- removed unused switch0 label in example
Link: https://lore.kernel.org/netdev/20260428185510.261521-1-joey@tinyisr.com/t/#u

Changes since v2:
	- fix binding issues found in review
	- fix ignored dsa_tag_8021q_register return value
	- add switch teardown to clean up tag_8021q
	- fix ordering issue where mdio probe fail would leak tag_8021q
Link: https://lore.kernel.org/netdev/20260330184017.766200-1-joey@tinyisr.com/t/#u

Changes since v1:
	- changed port 6 phy-mode to internal
	- cleaned up tag_mt7628 rcv function and mask defines
	- fixed sorting error in drivers/net/dsa/ Kconfig and Makefile
	- fixed sorting error in net/dsa/ Kconfig and Makefile
	- fixed mt7628_mii_read/write return values on error
Link: https://lore.kernel.org/netdev/20260326204413.3317584-1-joey@tinyisr.com/t/#u

Thanks,
Joris

Joris Vaisvila (4):
  dt-bindings: net: dsa: add MT7628 ESW
  net: phy: mediatek: add phy driver for MT7628 built-in Fast Ethernet
    PHYs
  net: dsa: initial MT7628 tagging driver
  net: dsa: initial support for MT7628 embedded switch

 .../bindings/net/dsa/mediatek,mt7628-esw.yaml |  96 +++
 drivers/net/dsa/Kconfig                       |   8 +
 drivers/net/dsa/Makefile                      |   1 +
 drivers/net/dsa/mt7628.c                      | 649 ++++++++++++++++++
 drivers/net/phy/mediatek/Kconfig              |  10 +-
 drivers/net/phy/mediatek/Makefile             |   1 +
 drivers/net/phy/mediatek/mtk-fe-soc.c         |  50 ++
 include/net/dsa.h                             |   2 +
 net/dsa/Kconfig                               |   6 +
 net/dsa/Makefile                              |   1 +
 net/dsa/tag_mt7628.c                          |  89 +++
 11 files changed, 912 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/net/dsa/mediatek,mt7628-esw.yaml
 create mode 100644 drivers/net/dsa/mt7628.c
 create mode 100644 drivers/net/phy/mediatek/mtk-fe-soc.c
 create mode 100644 net/dsa/tag_mt7628.c

-- 
2.54.0


^ permalink raw reply

* Re: [RFC PATCH bpf-next 0/5] tcp: opportunistic loopback splice for BPF-paired sockets
From: Jakub Kicinski @ 2026-06-13 17:57 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Cong Wang, Network Development, bpf, John Fastabend,
	Jakub Sitnicki, Jiayuan Chen, Hemanth Malla, zijianzhang
In-Reply-To: <CAADnVQ+KTNKkf_Tc-RZR-g8wEfJU4qWcOPnjDbA2=PEtZsYnYg@mail.gmail.com>

On Fri, 12 Jun 2026 09:01:43 -0700 Alexei Starovoitov wrote:
> Just saying that the code is free nowadays, so whether it's 1k lines
> or 10 lines is irrelevant for the discussion.
> 
> As far as the idea goes, I think, it would be interesting in pre-AI era,
> but today splice and friends are a prime target for bugs and more bugs.
> skmsg and tcp_bpf are reeling from unfixed bugs too,
> so my take is that we should not add any new features to skmsg
> and instead deprecate what is already there.

100% agreed. There are so many unfixed skmsg bugs it's hard to know
where to start :( Kernel "intelligence" to help unoptimized applications
is particularly unappealing right now.

^ permalink raw reply

* Re: [PATCH bpf v5 1/2] bpf: Run generic devmap egress prog on private skb
From: Alexei Starovoitov @ 2026-06-13 17:53 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Sun Jian, bpf, Network Development, LKML,
	open list:KERNEL SELFTEST FRAMEWORK, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau,
	David S. Miller, Jesper Dangaard Brouer, John Fastabend,
	Stanislav Fomichev, Shuah Khan, Jiayuan Chen,
	Toke Høiland-Jørgensen, Menglong Dong, Emil Tsalapatis
In-Reply-To: <20260613102549.0061a875@kernel.org>

On Sat, Jun 13, 2026 at 10:25 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Fri, 12 Jun 2026 19:40:31 +0800 Sun Jian wrote:
> > Suggested-by: Jakub Kicinski <kuba@kernel.org>
>
> I did not suggest this

ohh. I didn't follow discussion closely.
Do you want me to revert the whole set or just remove that line?

^ permalink raw reply

* Re: [PATCH net-next v3 0/4] vsock: consolidate acceptq accounting into core helpers
From: patchwork-bot+netdevbpf @ 2026-06-13 17:50 UTC (permalink / raw)
  To: Raf Dickson
  Cc: netdev, virtualization, pabeni, sgarzare, stefanha, bryan-bt.tan,
	vishnu.dasa, bcm-kernel-feedback-list, bobbyeshleman, leonardi,
	horms, edumazet, kuba
In-Reply-To: <20260612045216.105796-1-rafdog35@gmail.com>

Hello:

This series was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Fri, 12 Jun 2026 04:52:12 +0000 you wrote:
> These patches follow up on commit c05fa14db43e
> ("vsock/vmci: fix sk_ack_backlog leak on failed handshake")
> by consolidating sk_acceptq_added() and sk_acceptq_removed() into
> the core vsock helpers so transports cannot forget them.
> 
> Changes since v2:
>   - Add vsock_pending_to_accept() helper for the vmci pending->accept
>     transition, avoiding a double sk_acceptq_added() (Stefano Garzarella)
>   - Split into 4 patches for bisectability (Stefano Garzarella)
>   - Fold sk_acceptq_added() into vsock_add_pending() as a separate patch
> 
> [...]

Here is the summary with links:
  - [net-next,v3,1/4] vsock: introduce vsock_pending_to_accept() helper
    https://git.kernel.org/netdev/net-next/c/77eee189397d
  - [net-next,v3,2/4] vsock: fold sk_acceptq_added() into vsock_add_pending()
    https://git.kernel.org/netdev/net-next/c/a6fd2cfdcdf5
  - [net-next,v3,3/4] vsock: fold sk_acceptq_added() into vsock_enqueue_accept()
    https://git.kernel.org/netdev/net-next/c/6f6f9b65a991
  - [net-next,v3,4/4] vsock: fold sk_acceptq_removed() into vsock_remove_pending()
    https://git.kernel.org/netdev/net-next/c/27fc25bb82e6

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net v3] net: wwan: t7xx: check skb_clone in control TX
From: patchwork-bot+netdevbpf @ 2026-06-13 17:50 UTC (permalink / raw)
  To: Ruoyu Wang
  Cc: chandrashekar.devegowda, haijun.liu, ricardo.martinez,
	loic.poulain, ryazanov.s.a, johannes, andrew+netdev, davem,
	edumazet, kuba, pabeni, netdev, linux-kernel
In-Reply-To: <20260612035613.1192486-1-ruoyuw560@gmail.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Fri, 12 Jun 2026 11:56:13 +0800 you wrote:
> t7xx_port_ctrl_tx() clones each skb fragment before passing it to the
> port transmit path. The clone is used immediately to set cloned->len, so
> an skb_clone() failure results in a NULL pointer dereference.
> 
> Check the clone before using it. If previous fragments were already
> queued, preserve the driver's existing partial-write behavior by
> returning the number of bytes submitted so far.
> 
> [...]

Here is the summary with links:
  - [net,v3] net: wwan: t7xx: check skb_clone in control TX
    https://git.kernel.org/netdev/net/c/05f789fa90d9

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* [PATCH net v2 2/2] selftests/tc-testing: act_ct: add TDC test for skb cb preservation across defrag
From: Ren Wei @ 2026-06-13 17:42 UTC (permalink / raw)
  To: netdev, linux-kselftest, linux-kernel
  Cc: jhs, jiri, kuba, paulb, victor, yuantan098, yifanwucs,
	tomapufckgml, bird, xizh2024, n05ec
In-Reply-To: <cover.1781358691.git.xizh2024@lzu.edu.cn>

From: Zihan Xi <xizh2024@lzu.edu.cn>

Add a tc-testing case that sends IPv4 fragments through act_ct on clsact
egress while a root prio qdisc is present on the transmit path.

The test verifies that packet processing and qdisc accounting continue
to work after conntrack defragmentation, covering tc_skb_cb preservation
across defragmentation.

Signed-off-by: Zihan Xi <xizh2024@lzu.edu.cn>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>

---
changes in v2:
  - Add tc-testing case 9c2a for skb cb preservation across defrag
  - v1 Link: https://lore.kernel.org/all/20260611154939.2615919-1-n05ec@lzu.edu.cn/
 .../tc-testing/tc-tests/actions/ct.json       | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 33bb8f3ff8ed..da65f838bd52 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -664,5 +664,43 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress_block 21 clsact"
         ]
+    },
+    {
+        "id": "9c2a",
+        "name": "Act_ct preserves skb cb across defrag before prio dequeue",
+        "category": [
+            "actions",
+            "ct",
+            "scapy"
+        ],
+        "plugins": {
+            "requires": [
+                "nsPlugin",
+                "scapyPlugin"
+            ]
+        },
+        "setup": [
+            "$TC qdisc add dev $DUMMY root handle 1: prio",
+            "$TC qdisc add dev $DUMMY clsact",
+            "$TC qdisc add dev $DEV1 clsact",
+            "$TC filter add dev $DEV1 ingress protocol ip prio 1 matchall action mirred egress redirect dev $DUMMY"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 1 matchall action ct zone 1 pipe",
+        "scapy": [
+            {
+                "iface": "$DEV0",
+                "count": 1,
+                "packet": "[Ether()/frag for frag in fragment(IP(src='10.0.0.10', dst='10.0.0.1', id=1)/UDP(sport=12345, dport=9)/Raw(b'A' * 4000), fragsize=1400)]"
+            }
+        ],
+        "expExitCode": "0",
+        "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 1 '^qdisc prio 1:'",
+        "matchPattern": "Sent [1-9][0-9]* bytes [1-9][0-9]* pkt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 clsact",
+            "$TC qdisc del dev $DUMMY clsact",
+            "$TC qdisc del dev $DUMMY root handle 1:"
+        ]
     }
 ]
-- 
2.43.0


^ permalink raw reply related

* [PATCH net v2 1/2] net/sched: act_ct: preserve tc_skb_cb across defragmentation
From: Ren Wei @ 2026-06-13 17:42 UTC (permalink / raw)
  To: netdev
  Cc: jhs, jiri, kuba, paulb, victor, yuantan098, yifanwucs,
	tomapufckgml, bird, xizh2024, n05ec
In-Reply-To: <cover.1781358691.git.xizh2024@lzu.edu.cn>

From: Zihan Xi <xizh2024@lzu.edu.cn>

tcf_ct_handle_fragments() calls nf_ct_handle_fragments() without saving
and restoring skb->cb. The defrag helper clears IPCB/IP6CB, which aliases
the tc_skb_cb/qdisc_skb_cb control buffer. Fragmented traffic through
act_ct therefore loses qdisc metadata such as pkt_segs and can trigger
DEBUG_NET_WARN_ON_ONCE() in qdisc_pkt_segs(), which panics the kernel
when panic_on_warn is enabled.

Save and restore the full tc_skb_cb around nf_ct_handle_fragments(),
matching the pattern used by ovs_ct_handle_fragments().

Fixes: ec624fe740b4 ("net/sched: Extend qdisc control block with tc control block")
Cc: stable@vger.kernel.org
Reported-by: Yuan Tan <yuantan098@gmail.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Assisted-by: Codex:gpt-5.4
Signed-off-by: Zihan Xi <xizh2024@lzu.edu.cn>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---
changes in v2:
  - Add TDC selftest in patch 2 per maintainer feedback
  - v1 Link: https://lore.kernel.org/all/20260611154939.2615919-1-n05ec@lzu.edu.cn/
 net/sched/act_ct.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 6158e13c98d3..ebd40daf05a6 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -845,10 +845,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
+	struct tc_skb_cb cb;
 	int err = 0;
 	bool frag;
 	u8 proto;
-	u16 mru;
 
 	/* Previously seen (loopback)? Ignore. */
 	ct = nf_ct_get(skb, &ctinfo);
@@ -862,12 +862,13 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
 	if (err || !frag)
 		return err;
 
-	err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
+	cb = *tc_skb_cb(skb);
+	err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &cb.mru);
 	if (err)
 		return err;
 
 	*defrag = true;
-	tc_skb_cb(skb)->mru = mru;
+	*tc_skb_cb(skb) = cb;
 
 	return 0;
 }
-- 
2.43.0


^ permalink raw reply related

* [PATCH net v2 0/2] net/sched: act_ct: preserve tc_skb_cb across defragmentation
From: Ren Wei @ 2026-06-13 17:42 UTC (permalink / raw)
  To: netdev, linux-kselftest, linux-kernel
  Cc: jhs, jiri, kuba, paulb, victor, yuantan098, yifanwucs,
	tomapufckgml, bird, xizh2024, n05ec

From: Zihan Xi <xizh2024@lzu.edu.cn>

Hi Linux kernel maintainers,

We found and validated an issue in net/sched/act_ct.c. The bug is
reachable when configuring TC with act_ct on a netdev (requires
CAP_NET_ADMIN). We have tested it, and the fix should not affect
other functionality.

We provide bug details, a PoC, and a crash log below.

v2 adds a tc-testing (TDC) selftest case in patch 2, per maintainer
feedback.

---- details below ----

Bug details:

tcf_ct_handle_fragments() calls nf_ct_handle_fragments() without
saving and restoring skb->cb. The defrag helper clears IPCB/IP6CB,
which aliases the tc_skb_cb/qdisc_skb_cb control buffer in
include/net/sch_generic.h. Fragmented traffic through act_ct
therefore loses qdisc metadata such as pkt_segs.

Later qdisc dequeue paths call qdisc_bstats_update() ->
qdisc_pkt_segs(). For a non-GSO skb, clobbered pkt_segs == 0 trips
DEBUG_NET_WARN_ON_ONCE() in qdisc_pkt_segs(). With panic_on_warn=1
the kernel panics.

Unlike ovs_ct_handle_fragments() in net/openvswitch/conntrack.c, the
act_ct caller only restored mru after defrag, not the full control
buffer. The attached patch saves and restores struct tc_skb_cb around
nf_ct_handle_fragments(), matching the OVS pattern.

Reproducer:

Run as root in the guest (QEMU bullseye image, eth0):

    chmod +x ./poc.sh
    ./poc.sh eth0 10.0.2.2 100

The script installs a root prio qdisc, clsact egress with "action ct",
then sends oversized UDP datagrams with PMTUD disabled to force IPv4
fragmentation through the act_ct defrag path.

We run the PoC in a 2 vCPU, 2 GB RAM x86 QEMU environment.

------BEGIN poc.sh------

#!/bin/sh
set -eu

IFACE="${1:-eth0}"
DST="${2:-10.0.2.2}"
COUNT="${3:-100}"

sysctl -w kernel.panic_on_warn=1 >/dev/null

tc qdisc del dev "$IFACE" clsact 2>/dev/null || true
tc qdisc del dev "$IFACE" root 2>/dev/null || true

tc qdisc add dev "$IFACE" root handle 1: prio
tc qdisc add dev "$IFACE" clsact
tc filter add dev "$IFACE" egress protocol ip pref 1 u32 \
	match u32 0 0 action ct zone 1 pipe

python3 - "$DST" "$COUNT" <<'PY'
import socket
import sys
import time

dst = sys.argv[1]
count = int(sys.argv[2])

IP_MTU_DISCOVER = 10
IP_PMTUDISC_DONT = 0

s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.setsockopt(socket.IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DONT)
payload = b"A" * 4000

for _ in range(count):
    s.sendto(payload, (dst, 9))
    time.sleep(0.01)
PY

------END poc.sh------

----BEGIN crash log----

[  549.900801][T10210] Kernel panic - not syncing: kernel: panic_on_warn set ...
[  549.901406][T10210] CPU: 2 UID: 0 PID: 10210 Comm: python3 Not tainted 7.1.0-rc1 #2 PREEMPT(full)
[  549.902720][T10210] Call Trace:
[  549.903756][T10210]  ? qdisc_dequeue_head+0x287/0x370
[  549.904713][T10210]  check_panic_on_warn+0x61/0x80
[  549.905053][T10210]  __warn+0xe8/0x330
[  549.905345][T10210]  ? qdisc_dequeue_head+0x287/0x370
[  549.909442][T10210] RIP: 0010:qdisc_dequeue_head+0x287/0x370
[  549.914217][T10210]  prio_dequeue+0x40c/0x6a0
[  549.914539][T10210]  __qdisc_run+0x170/0x1b30
[  549.915561][T10210]  __dev_queue_xmit+0x25e6/0x3ac0
[  549.920352][T10210]  ip_do_fragment+0x1188/0x19a0
[  549.924214][T10210]  udp_send_skb+0x885/0x1270
[  549.924556][T10210]  udp_sendmsg+0x13f3/0x20a0

-----END crash log-----

Best regards,
Zihan Xi

Zihan Xi (2):
  net/sched: act_ct: preserve tc_skb_cb across defragmentation
  selftests/tc-testing: act_ct: add TDC test for skb cb preservation
    across defrag

 net/sched/act_ct.c                            |  7 ++--
 .../tc-testing/tc-tests/actions/ct.json       | 38 +++++++++++++++++++
 2 files changed, 42 insertions(+), 3 deletions(-)

-- 
2.43.0


^ permalink raw reply

* Re: [PATCH net-next v3 4/4] vsock: fold sk_acceptq_removed() into vsock_remove_pending()
From: Jakub Kicinski @ 2026-06-13 17:40 UTC (permalink / raw)
  To: Stefano Garzarella
  Cc: Raf Dickson, netdev, virtualization, pabeni, stefanha,
	bryan-bt.tan, vishnu.dasa, bcm-kernel-feedback-list,
	bobbyeshleman, leonardi, horms, edumazet
In-Reply-To: <aivjf4TZU4Q_s20y@sgarzare-redhat>

On Fri, 12 Jun 2026 12:48:14 +0200 Stefano Garzarella wrote:
> >@@ -773,7 +774,6 @@ static void vsock_pending_work(struct work_struct *work)
> > 	if (vsock_is_pending(sk)) {
> > 		vsock_remove_pending(listener, sk);
> >  
>      ^^
> There is an extra blank line that we can now remove here.
> 
> BTW, the code LGTM:

Since the merge window is upon us - also updated when applying.

^ permalink raw reply

* Re: [PATCH v2 net-next 0/2] netdevsim: add fake FT/CLS_FLOWER offload
From: patchwork-bot+netdevbpf @ 2026-06-13 17:40 UTC (permalink / raw)
  To: Florian Westphal
  Cc: netdev, pabeni, davem, edumazet, kuba, netfilter-devel, pablo
In-Reply-To: <20260612092209.11966-1-fw@strlen.de>

Hello:

This series was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Fri, 12 Jun 2026 11:22:07 +0200 you wrote:
> v2: fix up error reporting via extack
>     shellcheck cleanups
>     sort config toggles
> 
> 1) Enable nf_tables offload control plane testing in netdevsim. Tag
>    existing offload fn to allow error injection for testing rollback and abort
>    logic.
> 
> [...]

Here is the summary with links:
  - [v2,net-next,1/2] netdevsim: tc: allow to test nf_tables offload control plane code
    https://git.kernel.org/netdev/net-next/c/07ca2ab4ce84
  - [v2,net-next,2/2] selftests: netfilter: add phony nft_offload test
    https://git.kernel.org/netdev/net-next/c/5394aa0bb00d

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net-next v2] vsock/vmci: use sk_acceptq_is_full() helper
From: patchwork-bot+netdevbpf @ 2026-06-13 17:40 UTC (permalink / raw)
  To: Raf Dickson
  Cc: netdev, virtualization, pabeni, sgarzare, stefanha, bryan-bt.tan,
	vishnu.dasa, bcm-kernel-feedback-list, leonardi, horms, edumazet,
	kuba
In-Reply-To: <20260612045842.122207-1-rafdog35@gmail.com>

Hello:

This patch was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Fri, 12 Jun 2026 04:58:42 +0000 you wrote:
> Replace the open-coded backlog check with sk_acceptq_is_full().
> The helper uses > instead of >=, which is the correct comparison
> per commit 64a146513f8f ("[NET]: Revert incorrect accept queue
> backlog changes."), and adds READ_ONCE() for proper memory ordering.
> 
> Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
> Signed-off-by: Raf Dickson <rafdog35@gmail.com>
> 
> [...]

Here is the summary with links:
  - [net-next,v2] vsock/vmci: use sk_acceptq_is_full() helper
    https://git.kernel.org/netdev/net-next/c/4ff2e84ff1b3

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net] net: ethernet: mtk_wed: debugfs: correct index in wed_amsdu_show()
From: patchwork-bot+netdevbpf @ 2026-06-13 17:40 UTC (permalink / raw)
  To: Wentao Guan
  Cc: lorenzo, nbd, sujuan.chen, netdev, linux-kernel, niecheng1,
	zhanjun
In-Reply-To: <20260612064501.203058-1-guanwentao@uniontech.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Fri, 12 Jun 2026 14:45:01 +0800 you wrote:
> WED_MON_AMSDU_ENG_CNT points to different entries in 'base+n*offset' mode,
> correct the wed amsdu entry number in wed_amsdu_show().
> 
> Fixes: 3f3de094e8342 ("net: ethernet: mtk_wed: debugfs: add WED 3.0 debugfs entries")
> Assisted-by: Copilot:gpt-5.2
> Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
> 
> [...]

Here is the summary with links:
  - [net] net: ethernet: mtk_wed: debugfs: correct index in wed_amsdu_show()
    https://git.kernel.org/netdev/net/c/14a8bc41ce9e

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH v2] net: airoha: Fix error handling in airoha_ppe_flush_sram_entries()
From: patchwork-bot+netdevbpf @ 2026-06-13 17:40 UTC (permalink / raw)
  To: Wayen.Yan; +Cc: netdev, lorenzo, linux-arm-kernel, linux-mediatek
In-Reply-To: <6a2bd37a.4034e349.1b41bb.1caf@mx.google.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Fri, 12 Jun 2026 17:37:00 +0800 you wrote:
> In airoha_ppe_flush_sram_entries(), the outer "err" variable was never
> updated when the inner loop variable shadowed it, causing the function
> to always return 0 even when airoha_ppe_foe_commit_sram_entry() fails.
> 
> Drop the outer "err" variable and return directly on error, propagating
> the error code from airoha_ppe_foe_commit_sram_entry() correctly.
> 
> [...]

Here is the summary with links:
  - [v2] net: airoha: Fix error handling in airoha_ppe_flush_sram_entries()
    https://git.kernel.org/netdev/net/c/d7d81b003013

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net-next v2] vsock/vmci: use sk_acceptq_is_full() helper
From: Jakub Kicinski @ 2026-06-13 17:37 UTC (permalink / raw)
  To: Stefano Garzarella
  Cc: Raf Dickson, netdev, virtualization, pabeni, stefanha,
	bryan-bt.tan, vishnu.dasa, bcm-kernel-feedback-list, leonardi,
	horms, edumazet
In-Reply-To: <aivKma8mRjTXV0BM@sgarzare-redhat>

On Fri, 12 Jun 2026 11:03:24 +0200 Stefano Garzarella wrote:
> nit: title should be updated since now this is not just vmci
> (e.g. vsock: use sk_acceptq_is_full() helper in all transports)
> 
> Not sure if it can be fixed while applying by netdev maintainers.

Updated and applied, thanks!

^ permalink raw reply

* Re: [PATCH bpf v5 1/2] bpf: Run generic devmap egress prog on private skb
From: Jakub Kicinski @ 2026-06-13 17:25 UTC (permalink / raw)
  To: Sun Jian
  Cc: bpf, netdev, linux-kernel, linux-kselftest, ast, daniel, andrii,
	martin.lau, davem, hawk, john.fastabend, sdf, shuah, jiayuan.chen,
	toke, menglong.dong, emil
In-Reply-To: <20260612114032.244616-2-sun.jian.kdev@gmail.com>

On Fri, 12 Jun 2026 19:40:31 +0800 Sun Jian wrote:
> Suggested-by: Jakub Kicinski <kuba@kernel.org>

I did not suggest this

^ permalink raw reply

* Re: [PATCH net-next] tcp: refine tcp_sequence() for the FIN exception
From: Simon Baatz @ 2026-06-13 17:24 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, netdev, eric.dumazet
In-Reply-To: <aigpnYzdgKSF8FZ4@gandalf.schnuecks.de>

On Tue, Jun 09, 2026 at 04:56:29PM +0200, Simon Baatz wrote:
> On Mon, Jun 08, 2026 at 05:45:30PM -0700, Eric Dumazet wrote:
> > On Mon, Jun 8, 2026 at 3:12 PM Simon Baatz <gmbnomis@gmail.com> wrote:
> > >
> > > Hi Eric,
> > >
> > > On Mon, Jun 08, 2026 at 03:14:52PM +0000, Eric Dumazet wrote:
> > > > Commit 0e24d17bd966 ("tcp: implement RFC 7323 window retraction
> > > > receiver requirements") removed the special FIN case that
> > > > was added in commit 1e3bb184e941 ("tcp: re-enable acceptance of
> > > > FIN packets when RWIN is 0").
> > >
> > > Commit 0e24d17bd966 did not remove the special handling; it is still
> > > present and covered by the test "tcp_rcv_zero_wnd_fin.pkt".
> > >
> > > > If a peer sends a segment containing data and a FIN flag before
> > > > it learns about our window retraction and has a buggy TCP stack,
> > > > it might place the FIN one byte beyond what it thinks is the
> > > > right edge of the window (i.e., max_window_edge + 1).
> > >
> > > The FIN exception in tcp_data_queue() is not a generic allowance for
> > > incorrect FIN handling.  It is much more specific and only applies
> > > when:
> > >
> > > 1. the packet is in-sequence
> > > 2. RWIN == 0
> > > 3. the packet is a bare FIN
> > >
> > > > The data portion (end_seq - th->fin) will end exactly at max_window_edge.
> > > > In this case, we will drop the packet if our receive queue is not empty,
> > > > even though the data was sent within the window we previously allowed.
> > > >
> > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > > Cc: Simon Baatz <gmbnomis@gmail.com>
> > > > ---
> > > >  net/ipv4/tcp_input.c | 8 +++++---
> > > >  1 file changed, 5 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > > > index ab7a4e5435a8a2cbb532d42c54af76d8541c903b..8560a9c6d38207c098d673497caf2c7652c36f5c 100644
> > > > --- a/net/ipv4/tcp_input.c
> > > > +++ b/net/ipv4/tcp_input.c
> > > > @@ -4812,18 +4812,20 @@ static enum skb_drop_reason tcp_sequence(const struct sock *sk,
> > > >                                        const struct tcphdr *th)
> > > >  {
> > > >       const struct tcp_sock *tp = tcp_sk(sk);
> > > > +     u32 seq_limit;
> > > >
> > > >       if (before(end_seq, tp->rcv_wup))
> > > >               return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
> > > >
> > > > -     if (unlikely(after(end_seq, tp->rcv_nxt + tcp_max_receive_window(tp)))) {
> > > > +     seq_limit = tp->rcv_nxt + tcp_max_receive_window(tp);
> > > > +     if (unlikely(after(end_seq, seq_limit))) {
> > > >               /* Some stacks are known to handle FIN incorrectly; allow the
> > > >                * FIN to extend beyond the window and check it in detail later.
> > > >                */
> > > > -             if (!after(end_seq - th->fin, tp->rcv_nxt + tcp_receive_window(tp)))
> > > > +             if (!after(end_seq - th->fin, seq_limit))
> > > >                       return SKB_NOT_DROPPED_YET;
> > >
> > > It is not clear which additional case this change is intended to
> > > allow.  Are you sure such a packet would not be rejected by later
> > > checks in the data path?
> > >
> > > (For the existing FIN exception, the previous condition also seems
> > > broader than necessary. Actually, it should be sufficient to use
> > > "!after(end_seq - th->fin, tp->rcv_nxt)")
> > 
> > It is possible our internal sashiko instance got this wrong.
> > 
> > <quote>
> > If tcp_max_receive_window() is greater than tcp_receive_window()
> > (i.e. the window was shrunk), and end_seq is after tcp_max_receive_window(),
> >  then end_seq - th->fin will always be after tcp_receive_window().
> > Does this mean the FIN workaround is disabled when the window has been shrunk?
> >  Should this use tcp_max_receive_window() instead?
> > </quote>
> > 
> > Can you suggest an alternative? Why use two confusing variants of
> > what should be the same thing?
> 
> Judging from past discussions, I think you prefer us to be strict in
> tcp_sequence(), so I think we should replicate the conditions
> (in-sequence, RWIN == 0, bare FIN) for the FIN exception in
> tcp_data_queue() closely here.  I’ll send a patch that mirrors those
> conditions in tcp_sequence().

Based on the discussion on the alternative (see thread
https://lore.kernel.org/netdev/20260610-tcp_fin_more_restrictive-v1-1-eefc30d7ddd8@gmail.com/),
we want to accept the "only FIN is beyond window" case in general,
not just for the RWIN == 0 special case.

It might be worth adding a test for the new "accept data packet with
only FIN extending beyond max win" edge case; I can propose one if
helpful.

Reviewed-by: Simon Baatz <gmbnomis@gmail.com>


-- 
Simon Baatz <gmbnomis@gmail.com>

^ permalink raw reply

* Re: [PATCH v2 bpf-next/net 0/5] bpf: Support RX/TX HW timestamp proxy.
From: Jakub Kicinski @ 2026-06-13 17:20 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: Alexei Starovoitov, Daniel Borkmann, Martin KaFai Lau,
	Stanislav Fomichev, Andrii Nakryiko, John Fastabend,
	Kumar Kartikeya Dwivedi, Eduard Zingerman, Song Liu,
	Yonghong Song, Jiri Olsa, Andrew Lunn, David S . Miller,
	Eric Dumazet, Paolo Abeni, Simon Horman, Willem de Bruijn,
	Kuniyuki Iwashima, bpf, netdev
In-Reply-To: <20260613010039.1362312-1-kuniyu@google.com>

On Sat, 13 Jun 2026 00:59:57 +0000 Kuniyuki Iwashima wrote:
> When standard socket applications are run on these hosts,
> a userspace proxy is required to mediate traffic between the
> hardware and the applications.
> 
>             +---------+                 +----------------------+
>             |  proxy  |                 |  socket application  |
>             +---------+                 +----------------------+
>               ^     ^                               ^
>   userspace   |     |                               |
>   -----------| |-----------------------------------------------
>              | |    |    +---------------------+    | skb
>              | |    `--->|  virtual interface  |<---'
>   kernel     | |   skb   +---------------------+
>   -----------| |-----------------------------------------------
>               |
>               v
>        +------------+
>        |  hardware  |
>        +------------+

The first patch looks kinda nonsensical but then I saw this diagram.
Looks like you're vibe coding an integration that makes it easier to
treat netdev as a slow path for a user networking stack.
Please tell me if I'm missing anything; otherwise, add my nack if you
repost.

^ permalink raw reply

* Re: [PATCH net-next] tcp: tighten the FIN exception in tcp_sequence()
From: Simon Baatz @ 2026-06-13 17:18 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jakub Kicinski, Simon Baatz via B4 Relay, Neal Cardwell,
	Kuniyuki Iwashima, David S. Miller, Paolo Abeni, Simon Horman,
	netdev, linux-kernel
In-Reply-To: <CANn89i+q3Q_k3PHvwOJfOgor4xD1YzSVYO-G+fY4+_ddkKLTQw@mail.gmail.com>

Hi Eric,

On Fri, Jun 12, 2026 at 08:47:51PM -0700, Eric Dumazet wrote:
> On Fri, Jun 12, 2026 at 4:40 PM Simon Baatz <gmbnomis@gmail.com> wrote:
> >
> > Hi Jakub,
> >
> > On Fri, Jun 12, 2026 at 03:43:55PM -0700, Jakub Kicinski wrote:
> > > On Wed, 10 Jun 2026 00:09:24 +0200 Simon Baatz via B4 Relay wrote:
> > > > From: Simon Baatz <gmbnomis@gmail.com>
> > > >
> > > > Commit 1e3bb184e941 ("tcp: re-enable acceptance of FIN packets when
> > > > RWIN is 0") added a special case in tcp_sequence() to mirror the FIN
> > > > exception in tcp_data_queue(), which accepts bare in-order FINs even
> > > > when the advertised window is zero. That behavior is not
> > > > RFC-compliant, but was introduced in commit 2bd99aef1b19 ("tcp: accept
> > > > bare FIN packets under memory pressure") to break tight FIN/ACK loops
> > > > caused by broken clients.
> > > >
> > > > However, the condition added by commit 1e3bb184e941 ("tcp: re-enable
> > > > acceptance of FIN packets when RWIN is 0") is broader than required
> > > > and allows other non-compliant packets as well.
> > > >
> > > > Tighten the tcp_sequence() FIN exception to only allow packets where
> > > > the packet is a bare in-order FIN and only the FIN flag extends beyond
> > > > tcp_max_receive_window(). In particular, this exception is only
> > > > reachable if tcp_max_receive_window() is zero. Otherwise the packet is
> > > > already accepted by the normal sequence check.
> > > >
> > > > The existing packetdrill test tcp_rcv_zero_wnd_fin.pkt exercises this
> > > > behavior already and does not need to be changed.
> > > >
> > > > Signed-off-by: Simon Baatz <gmbnomis@gmail.com>
> > >
> > > This is odd. You are sending this patch which shares a lot of
> > > similarities with Eric's patch:
> > > https://lore.kernel.org/all/20260608151452.706822-1-edumazet@google.com/
> > >
> > > Why are you submitting your own patch instead of discussing it further
> > > with Eric and letting him send v2?
> >
> > That's what I understood from Eric's reply to my comments.  He asked
> > for an alternative, so I sent this as a concrete suggestion.
> 
> Yes, this is fine; next time, please include a link to the 'other patch'
> since this discussion
> was started by someone :)

Hmm, in hindsight this is quite obvious...
 
> About your patch, I thought that it would be fine to allow a remote
> peer to add a FIN
> to a payload packet of N bytes even if RWIN == N

I just realized that RFC 9293 has an "or" in:

  RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND

  or

  RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND

(SEG.LEN includes SYN/FIN)

Linux TCP is stricter here effectively requiring both conditions
since commit 9ca48d6 ("tcp: do not accept packets beyond window"). 
Given that, it's fine to accept packets with an "additional FIN" that
fulfill the first condition.

> It seems the bug of your other stack is that the FIN can be sent with
> no payload,
> and this is a narrower case.
> 
> FIN storage (a bit) is there, we can generalize the acceptance of FIN for free ?

Agreed. Let's drop this patch and generalize the FIN handling as you suggest.

- Simon

-- 
Simon Baatz <gmbnomis@gmail.com>

^ permalink raw reply

* Re: [PATCH bpf-next v4 1/2] bpf: Fix bpf_get/setsockopt to tos for ipv4-mapped ipv6 socket
From: bot+bpf-ci @ 2026-06-13 17:07 UTC (permalink / raw)
  To: leon.hwang, bpf
  Cc: ast, daniel, andrii, eddyz87, memxor, martin.lau, song,
	yonghong.song, jolsa, emil, john.fastabend, sdf, davem, edumazet,
	kuba, pabeni, horms, shuah, leon.hwang, ihor.solodrai, netdev,
	linux-kernel, linux-kselftest, kernel-patches-bot, zhoufeng.zf,
	ast, andrii, daniel, martin.lau, eddyz87, yonghong.song, clm,
	ihor.solodrai
In-Reply-To: <20260613162443.60515-2-leon.hwang@linux.dev>

[-- Attachment #1: Type: text/plain, Size: 1105 bytes --]

> When TCP over IPv4 via INET6 API, bpf_get/setsockopt with ipv4 will
> fail, because sk->sk_family is AF_INET6. With ipv6 it will succeed but not
> take effect, because inet_csk(sk)->icsk_af_ops is ipv6_mapped and
> uses ip_queue_xmit, inet_sk(sk)->tos.
> To relax this restriction, allow getting/setting tos for those possible
> ipv4-mapped ipv6 sockets.
> Fixes: ee7f1e1302f5 ("bpf: Change bpf_setsockopt(SOL_IP) to reuse do_ip_setsockopt()")
> Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>

In an earlier review round it was suggested that "bpf_get/setsockopt
with ipv4 will fail" should read "bpf_get/setsockopt with SOL_IP will
fail", since the call uses the SOL_IP level rather than an ipv4 option.

The body here still reads "with ipv4 will fail" - was the s/ipv4/SOL_IP/
reword intended to be picked up?


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/27472657322

^ permalink raw reply

* Re: [PATCH] net: correcting section tags for .init and .exit data/functions
From: Nathan Chancellor @ 2026-06-13 17:01 UTC (permalink / raw)
  To: xur
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Neal Cardwell, Kuniyuki Iwashima, Willem de Bruijn,
	David Ahern, Ido Schimmel, Andreas Färber,
	Manivannan Sadhasivam, Nick Desaulniers, Bill Wendling,
	Justin Stitt, Maciej Żenczykowski, Yue Haibing, Jeff Layton,
	Kees Cook, Fernando Fernandez Mancera, Gustavo A. R. Silva,
	Sabrina Dubroca, Masahiro Yamada, Nicolas Schier, netdev,
	linux-kernel, linux-arm-kernel, linux-actions, llvm,
	kernel test robot
In-Reply-To: <20260612162257.896792-1-xur@google.com>

Hi Rong,

On Fri, Jun 12, 2026 at 09:22:57AM -0700, xur@google.com wrote:
> From: Rong Xu <xur@google.com>
> 
> Fix modpost warnings that have surfaced during Clang's distributed ThinLTO
> builds.
> 
>   WARNING: modpost: vmlinux: section mismatch in reference: tcp4_net_ops.llvm.4527429266264891517+0x8 (section: .data) -> tcp4_proc_init_net (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: udp4_net_ops.llvm.17425824324074326067+0x8 (section: .data) -> udp4_proc_init_net (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: ping_v4_net_ops.llvm.5641696707737373282+0x8 (section: .data) -> ping_v4_proc_init_net (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: if6_proc_net_ops.llvm.7870945277386035298+0x8 (section: .data) -> if6_proc_net_init (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: ipv6_addr_label_ops.llvm.5745897517271459135+0x8 (section: .data) -> ip6addrlbl_net_init (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: ndisc_net_ops.llvm.8806210167060761094+0x8 (section: .data) -> ndisc_net_init (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: raw6_net_ops.llvm.3743523335772203324+0x8 (section: .data) -> raw6_init_net (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: igmp6_net_ops.llvm.7071106350580158050+0x8 (section: .data) -> igmp6_net_init (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: tcpv6_net_ops.llvm.17505177970592326146+0x8 (section: .data) -> tcpv6_net_init (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: ip6_flowlabel_net_ops.llvm.6051723423336054316+0x8 (section: .data) -> ip6_flowlabel_proc_init (section: .init.text)
>   WARNING: modpost: vmlinux: section mismatch in reference: ipv6_proc_ops.llvm.7829948594772821810+0x8 (section: .data) -> ipv6_proc_init_net (section: .init.text)
> 
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202606111233.kM8oo8Df-lkp@intel.com/
> Signed-off-by: Rong Xu <xur@google.com>

Thanks for sending this change to try and clear up those new warnings
from the distributed ThinLTO build. Based on the build reports that
appear from this change downthread, it does not seem like it is quite
right. Additionally, I think the commit message could be a little more
descriptive around the root cause of the warnings and how this patch
actually addresses it (I can infer, but I think that information should
be front and center).

> ---
>  net/ipv4/ping.c          |  6 +++---
>  net/ipv4/tcp_ipv4.c      |  6 +++---
>  net/ipv4/udp.c           |  6 +++---
>  net/ipv6/addrconf.c      |  6 +++---
>  net/ipv6/addrlabel.c     |  6 +++---
>  net/ipv6/ip6_flowlabel.c |  6 +++---
>  net/ipv6/mcast.c         | 10 +++++-----
>  net/ipv6/ndisc.c         | 10 +++++-----
>  net/ipv6/proc.c          |  6 +++---
>  net/ipv6/raw.c           |  6 +++---
>  net/ipv6/tcp_ipv6.c      |  6 +++---
>  11 files changed, 37 insertions(+), 37 deletions(-)
> 
> diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
> index d36f1e273fde..1dda6d661ad8 100644
> --- a/net/ipv4/ping.c
> +++ b/net/ipv4/ping.c
> @@ -1144,17 +1144,17 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net)
>  	remove_proc_entry("icmp", net->proc_net);
>  }
>  
> -static struct pernet_operations ping_v4_net_ops = {
> +static struct pernet_operations ping_v4_net_ops __net_initdata = {
>  	.init = ping_v4_proc_init_net,
>  	.exit = ping_v4_proc_exit_net,
>  };
>  
> -int __init ping_proc_init(void)
> +int __net_init ping_proc_init(void)
>  {
>  	return register_pernet_subsys(&ping_v4_net_ops);
>  }
>  
> -void ping_proc_exit(void)
> +void __net_exit ping_proc_exit(void)
>  {
>  	unregister_pernet_subsys(&ping_v4_net_ops);
>  }
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index fdc81150ff6c..9caca5879466 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -3317,17 +3317,17 @@ static void __net_exit tcp4_proc_exit_net(struct net *net)
>  	remove_proc_entry("tcp", net->proc_net);
>  }
>  
> -static struct pernet_operations tcp4_net_ops = {
> +static struct pernet_operations tcp4_net_ops __net_initdata = {
>  	.init = tcp4_proc_init_net,
>  	.exit = tcp4_proc_exit_net,
>  };
>  
> -int __init tcp4_proc_init(void)
> +int __net_init tcp4_proc_init(void)
>  {
>  	return register_pernet_subsys(&tcp4_net_ops);
>  }
>  
> -void tcp4_proc_exit(void)
> +void __net_exit tcp4_proc_exit(void)
>  {
>  	unregister_pernet_subsys(&tcp4_net_ops);
>  }
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 70f6cbd4ef73..87f4cced2114 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -3600,17 +3600,17 @@ static void __net_exit udp4_proc_exit_net(struct net *net)
>  	remove_proc_entry("udp", net->proc_net);
>  }
>  
> -static struct pernet_operations udp4_net_ops = {
> +static struct pernet_operations udp4_net_ops __net_initdata = {
>  	.init = udp4_proc_init_net,
>  	.exit = udp4_proc_exit_net,
>  };
>  
> -int __init udp4_proc_init(void)
> +int __net_init udp4_proc_init(void)
>  {
>  	return register_pernet_subsys(&udp4_net_ops);
>  }
>  
> -void udp4_proc_exit(void)
> +void __net_exit udp4_proc_exit(void)
>  {
>  	unregister_pernet_subsys(&udp4_net_ops);
>  }
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index c9e5d3e48ab9..73d9439bd408 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -4527,17 +4527,17 @@ static void __net_exit if6_proc_net_exit(struct net *net)
>  	remove_proc_entry("if_inet6", net->proc_net);
>  }
>  
> -static struct pernet_operations if6_proc_net_ops = {
> +static struct pernet_operations if6_proc_net_ops __net_initdata = {
>  	.init = if6_proc_net_init,
>  	.exit = if6_proc_net_exit,
>  };
>  
> -int __init if6_proc_init(void)
> +int __net_init if6_proc_init(void)
>  {
>  	return register_pernet_subsys(&if6_proc_net_ops);
>  }
>  
> -void if6_proc_exit(void)
> +void __net_exit if6_proc_exit(void)
>  {
>  	unregister_pernet_subsys(&if6_proc_net_ops);
>  }
> diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
> index f4b2618446bd..50f6c1b1edaa 100644
> --- a/net/ipv6/addrlabel.c
> +++ b/net/ipv6/addrlabel.c
> @@ -340,17 +340,17 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
>  	spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
>  }
>  
> -static struct pernet_operations ipv6_addr_label_ops = {
> +static struct pernet_operations ipv6_addr_label_ops __net_initdata = {
>  	.init = ip6addrlbl_net_init,
>  	.exit = ip6addrlbl_net_exit,
>  };
>  
> -int __init ipv6_addr_label_init(void)
> +int __net_init ipv6_addr_label_init(void)
>  {
>  	return register_pernet_subsys(&ipv6_addr_label_ops);
>  }
>  
> -void ipv6_addr_label_cleanup(void)
> +void __net_exit ipv6_addr_label_cleanup(void)
>  {
>  	unregister_pernet_subsys(&ipv6_addr_label_ops);
>  }
> diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
> index b1ccdf0dc646..f6980c403c68 100644
> --- a/net/ipv6/ip6_flowlabel.c
> +++ b/net/ipv6/ip6_flowlabel.c
> @@ -903,17 +903,17 @@ static void __net_exit ip6_flowlabel_net_exit(struct net *net)
>  	ip6_flowlabel_proc_fini(net);
>  }
>  
> -static struct pernet_operations ip6_flowlabel_net_ops = {
> +static struct pernet_operations ip6_flowlabel_net_ops __net_initdata = {
>  	.init = ip6_flowlabel_proc_init,
>  	.exit = ip6_flowlabel_net_exit,
>  };
>  
> -int ip6_flowlabel_init(void)
> +int __net_init ip6_flowlabel_init(void)
>  {
>  	return register_pernet_subsys(&ip6_flowlabel_net_ops);
>  }
>  
> -void ip6_flowlabel_cleanup(void)
> +void __net_exit ip6_flowlabel_cleanup(void)
>  {
>  	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
>  	timer_delete(&ip6_fl_gc_timer);
> diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
> index d9b855d5191b..eef5bab1ee13 100644
> --- a/net/ipv6/mcast.c
> +++ b/net/ipv6/mcast.c
> @@ -3209,12 +3209,12 @@ static void __net_exit igmp6_net_exit(struct net *net)
>  	igmp6_proc_exit(net);
>  }
>  
> -static struct pernet_operations igmp6_net_ops = {
> +static struct pernet_operations igmp6_net_ops __net_initdata = {
>  	.init = igmp6_net_init,
>  	.exit = igmp6_net_exit,
>  };
>  
> -int __init igmp6_init(void)
> +int __net_init igmp6_init(void)
>  {
>  	int err;
>  
> @@ -3231,18 +3231,18 @@ int __init igmp6_init(void)
>  	return err;
>  }
>  
> -int __init igmp6_late_init(void)
> +int __net_init igmp6_late_init(void)
>  {
>  	return register_netdevice_notifier(&igmp6_netdev_notifier);
>  }
>  
> -void igmp6_cleanup(void)
> +void __net_exit igmp6_cleanup(void)
>  {
>  	unregister_pernet_subsys(&igmp6_net_ops);
>  	destroy_workqueue(mld_wq);
>  }
>  
> -void igmp6_late_cleanup(void)
> +void __net_exit igmp6_late_cleanup(void)
>  {
>  	unregister_netdevice_notifier(&igmp6_netdev_notifier);
>  }
> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
> index e7ad13c5bd26..3a83280db29d 100644
> --- a/net/ipv6/ndisc.c
> +++ b/net/ipv6/ndisc.c
> @@ -1994,12 +1994,12 @@ static void __net_exit ndisc_net_exit(struct net *net)
>  	inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
>  }
>  
> -static struct pernet_operations ndisc_net_ops = {
> +static struct pernet_operations ndisc_net_ops __net_initdata = {
>  	.init = ndisc_net_init,
>  	.exit = ndisc_net_exit,
>  };
>  
> -int __init ndisc_init(void)
> +int __net_init ndisc_init(void)
>  {
>  	int err;
>  
> @@ -2027,17 +2027,17 @@ int __init ndisc_init(void)
>  #endif
>  }
>  
> -int __init ndisc_late_init(void)
> +int __net_init ndisc_late_init(void)
>  {
>  	return register_netdevice_notifier(&ndisc_netdev_notifier);
>  }
>  
> -void ndisc_late_cleanup(void)
> +void __net_exit ndisc_late_cleanup(void)
>  {
>  	unregister_netdevice_notifier(&ndisc_netdev_notifier);
>  }
>  
> -void ndisc_cleanup(void)
> +void __net_exit ndisc_cleanup(void)
>  {
>  #ifdef CONFIG_SYSCTL
>  	neigh_sysctl_unregister(&nd_tbl.parms);
> diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
> index 813013ca4e75..c59bade608cd 100644
> --- a/net/ipv6/proc.c
> +++ b/net/ipv6/proc.c
> @@ -298,17 +298,17 @@ static void __net_exit ipv6_proc_exit_net(struct net *net)
>  	remove_proc_entry("snmp6", net->proc_net);
>  }
>  
> -static struct pernet_operations ipv6_proc_ops = {
> +static struct pernet_operations ipv6_proc_ops __net_initdata = {
>  	.init = ipv6_proc_init_net,
>  	.exit = ipv6_proc_exit_net,
>  };
>  
> -int __init ipv6_misc_proc_init(void)
> +int __net_init ipv6_misc_proc_init(void)
>  {
>  	return register_pernet_subsys(&ipv6_proc_ops);
>  }
>  
> -void ipv6_misc_proc_exit(void)
> +void __net_exit ipv6_misc_proc_exit(void)
>  {
>  	unregister_pernet_subsys(&ipv6_proc_ops);
>  }
> diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
> index 3cc58698cbbd..fe399675b8fc 100644
> --- a/net/ipv6/raw.c
> +++ b/net/ipv6/raw.c
> @@ -1256,17 +1256,17 @@ static void __net_exit raw6_exit_net(struct net *net)
>  	remove_proc_entry("raw6", net->proc_net);
>  }
>  
> -static struct pernet_operations raw6_net_ops = {
> +static struct pernet_operations raw6_net_ops __net_initdata = {
>  	.init = raw6_init_net,
>  	.exit = raw6_exit_net,
>  };
>  
> -int __init raw6_proc_init(void)
> +int __net_init raw6_proc_init(void)
>  {
>  	return register_pernet_subsys(&raw6_net_ops);
>  }
>  
> -void raw6_proc_exit(void)
> +void __net_exit raw6_proc_exit(void)
>  {
>  	unregister_pernet_subsys(&raw6_net_ops);
>  }
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 36d75fb50a70..d0737f16076b 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -2335,12 +2335,12 @@ static void __net_exit tcpv6_net_exit(struct net *net)
>  	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
>  }
>  
> -static struct pernet_operations tcpv6_net_ops = {
> +static struct pernet_operations tcpv6_net_ops __net_initdata = {
>  	.init	    = tcpv6_net_init,
>  	.exit	    = tcpv6_net_exit,
>  };
>  
> -int __init tcpv6_init(void)
> +int __net_init tcpv6_init(void)
>  {
>  	int ret;
>  
> @@ -2378,7 +2378,7 @@ int __init tcpv6_init(void)
>  	goto out;
>  }
>  
> -void tcpv6_exit(void)
> +void __net_exit tcpv6_exit(void)
>  {
>  	unregister_pernet_subsys(&tcpv6_net_ops);
>  	inet6_unregister_protosw(&tcpv6_protosw);
> 
> base-commit: 2b414a95b8f7307d42173ba9e580d6d3e2bcbfce
> -- 
> 2.54.0.1136.gdb2ca164c4-goog
> 
> 

-- 
Cheers,
Nathan

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox