Netdev List

Netdev List
 help / color / mirror / Atom feed

* [Patch net-next 7/9] net: dsa: move mib->cnt_ptr reset code to ksz_common.c
From: Arun Ramadoss @ 2022-05-17  9:43 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Russell King, Woojung Huh, UNGLinuxDriver, Andrew Lunn,
	Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	David S. Miller, Jakub Kicinski, Paolo Abeni, Oleksij Rempel,
	Marek Vasut, Michael Grzeschik, Eric Dumazet
In-Reply-To: <20220517094333.27225-1-arun.ramadoss@microchip.com>

From: Prasanna Vengateshan <prasanna.vengateshan@microchip.com>

The mib->cnt_ptr reset is duplicated in each chip's port_init_cnt()
implementation. Move the reset code to the ksz common layer and remove
it from the individual product files.

Signed-off-by: Prasanna Vengateshan <prasanna.vengateshan@microchip.com>
Signed-off-by: Arun Ramadoss <arun.ramadoss@microchip.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/microchip/ksz8795.c    | 2 --
 drivers/net/dsa/microchip/ksz9477.c    | 3 ---
 drivers/net/dsa/microchip/ksz_common.c | 8 +++++++-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 251048ffd3d4..d6162b00e4fb 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -388,8 +388,6 @@ static void ksz8_port_init_cnt(struct ksz_device *dev, int port)
 					dropped, &mib->counters[mib->cnt_ptr]);
 		++mib->cnt_ptr;
 	}
-	mib->cnt_ptr = 0;
-	memset(mib->counters, 0, dev->info->mib_cnt * sizeof(u64));
 }
 
 static void ksz8_r_table(struct ksz_device *dev, int table, u16 addr, u64 *data)
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 7cffc3388106..22ef56e2cb7b 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -274,9 +274,6 @@ static void ksz9477_port_init_cnt(struct ksz_device *dev, int port)
 	ksz_write8(dev, REG_SW_MAC_CTRL_6, SW_MIB_COUNTER_FLUSH);
 	ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, 0);
 	mutex_unlock(&mib->cnt_mutex);
-
-	mib->cnt_ptr = 0;
-	memset(mib->counters, 0, dev->info->mib_cnt * sizeof(u64));
 }
 
 static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index c0acb12fab6d..873a6469934f 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -560,8 +560,14 @@ void ksz_init_mib_timer(struct ksz_device *dev)
 
 	INIT_DELAYED_WORK(&dev->mib_read, ksz_mib_read_work);
 
-	for (i = 0; i < dev->info->port_cnt; i++)
+	for (i = 0; i < dev->info->port_cnt; i++) {
+		struct ksz_port_mib *mib = &dev->ports[i].mib;
+
 		dev->dev_ops->port_init_cnt(dev, i);
+
+		mib->cnt_ptr = 0;
+		memset(mib->counters, 0, dev->info->mib_cnt * sizeof(u64));
+	}
 }
 EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
 
-- 
2.33.0


^ permalink raw reply related

* [Patch net-next 6/9] net: dsa: microchip: move get_strings to ksz_common
From: Arun Ramadoss @ 2022-05-17  9:43 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Russell King, Woojung Huh, UNGLinuxDriver, Andrew Lunn,
	Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	David S. Miller, Jakub Kicinski, Paolo Abeni, Oleksij Rempel,
	Marek Vasut, Michael Grzeschik, Eric Dumazet
In-Reply-To: <20220517094333.27225-1-arun.ramadoss@microchip.com>

ksz8795 and ksz9477 use the same algorithm for copying the ethtool
strings. Hence move it to ksz_common to remove the redundant code.

Signed-off-by: Arun Ramadoss <arun.ramadoss@microchip.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
---
 drivers/net/dsa/microchip/ksz8795.c    | 14 +-------------
 drivers/net/dsa/microchip/ksz9477.c    | 17 +----------------
 drivers/net/dsa/microchip/ksz_common.c | 16 ++++++++++++++++
 drivers/net/dsa/microchip/ksz_common.h |  2 ++
 4 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 3490b6072641..251048ffd3d4 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -923,18 +923,6 @@ static u32 ksz8_sw_get_phy_flags(struct dsa_switch *ds, int port)
 	return 0;
 }
 
-static void ksz8_get_strings(struct dsa_switch *ds, int port,
-			     u32 stringset, uint8_t *buf)
-{
-	struct ksz_device *dev = ds->priv;
-	int i;
-
-	for (i = 0; i < dev->info->mib_cnt; i++) {
-		memcpy(buf + i * ETH_GSTRING_LEN,
-		       dev->info->mib_names[i].string, ETH_GSTRING_LEN);
-	}
-}
-
 static void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member)
 {
 	u8 data;
@@ -1424,7 +1412,7 @@ static const struct dsa_switch_ops ksz8_switch_ops = {
 	.phylink_get_caps	= ksz8_get_caps,
 	.phylink_mac_link_down	= ksz_mac_link_down,
 	.port_enable		= ksz_enable_port,
-	.get_strings		= ksz8_get_strings,
+	.get_strings		= ksz_get_strings,
 	.get_ethtool_stats	= ksz_get_ethtool_stats,
 	.get_sset_count		= ksz_sset_count,
 	.port_bridge_join	= ksz_port_bridge_join,
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index d4729f0dd831..7cffc3388106 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -358,21 +358,6 @@ static int ksz9477_phy_write16(struct dsa_switch *ds, int addr, int reg,
 	return 0;
 }
 
-static void ksz9477_get_strings(struct dsa_switch *ds, int port,
-				u32 stringset, uint8_t *buf)
-{
-	struct ksz_device *dev = ds->priv;
-	int i;
-
-	if (stringset != ETH_SS_STATS)
-		return;
-
-	for (i = 0; i < dev->info->mib_cnt; i++) {
-		memcpy(buf + i * ETH_GSTRING_LEN,
-		       dev->info->mib_names[i].string, ETH_GSTRING_LEN);
-	}
-}
-
 static void ksz9477_cfg_port_member(struct ksz_device *dev, int port,
 				    u8 member)
 {
@@ -1341,7 +1326,7 @@ static const struct dsa_switch_ops ksz9477_switch_ops = {
 	.phy_write		= ksz9477_phy_write16,
 	.phylink_mac_link_down	= ksz_mac_link_down,
 	.port_enable		= ksz_enable_port,
-	.get_strings		= ksz9477_get_strings,
+	.get_strings		= ksz_get_strings,
 	.get_ethtool_stats	= ksz_get_ethtool_stats,
 	.get_sset_count		= ksz_sset_count,
 	.port_bridge_join	= ksz_port_bridge_join,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 8f90bf29fd4c..c0acb12fab6d 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -418,6 +418,22 @@ void ksz_get_stats64(struct dsa_switch *ds, int port,
 }
 EXPORT_SYMBOL_GPL(ksz_get_stats64);
 
+void ksz_get_strings(struct dsa_switch *ds, int port,
+		     u32 stringset, uint8_t *buf)
+{
+	struct ksz_device *dev = ds->priv;
+	int i;
+
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	for (i = 0; i < dev->info->mib_cnt; i++) {
+		memcpy(buf + i * ETH_GSTRING_LEN,
+		       dev->info->mib_names[i].string, ETH_GSTRING_LEN);
+	}
+}
+EXPORT_SYMBOL_GPL(ksz_get_strings);
+
 void ksz_update_port_member(struct ksz_device *dev, int port)
 {
 	struct ksz_port *p = &dev->ports[port];
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 7c0158f20d34..8b2a30ef0664 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -225,6 +225,8 @@ int ksz_port_mdb_del(struct dsa_switch *ds, int port,
 		     const struct switchdev_obj_port_mdb *mdb,
 		     struct dsa_db db);
 int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
+void ksz_get_strings(struct dsa_switch *ds, int port,
+		     u32 stringset, uint8_t *buf);
 
 /* Common register access functions */
 
-- 
2.33.0


^ permalink raw reply related

* [Patch net-next 2/9] net: dsa: microchip: move ksz_chip_data to ksz_common
From: Arun Ramadoss @ 2022-05-17  9:43 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Russell King, Woojung Huh, UNGLinuxDriver, Andrew Lunn,
	Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	David S. Miller, Jakub Kicinski, Paolo Abeni, Oleksij Rempel,
	Marek Vasut, Michael Grzeschik, Eric Dumazet
In-Reply-To: <20220517094333.27225-1-arun.ramadoss@microchip.com>

This patch moves the ksz_chip_data in ksz8795 and ksz9477 to ksz_common.
At present, dev->chip_id is compared against each ksz_chip_data entry
and the values of the matching entry are copied to the ksz_device
structure. These values are declared as constant.
Instead of copying the values and referencing the copies, this patch
points dev->info at the matching ksz_chip_data entry, looked up by
chip_id before the init function is called. It also adds the
ksz_chip_data values for the LAN937x based switches.

Signed-off-by: Arun Ramadoss <arun.ramadoss@microchip.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
---
 drivers/net/dsa/microchip/ksz8795.c    | 114 ++-------------
 drivers/net/dsa/microchip/ksz9477.c    | 108 +++-----------
 drivers/net/dsa/microchip/ksz_common.c | 190 +++++++++++++++++++++++--
 drivers/net/dsa/microchip/ksz_common.h |  53 ++++++-
 4 files changed, 259 insertions(+), 206 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 83bcabf2dc54..b6032b65afc2 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -1036,13 +1036,13 @@ static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
 	int first, index, cnt;
 	struct ksz_port *p;
 
-	if ((uint)port < dev->port_cnt) {
+	if ((uint)port < dev->info->port_cnt) {
 		first = port;
 		cnt = port + 1;
 	} else {
 		/* Flush all ports. */
 		first = 0;
-		cnt = dev->port_cnt;
+		cnt = dev->info->port_cnt;
 	}
 	for (index = first; index < cnt; index++) {
 		p = &dev->ports[index];
@@ -1118,7 +1118,7 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
 		 * Remove Tag flag to be changed, unless there are no
 		 * other VLANs currently configured.
 		 */
-		for (vid = 1; vid < dev->num_vlans; ++vid) {
+		for (vid = 1; vid < dev->info->num_vlans; ++vid) {
 			/* Skip the VID we are going to add or reconfigure */
 			if (vid == vlan->vid)
 				continue;
@@ -1389,7 +1389,7 @@ static int ksz8_handle_global_errata(struct dsa_switch *ds)
 	 *   KSZ879x/KSZ877x/KSZ876x and some EEE link partners may result in
 	 *   the link dropping.
 	 */
-	if (dev->ksz87xx_eee_link_erratum)
+	if (dev->info->ksz87xx_eee_link_erratum)
 		ret = ksz8_ind_write8(dev, TABLE_EEE, REG_IND_EEE_GLOB2_HI, 0);
 
 	return ret;
@@ -1402,7 +1402,7 @@ static int ksz8_setup(struct dsa_switch *ds)
 	int i, ret = 0;
 
 	dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table),
-				       dev->num_vlans, GFP_KERNEL);
+				       dev->info->num_vlans, GFP_KERNEL);
 	if (!dev->vlan_cache)
 		return -ENOMEM;
 
@@ -1446,7 +1446,7 @@ static int ksz8_setup(struct dsa_switch *ds)
 			   (BROADCAST_STORM_VALUE *
 			   BROADCAST_STORM_PROT_RATE) / 100);
 
-	for (i = 0; i < (dev->num_vlans / 4); i++)
+	for (i = 0; i < (dev->info->num_vlans / 4); i++)
 		ksz8_r_vlan_entries(dev, i);
 
 	/* Setup STP address for STP operation. */
@@ -1571,74 +1571,6 @@ static int ksz8_switch_detect(struct ksz_device *dev)
 	return 0;
 }
 
-struct ksz_chip_data {
-	u16 chip_id;
-	const char *dev_name;
-	int num_vlans;
-	int num_alus;
-	int num_statics;
-	int cpu_ports;
-	int port_cnt;
-	bool ksz87xx_eee_link_erratum;
-};
-
-static const struct ksz_chip_data ksz8_switch_chips[] = {
-	{
-		.chip_id = 0x8795,
-		.dev_name = "KSZ8795",
-		.num_vlans = 4096,
-		.num_alus = 0,
-		.num_statics = 8,
-		.cpu_ports = 0x10,	/* can be configured as cpu port */
-		.port_cnt = 5,		/* total cpu and user ports */
-		.ksz87xx_eee_link_erratum = true,
-	},
-	{
-		/*
-		 * WARNING
-		 * =======
-		 * KSZ8794 is similar to KSZ8795, except the port map
-		 * contains a gap between external and CPU ports, the
-		 * port map is NOT continuous. The per-port register
-		 * map is shifted accordingly too, i.e. registers at
-		 * offset 0x40 are NOT used on KSZ8794 and they ARE
-		 * used on KSZ8795 for external port 3.
-		 *           external  cpu
-		 * KSZ8794   0,1,2      4
-		 * KSZ8795   0,1,2,3    4
-		 * KSZ8765   0,1,2,3    4
-		 * port_cnt is configured as 5, even though it is 4
-		 */
-		.chip_id = 0x8794,
-		.dev_name = "KSZ8794",
-		.num_vlans = 4096,
-		.num_alus = 0,
-		.num_statics = 8,
-		.cpu_ports = 0x10,	/* can be configured as cpu port */
-		.port_cnt = 5,		/* total cpu and user ports */
-		.ksz87xx_eee_link_erratum = true,
-	},
-	{
-		.chip_id = 0x8765,
-		.dev_name = "KSZ8765",
-		.num_vlans = 4096,
-		.num_alus = 0,
-		.num_statics = 8,
-		.cpu_ports = 0x10,	/* can be configured as cpu port */
-		.port_cnt = 5,		/* total cpu and user ports */
-		.ksz87xx_eee_link_erratum = true,
-	},
-	{
-		.chip_id = 0x8830,
-		.dev_name = "KSZ8863/KSZ8873",
-		.num_vlans = 16,
-		.num_alus = 0,
-		.num_statics = 8,
-		.cpu_ports = 0x4,	/* can be configured as cpu port */
-		.port_cnt = 3,
-	},
-};
-
 static int ksz8_switch_init(struct ksz_device *dev)
 {
 	struct ksz8 *ksz8 = dev->priv;
@@ -1646,30 +1578,10 @@ static int ksz8_switch_init(struct ksz_device *dev)
 
 	dev->ds->ops = &ksz8_switch_ops;
 
-	for (i = 0; i < ARRAY_SIZE(ksz8_switch_chips); i++) {
-		const struct ksz_chip_data *chip = &ksz8_switch_chips[i];
-
-		if (dev->chip_id == chip->chip_id) {
-			dev->name = chip->dev_name;
-			dev->num_vlans = chip->num_vlans;
-			dev->num_alus = chip->num_alus;
-			dev->num_statics = chip->num_statics;
-			dev->port_cnt = chip->port_cnt;
-			dev->cpu_port = fls(chip->cpu_ports) - 1;
-			dev->phy_port_cnt = dev->port_cnt - 1;
-			dev->cpu_ports = chip->cpu_ports;
-			dev->host_mask = chip->cpu_ports;
-			dev->port_mask = (BIT(dev->phy_port_cnt) - 1) |
-					 chip->cpu_ports;
-			dev->ksz87xx_eee_link_erratum =
-				chip->ksz87xx_eee_link_erratum;
-			break;
-		}
-	}
-
-	/* no switch found */
-	if (!dev->cpu_ports)
-		return -ENODEV;
+	dev->cpu_port = fls(dev->info->cpu_ports) - 1;
+	dev->host_mask = dev->info->cpu_ports;
+	dev->phy_port_cnt = dev->info->port_cnt - 1;
+	dev->port_mask = (BIT(dev->phy_port_cnt) - 1) | dev->info->cpu_ports;
 
 	if (ksz_is_ksz88x3(dev)) {
 		ksz8->regs = ksz8863_regs;
@@ -1688,11 +1600,11 @@ static int ksz8_switch_init(struct ksz_device *dev)
 	dev->reg_mib_cnt = MIB_COUNTER_NUM;
 
 	dev->ports = devm_kzalloc(dev->dev,
-				  dev->port_cnt * sizeof(struct ksz_port),
+				  dev->info->port_cnt * sizeof(struct ksz_port),
 				  GFP_KERNEL);
 	if (!dev->ports)
 		return -ENOMEM;
-	for (i = 0; i < dev->port_cnt; i++) {
+	for (i = 0; i < dev->info->port_cnt; i++) {
 		mutex_init(&dev->ports[i].mib.cnt_mutex);
 		dev->ports[i].mib.counters =
 			devm_kzalloc(dev->dev,
@@ -1704,7 +1616,7 @@ static int ksz8_switch_init(struct ksz_device *dev)
 	}
 
 	/* set the real number of ports */
-	dev->ds->num_ports = dev->port_cnt;
+	dev->ds->num_ports = dev->info->port_cnt;
 
 	/* We rely on software untagging on the CPU port, so that we
 	 * can support both tagged and untagged VLANs
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 61dd0fa97748..c712a0011367 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -100,7 +100,7 @@ static int ksz9477_change_mtu(struct dsa_switch *ds, int port, int mtu)
 	/* Cache the per-port MTU setting */
 	dev->ports[port].max_frame = frame_size;
 
-	for (i = 0; i < dev->port_cnt; i++)
+	for (i = 0; i < dev->info->port_cnt; i++)
 		max_frame = max(max_frame, dev->ports[i].max_frame);
 
 	return regmap_update_bits(dev->regmap[1], REG_SW_MTU__2,
@@ -434,7 +434,7 @@ static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
 			   SW_FLUSH_OPTION_M << SW_FLUSH_OPTION_S,
 			   SW_FLUSH_OPTION_DYN_MAC << SW_FLUSH_OPTION_S);
 
-	if (port < dev->port_cnt) {
+	if (port < dev->info->port_cnt) {
 		/* flush individual port */
 		ksz_pread8(dev, port, P_STP_CTRL, &data);
 		if (!(data & PORT_LEARN_DISABLE))
@@ -756,7 +756,7 @@ static int ksz9477_port_mdb_add(struct dsa_switch *ds, int port,
 
 	mutex_lock(&dev->alu_mutex);
 
-	for (index = 0; index < dev->num_statics; index++) {
+	for (index = 0; index < dev->info->num_statics; index++) {
 		/* find empty slot first */
 		data = (index << ALU_STAT_INDEX_S) |
 			ALU_STAT_READ | ALU_STAT_START;
@@ -787,7 +787,7 @@ static int ksz9477_port_mdb_add(struct dsa_switch *ds, int port,
 	}
 
 	/* no available entry */
-	if (index == dev->num_statics) {
+	if (index == dev->info->num_statics) {
 		err = -ENOSPC;
 		goto exit;
 	}
@@ -832,7 +832,7 @@ static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
 
 	mutex_lock(&dev->alu_mutex);
 
-	for (index = 0; index < dev->num_statics; index++) {
+	for (index = 0; index < dev->info->num_statics; index++) {
 		/* find empty slot first */
 		data = (index << ALU_STAT_INDEX_S) |
 			ALU_STAT_READ | ALU_STAT_START;
@@ -861,7 +861,7 @@ static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
 	}
 
 	/* no available entry */
-	if (index == dev->num_statics)
+	if (index == dev->info->num_statics)
 		goto exit;
 
 	/* clear port */
@@ -903,7 +903,7 @@ static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
 	 * Check if any of the port is already set for sniffing
 	 * If yes, instruct the user to remove the previous entry & exit
 	 */
-	for (p = 0; p < dev->port_cnt; p++) {
+	for (p = 0; p < dev->info->port_cnt; p++) {
 		/* Skip the current sniffing port */
 		if (p == mirror->to_local_port)
 			continue;
@@ -946,7 +946,7 @@ static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port,
 
 
 	/* Check if any of the port is still referring to sniffer port */
-	for (p = 0; p < dev->port_cnt; p++) {
+	for (p = 0; p < dev->info->port_cnt; p++) {
 		ksz_pread8(dev, p, P_MIRROR_CTRL, &data);
 
 		if ((data & (PORT_MIRROR_RX | PORT_MIRROR_TX))) {
@@ -1194,7 +1194,7 @@ static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 			     PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL,
 			     false);
 
-		if (dev->phy_errata_9477)
+		if (dev->info->phy_errata_9477)
 			ksz9477_phy_errata_setup(dev, port);
 	} else {
 		/* force flow control */
@@ -1259,8 +1259,9 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds)
 	struct ksz_port *p;
 	int i;
 
-	for (i = 0; i < dev->port_cnt; i++) {
-		if (dsa_is_cpu_port(ds, i) && (dev->cpu_ports & (1 << i))) {
+	for (i = 0; i < dev->info->port_cnt; i++) {
+		if (dsa_is_cpu_port(ds, i) &&
+		    (dev->info->cpu_ports & (1 << i))) {
 			phy_interface_t interface;
 			const char *prev_msg;
 			const char *prev_mode;
@@ -1304,7 +1305,7 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds)
 		}
 	}
 
-	for (i = 0; i < dev->port_cnt; i++) {
+	for (i = 0; i < dev->info->port_cnt; i++) {
 		if (i == dev->cpu_port)
 			continue;
 		p = &dev->ports[i];
@@ -1328,7 +1329,7 @@ static int ksz9477_setup(struct dsa_switch *ds)
 	int ret = 0;
 
 	dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table),
-				       dev->num_vlans, GFP_KERNEL);
+				       dev->info->num_vlans, GFP_KERNEL);
 	if (!dev->vlan_cache)
 		return -ENOMEM;
 
@@ -1470,96 +1471,23 @@ static int ksz9477_switch_detect(struct ksz_device *dev)
 	return 0;
 }
 
-struct ksz_chip_data {
-	u32 chip_id;
-	const char *dev_name;
-	int num_vlans;
-	int num_alus;
-	int num_statics;
-	int cpu_ports;
-	int port_cnt;
-	bool phy_errata_9477;
-};
-
-static const struct ksz_chip_data ksz9477_switch_chips[] = {
-	{
-		.chip_id = 0x00947700,
-		.dev_name = "KSZ9477",
-		.num_vlans = 4096,
-		.num_alus = 4096,
-		.num_statics = 16,
-		.cpu_ports = 0x7F,	/* can be configured as cpu port */
-		.port_cnt = 7,		/* total physical port count */
-		.phy_errata_9477 = true,
-	},
-	{
-		.chip_id = 0x00989700,
-		.dev_name = "KSZ9897",
-		.num_vlans = 4096,
-		.num_alus = 4096,
-		.num_statics = 16,
-		.cpu_ports = 0x7F,	/* can be configured as cpu port */
-		.port_cnt = 7,		/* total physical port count */
-		.phy_errata_9477 = true,
-	},
-	{
-		.chip_id = 0x00989300,
-		.dev_name = "KSZ9893",
-		.num_vlans = 4096,
-		.num_alus = 4096,
-		.num_statics = 16,
-		.cpu_ports = 0x07,	/* can be configured as cpu port */
-		.port_cnt = 3,		/* total port count */
-	},
-	{
-		.chip_id = 0x00956700,
-		.dev_name = "KSZ9567",
-		.num_vlans = 4096,
-		.num_alus = 4096,
-		.num_statics = 16,
-		.cpu_ports = 0x7F,	/* can be configured as cpu port */
-		.port_cnt = 7,		/* total physical port count */
-		.phy_errata_9477 = true,
-	},
-};
-
 static int ksz9477_switch_init(struct ksz_device *dev)
 {
 	int i;
 
 	dev->ds->ops = &ksz9477_switch_ops;
 
-	for (i = 0; i < ARRAY_SIZE(ksz9477_switch_chips); i++) {
-		const struct ksz_chip_data *chip = &ksz9477_switch_chips[i];
-
-		if (dev->chip_id == chip->chip_id) {
-			dev->name = chip->dev_name;
-			dev->num_vlans = chip->num_vlans;
-			dev->num_alus = chip->num_alus;
-			dev->num_statics = chip->num_statics;
-			dev->port_cnt = chip->port_cnt;
-			dev->cpu_ports = chip->cpu_ports;
-			dev->phy_errata_9477 = chip->phy_errata_9477;
-
-			break;
-		}
-	}
-
-	/* no switch found */
-	if (!dev->port_cnt)
-		return -ENODEV;
-
-	dev->port_mask = (1 << dev->port_cnt) - 1;
+	dev->port_mask = (1 << dev->info->port_cnt) - 1;
 
 	dev->reg_mib_cnt = SWITCH_COUNTER_NUM;
 	dev->mib_cnt = TOTAL_SWITCH_COUNTER_NUM;
 
 	dev->ports = devm_kzalloc(dev->dev,
-				  dev->port_cnt * sizeof(struct ksz_port),
+				  dev->info->port_cnt * sizeof(struct ksz_port),
 				  GFP_KERNEL);
 	if (!dev->ports)
 		return -ENOMEM;
-	for (i = 0; i < dev->port_cnt; i++) {
+	for (i = 0; i < dev->info->port_cnt; i++) {
 		spin_lock_init(&dev->ports[i].mib.stats64_lock);
 		mutex_init(&dev->ports[i].mib.cnt_mutex);
 		dev->ports[i].mib.counters =
@@ -1572,7 +1500,7 @@ static int ksz9477_switch_init(struct ksz_device *dev)
 	}
 
 	/* set the real number of ports */
-	dev->ds->num_ports = dev->port_cnt;
+	dev->ds->num_ports = dev->info->port_cnt;
 
 	return 0;
 }
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 10f127b09e58..ebb4753051d4 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -59,6 +59,172 @@ struct ksz_stats_raw {
 	u64 tx_discards;
 };
 
+static const struct ksz_chip_data ksz_switch_chips[] = {
+	[KSZ8795] = {
+		.chip_id = KSZ8795_CHIP_ID,
+		.dev_name = "KSZ8795",
+		.num_vlans = 4096,
+		.num_alus = 0,
+		.num_statics = 8,
+		.cpu_ports = 0x10,	/* can be configured as cpu port */
+		.port_cnt = 5,		/* total cpu and user ports */
+		.ksz87xx_eee_link_erratum = true,
+	},
+
+	[KSZ8794] = {
+		/* WARNING
+		 * =======
+		 * KSZ8794 is similar to KSZ8795, except the port map
+		 * contains a gap between external and CPU ports, the
+		 * port map is NOT continuous. The per-port register
+		 * map is shifted accordingly too, i.e. registers at
+		 * offset 0x40 are NOT used on KSZ8794 and they ARE
+		 * used on KSZ8795 for external port 3.
+		 *           external  cpu
+		 * KSZ8794   0,1,2      4
+		 * KSZ8795   0,1,2,3    4
+		 * KSZ8765   0,1,2,3    4
+		 * port_cnt is configured as 5, even though it is 4
+		 */
+		.chip_id = KSZ8794_CHIP_ID,
+		.dev_name = "KSZ8794",
+		.num_vlans = 4096,
+		.num_alus = 0,
+		.num_statics = 8,
+		.cpu_ports = 0x10,	/* can be configured as cpu port */
+		.port_cnt = 5,		/* total cpu and user ports */
+		.ksz87xx_eee_link_erratum = true,
+	},
+
+	[KSZ8765] = {
+		.chip_id = KSZ8765_CHIP_ID,
+		.dev_name = "KSZ8765",
+		.num_vlans = 4096,
+		.num_alus = 0,
+		.num_statics = 8,
+		.cpu_ports = 0x10,	/* can be configured as cpu port */
+		.port_cnt = 5,		/* total cpu and user ports */
+		.ksz87xx_eee_link_erratum = true,
+	},
+
+	[KSZ8830] = {
+		.chip_id = KSZ8830_CHIP_ID,
+		.dev_name = "KSZ8863/KSZ8873",
+		.num_vlans = 16,
+		.num_alus = 0,
+		.num_statics = 8,
+		.cpu_ports = 0x4,	/* can be configured as cpu port */
+		.port_cnt = 3,
+	},
+
+	[KSZ9477] = {
+		.chip_id = KSZ9477_CHIP_ID,
+		.dev_name = "KSZ9477",
+		.num_vlans = 4096,
+		.num_alus = 4096,
+		.num_statics = 16,
+		.cpu_ports = 0x7F,	/* can be configured as cpu port */
+		.port_cnt = 7,		/* total physical port count */
+		.phy_errata_9477 = true,
+	},
+
+	[KSZ9897] = {
+		.chip_id = KSZ9897_CHIP_ID,
+		.dev_name = "KSZ9897",
+		.num_vlans = 4096,
+		.num_alus = 4096,
+		.num_statics = 16,
+		.cpu_ports = 0x7F,	/* can be configured as cpu port */
+		.port_cnt = 7,		/* total physical port count */
+		.phy_errata_9477 = true,
+	},
+
+	[KSZ9893] = {
+		.chip_id = KSZ9893_CHIP_ID,
+		.dev_name = "KSZ9893",
+		.num_vlans = 4096,
+		.num_alus = 4096,
+		.num_statics = 16,
+		.cpu_ports = 0x07,	/* can be configured as cpu port */
+		.port_cnt = 3,		/* total port count */
+	},
+
+	[KSZ9567] = {
+		.chip_id = KSZ9567_CHIP_ID,
+		.dev_name = "KSZ9567",
+		.num_vlans = 4096,
+		.num_alus = 4096,
+		.num_statics = 16,
+		.cpu_ports = 0x7F,	/* can be configured as cpu port */
+		.port_cnt = 7,		/* total physical port count */
+		.phy_errata_9477 = true,
+	},
+
+	[LAN9370] = {
+		.chip_id = LAN9370_CHIP_ID,
+		.dev_name = "LAN9370",
+		.num_vlans = 4096,
+		.num_alus = 1024,
+		.num_statics = 256,
+		.cpu_ports = 0x10,	/* can be configured as cpu port */
+		.port_cnt = 5,		/* total physical port count */
+	},
+
+	[LAN9371] = {
+		.chip_id = LAN9371_CHIP_ID,
+		.dev_name = "LAN9371",
+		.num_vlans = 4096,
+		.num_alus = 1024,
+		.num_statics = 256,
+		.cpu_ports = 0x30,	/* can be configured as cpu port */
+		.port_cnt = 6,		/* total physical port count */
+	},
+
+	[LAN9372] = {
+		.chip_id = LAN9372_CHIP_ID,
+		.dev_name = "LAN9372",
+		.num_vlans = 4096,
+		.num_alus = 1024,
+		.num_statics = 256,
+		.cpu_ports = 0x30,	/* can be configured as cpu port */
+		.port_cnt = 8,		/* total physical port count */
+	},
+
+	[LAN9373] = {
+		.chip_id = LAN9373_CHIP_ID,
+		.dev_name = "LAN9373",
+		.num_vlans = 4096,
+		.num_alus = 1024,
+		.num_statics = 256,
+		.cpu_ports = 0x38,	/* can be configured as cpu port */
+		.port_cnt = 5,		/* total physical port count */
+	},
+
+	[LAN9374] = {
+		.chip_id = LAN9374_CHIP_ID,
+		.dev_name = "LAN9374",
+		.num_vlans = 4096,
+		.num_alus = 1024,
+		.num_statics = 256,
+		.cpu_ports = 0x30,	/* can be configured as cpu port */
+		.port_cnt = 8,		/* total physical port count */
+	},
+};
+
+static const struct ksz_chip_data *ksz_lookup_info(unsigned int prod_num)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ksz_switch_chips); i++) {
+		const struct ksz_chip_data *chip = &ksz_switch_chips[i];
+
+		if (chip->chip_id == prod_num)
+			return chip;
+	}
+
+	return NULL;
+}
+
 void ksz_r_mib_stats64(struct ksz_device *dev, int port)
 {
 	struct rtnl_link_stats64 *stats;
@@ -207,7 +373,7 @@ static void ksz_mib_read_work(struct work_struct *work)
 	struct ksz_port *p;
 	int i;
 
-	for (i = 0; i < dev->port_cnt; i++) {
+	for (i = 0; i < dev->info->port_cnt; i++) {
 		if (dsa_is_unused_port(dev->ds, i))
 			continue;
 
@@ -242,7 +408,7 @@ void ksz_init_mib_timer(struct ksz_device *dev)
 
 	INIT_DELAYED_WORK(&dev->mib_read, ksz_mib_read_work);
 
-	for (i = 0; i < dev->port_cnt; i++)
+	for (i = 0; i < dev->info->port_cnt; i++)
 		dev->dev_ops->port_init_cnt(dev, i);
 }
 EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
@@ -382,7 +548,7 @@ int ksz_port_mdb_add(struct dsa_switch *ds, int port,
 	int empty = 0;
 
 	alu.port_forward = 0;
-	for (index = 0; index < dev->num_statics; index++) {
+	for (index = 0; index < dev->info->num_statics; index++) {
 		if (!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) {
 			/* Found one already in static MAC table. */
 			if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
@@ -395,11 +561,11 @@ int ksz_port_mdb_add(struct dsa_switch *ds, int port,
 	}
 
 	/* no available entry */
-	if (index == dev->num_statics && !empty)
+	if (index == dev->info->num_statics && !empty)
 		return -ENOSPC;
 
 	/* add entry */
-	if (index == dev->num_statics) {
+	if (index == dev->info->num_statics) {
 		index = empty - 1;
 		memset(&alu, 0, sizeof(alu));
 		memcpy(alu.mac, mdb->addr, ETH_ALEN);
@@ -426,7 +592,7 @@ int ksz_port_mdb_del(struct dsa_switch *ds, int port,
 	struct alu_struct alu;
 	int index;
 
-	for (index = 0; index < dev->num_statics; index++) {
+	for (index = 0; index < dev->info->num_statics; index++) {
 		if (!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) {
 			/* Found one already in static MAC table. */
 			if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
@@ -436,7 +602,7 @@ int ksz_port_mdb_del(struct dsa_switch *ds, int port,
 	}
 
 	/* no available entry */
-	if (index == dev->num_statics)
+	if (index == dev->info->num_statics)
 		goto exit;
 
 	/* clear port */
@@ -537,6 +703,7 @@ EXPORT_SYMBOL(ksz_switch_alloc);
 int ksz_switch_register(struct ksz_device *dev,
 			const struct ksz_dev_ops *ops)
 {
+	const struct ksz_chip_data *info;
 	struct device_node *port, *ports;
 	phy_interface_t interface;
 	unsigned int port_num;
@@ -567,6 +734,13 @@ int ksz_switch_register(struct ksz_device *dev,
 	if (dev->dev_ops->detect(dev))
 		return -EINVAL;
 
+	info = ksz_lookup_info(dev->chip_id);
+	if (!info)
+		return -ENODEV;
+
+	/* Update the compatible info with the probed one */
+	dev->info = info;
+
 	ret = dev->dev_ops->init(dev);
 	if (ret)
 		return ret;
@@ -574,7 +748,7 @@ int ksz_switch_register(struct ksz_device *dev,
 	/* Host port interface will be self detected, or specifically set in
 	 * device tree.
 	 */
-	for (port_num = 0; port_num < dev->port_cnt; ++port_num)
+	for (port_num = 0; port_num < dev->info->port_cnt; ++port_num)
 		dev->ports[port_num].interface = PHY_INTERFACE_MODE_NA;
 	if (dev->dev->of_node) {
 		ret = of_get_phy_mode(dev->dev->of_node, &interface);
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 28cda79b090f..a8f73e9a63ac 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -26,6 +26,18 @@ struct ksz_port_mib {
 	struct spinlock stats64_lock;
 };
 
+struct ksz_chip_data {
+	u32 chip_id;
+	const char *dev_name;
+	int num_vlans;
+	int num_alus;
+	int num_statics;
+	int cpu_ports;
+	int port_cnt;
+	bool phy_errata_9477;
+	bool ksz87xx_eee_link_erratum;
+};
+
 struct ksz_port {
 	bool remove_tag;		/* Remove Tag flag set, for ksz8795 only */
 	int stp_state;
@@ -48,6 +60,7 @@ struct ksz_device {
 	struct dsa_switch *ds;
 	struct ksz_platform_data *pdata;
 	const char *name;
+	const struct ksz_chip_data *info;
 
 	struct mutex dev_mutex;		/* device access */
 	struct mutex regmap_mutex;	/* regmap access */
@@ -64,20 +77,13 @@ struct ksz_device {
 
 	/* chip specific data */
 	u32 chip_id;
-	int num_vlans;
-	int num_alus;
-	int num_statics;
 	int cpu_port;			/* port connected to CPU */
-	int cpu_ports;			/* port bitmap can be cpu port */
 	int phy_port_cnt;
-	int port_cnt;
 	u8 reg_mib_cnt;
 	int mib_cnt;
 	const struct mib_names *mib_names;
 	phy_interface_t compat_interface;
 	u32 regs_size;
-	bool phy_errata_9477;
-	bool ksz87xx_eee_link_erratum;
 	bool synclko_125;
 	bool synclko_disable;
 
@@ -94,6 +100,39 @@ struct ksz_device {
 	u16 port_mask;
 };
 
+/* List of supported models */
+enum ksz_model {
+	KSZ8795,
+	KSZ8794,
+	KSZ8765,
+	KSZ8830,
+	KSZ9477,
+	KSZ9897,
+	KSZ9893,
+	KSZ9567,
+	LAN9370,
+	LAN9371,
+	LAN9372,
+	LAN9373,
+	LAN9374,
+};
+
+enum ksz_chip_id {
+	KSZ8795_CHIP_ID = 0x8795,
+	KSZ8794_CHIP_ID = 0x8794,
+	KSZ8765_CHIP_ID = 0x8765,
+	KSZ8830_CHIP_ID = 0x8830,
+	KSZ9477_CHIP_ID = 0x00947700,
+	KSZ9897_CHIP_ID = 0x00989700,
+	KSZ9893_CHIP_ID = 0x00989300,
+	KSZ9567_CHIP_ID = 0x00956700,
+	LAN9370_CHIP_ID = 0x00937000,
+	LAN9371_CHIP_ID = 0x00937100,
+	LAN9372_CHIP_ID = 0x00937200,
+	LAN9373_CHIP_ID = 0x00937300,
+	LAN9374_CHIP_ID = 0x00937400,
+};
+
 struct alu_struct {
 	/* entry 1 */
 	u8	is_static:1;
-- 
2.33.0


^ permalink raw reply related

* [Patch net-next 1/9] net: dsa: microchip: ksz8795: update the port_cnt value in ksz_chip_data
From: Arun Ramadoss @ 2022-05-17  9:43 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Russell King, Woojung Huh, UNGLinuxDriver, Andrew Lunn,
	Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	David S. Miller, Jakub Kicinski, Paolo Abeni, Oleksij Rempel,
	Marek Vasut, Michael Grzeschik, Eric Dumazet
In-Reply-To: <20220517094333.27225-1-arun.ramadoss@microchip.com>

The port_cnt value in the structure is not used in switch_init.
Instead, switch_init uses fls(chip->cpu_ports), because one of the
ports in the ksz8794 is unavailable. The cpu_ports value for the 8794
is 0x10 and fls(0x10) = 5, hence update port_cnt directly in the
ksz_chip_data structure so that it is handled the same way as for all
the other switches in the ksz8795.c and ksz9477.c files.

Signed-off-by: Arun Ramadoss <arun.ramadoss@microchip.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
---
 drivers/net/dsa/microchip/ksz8795.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index f91deea9368e..83bcabf2dc54 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -1607,6 +1607,7 @@ static const struct ksz_chip_data ksz8_switch_chips[] = {
 		 * KSZ8794   0,1,2      4
 		 * KSZ8795   0,1,2,3    4
 		 * KSZ8765   0,1,2,3    4
+		 * port_cnt is configured as 5, even though it is 4
 		 */
 		.chip_id = 0x8794,
 		.dev_name = "KSZ8794",
@@ -1614,7 +1615,7 @@ static const struct ksz_chip_data ksz8_switch_chips[] = {
 		.num_alus = 0,
 		.num_statics = 8,
 		.cpu_ports = 0x10,	/* can be configured as cpu port */
-		.port_cnt = 4,		/* total cpu and user ports */
+		.port_cnt = 5,		/* total cpu and user ports */
 		.ksz87xx_eee_link_erratum = true,
 	},
 	{
@@ -1653,7 +1654,7 @@ static int ksz8_switch_init(struct ksz_device *dev)
 			dev->num_vlans = chip->num_vlans;
 			dev->num_alus = chip->num_alus;
 			dev->num_statics = chip->num_statics;
-			dev->port_cnt = fls(chip->cpu_ports);
+			dev->port_cnt = chip->port_cnt;
 			dev->cpu_port = fls(chip->cpu_ports) - 1;
 			dev->phy_port_cnt = dev->port_cnt - 1;
 			dev->cpu_ports = chip->cpu_ports;
-- 
2.33.0


^ permalink raw reply related

* [Patch net-next 0/9] net: dsa: microchip: refactor the ksz switch init function
From: Arun Ramadoss @ 2022-05-17  9:43 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Russell King, Woojung Huh, UNGLinuxDriver, Andrew Lunn,
	Vivien Didelot, Florian Fainelli, Vladimir Oltean,
	David S. Miller, Jakub Kicinski, Paolo Abeni, Oleksij Rempel,
	Marek Vasut, Michael Grzeschik, Eric Dumazet

The ksz_switch_register function calls the individual switches' init
functions (ksz8795.c and ksz9477.c). Both of these functions have a few things
in common, such as copying the chip-specific data to struct ksz_dev and
allocating ksz_port memory and mib_names memory & cnt. To add the new LAN937x
series switch, these allocations would have to be replicated.
Based on the review feedback on the LAN937x part support patch, this series
refactors the switch init function to move the allocations to switch register.

Link:https://patchwork.kernel.org/project/netdevbpf/patch/20220504151755.11737-8-arun.ramadoss@microchip.com/

Changes in Patch v1
- Added the macros for the chip id
- Updated the ksz8863_smi of_device_id data
- Moved the patch 4 port allocation after the patch on mib_names to ksz_common

Changes in RFC v2
- Fixed the compilation issue

Arun Ramadoss (8):
  net: dsa: microchip: ksz8795: update the port_cnt value in
    ksz_chip_data
  net: dsa: microchip: move ksz_chip_data to ksz_common
  net: dsa: microchip: perform the compatibility check for dev probed
  net: dsa: microchip: move struct mib_names to ksz_chip_data
  net: dsa: microchip: move port memory allocation to ksz_common
  net: dsa: microchip: move get_strings to ksz_common
  net: dsa: microchip: add the phylink get_caps
  net: dsa: microchip: remove unused members in ksz_device

Prasanna Vengateshan (1):
  net: dsa: move mib->cnt_ptr reset code to ksz_common.c

 drivers/net/dsa/microchip/ksz8795.c     | 252 +-----------
 drivers/net/dsa/microchip/ksz8795_spi.c |  35 +-
 drivers/net/dsa/microchip/ksz8863_smi.c |  10 +-
 drivers/net/dsa/microchip/ksz9477.c     | 200 ++--------
 drivers/net/dsa/microchip/ksz9477_i2c.c |  30 +-
 drivers/net/dsa/microchip/ksz9477_spi.c |  30 +-
 drivers/net/dsa/microchip/ksz_common.c  | 485 +++++++++++++++++++++++-
 drivers/net/dsa/microchip/ksz_common.h  |  79 +++-
 8 files changed, 664 insertions(+), 457 deletions(-)


base-commit: 6251264fedde83ade6f0f1f7049037469dd4de0b
-- 
2.33.0


^ permalink raw reply

* Re: 答复: 答复: [PATCH bpf-next] samples/bpf: check detach prog exist or not in xdp_fwd
From: Toke Høiland-Jørgensen @ 2022-05-17  9:42 UTC (permalink / raw)
  To: shaozhengchao, bpf@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, ast@kernel.org,
	daniel@iogearbox.net, davem@davemloft.net, kuba@kernel.org,
	hawk@kernel.org, john.fastabend@gmail.com, andrii@kernel.org,
	kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
	kpsingh@kernel.org
  Cc: weiyongjun (A), yuehaibing
In-Reply-To: <942eaafecf074ae8a5bb336c18658453@huawei.com>

> Could I add helper function to implement this function which can check
> the program name and see if it attach to the device.

You just need to call bpf_prog_get_fd_by_id() followed by
bpf_obj_get_info_by_fd(), and the program name will be in info.name.
Here's an example in libxdp where we pull out the prog name:

https://github.com/xdp-project/xdp-tools/blob/master/lib/libxdp/libxdp.c#L1165

-Toke

^ permalink raw reply

* [PATCH nf,v2] netfilter: flowtable: fix TCP flow teardown
From: Pablo Neira Ayuso @ 2022-05-17  9:42 UTC (permalink / raw)
  To: netfilter-devel; +Cc: ozsh, nbd, fw, paulb, netdev, sven.auhagen

This patch addresses three possible problems:

1. ct gc may race to undo the timeout adjustment of the packet path, leaving
   the conntrack entry in place with the internal offload timeout (one day).

2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE
   timeout is reached before the flow offload del.

3. tcp ct is always set to ESTABLISHED with a very long timeout
   in flow offload teardown/delete even though the state might be already
   CLOSED. Also as a remark we cannot assume that the FIN or RST packet
   is hitting flow table teardown as the packet might get bumped to the
   slow path in nftables.

This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so
conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN
state transition.

Moreover, return the connection's ownership to conntrack upon teardown
by clearing the offload flag and fixing the established timeout value.
The flow table GC thread will asynchronously free the flow table and
hardware offload entries.

Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows,
which is also misleading since the flow is back on the classic conntrack
path.

If nf_ct_delete() removes the entry from the conntrack table, then it
calls nf_ct_put() which decrements the refcnt. This is not a problem
because the flowtable holds a reference to the conntrack object from
flow_offload_alloc() path which is released via flow_offload_free().

This patch also updates nft_flow_offload to skip packets in SYN_RECV
state. Since we might miss or bump packets to slow path, we do not know
what will happen there while we are still in SYN_RECV, this patch
postpones offload up to the next packet which also aligns to the
existing behaviour in tc-ct.

flow_offload_teardown() does not reset the existing tcp state from
flow_offload_fixup_tcp() to ESTABLISHED anymore, packets bumped to the slow
path might have already updated the state to CLOSE/FIN.

Joint work with Oz and Sven.

Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
v2: fix nf_conntrack_tcp_established() call, reported by Oz

 net/netfilter/nf_flow_table_core.c | 33 +++++++-----------------------
 net/netfilter/nft_flow_offload.c   |  3 ++-
 2 files changed, 9 insertions(+), 27 deletions(-)

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 20b4a14e5d4e..ebdf5332e838 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);

 static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 {
-	tcp->state = TCP_CONNTRACK_ESTABLISHED;
 	tcp->seen[0].td_maxwin = 0;
 	tcp->seen[1].td_maxwin = 0;
 }

-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	int l4num = nf_ct_protonum(ct);
@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 	if (l4num == IPPROTO_TCP) {
 		struct nf_tcp_net *tn = nf_tcp_pernet(net);

-		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+		flow_offload_fixup_tcp(&ct->proto.tcp);
+
+		timeout = tn->timeouts[ct->proto.tcp.state];
 		timeout -= tn->offload_timeout;
 	} else if (l4num == IPPROTO_UDP) {
 		struct nf_udp_net *tn = nf_udp_pernet(net);
@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 		WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
 }

-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
-{
-	if (nf_ct_protonum(ct) == IPPROTO_TCP)
-		flow_offload_fixup_tcp(&ct->proto.tcp);
-}
-
-static void flow_offload_fixup_ct(struct nf_conn *ct)
-{
-	flow_offload_fixup_ct_state(ct);
-	flow_offload_fixup_ct_timeout(ct);
-}
-
 static void flow_offload_route_release(struct flow_offload *flow)
 {
 	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -361,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 	rhashtable_remove_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
 			       nf_flow_offload_rhash_params);
-
-	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
-
-	if (nf_flow_has_expired(flow))
-		flow_offload_fixup_ct(flow->ct);
-	else
-		flow_offload_fixup_ct_timeout(flow->ct);
-
 	flow_offload_free(flow);
 }

 void flow_offload_teardown(struct flow_offload *flow)
 {
+	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
 	set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
-	flow_offload_fixup_ct_state(flow->ct);
+	flow_offload_fixup_ct(flow->ct);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);

@@ -466,7 +447,7 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
 	if (nf_flow_has_expired(flow) ||
 	    nf_ct_is_dying(flow->ct) ||
 	    nf_flow_has_stale_dst(flow))
-		set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+		flow_offload_teardown(flow);

 	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
 		if (test_bit(NF_FLOW_HW, &flow->flags)) {
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 187b8cb9a510..6f0b07fe648d 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -298,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 	case IPPROTO_TCP:
 		tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
 					  sizeof(_tcph), &_tcph);
-		if (unlikely(!tcph || tcph->fin || tcph->rst))
+		if (unlikely(!tcph || tcph->fin || tcph->rst ||
+			     !nf_conntrack_tcp_established(ct)))
 			goto out;
 		break;
 	case IPPROTO_UDP:
-- 
2.30.2

^ permalink raw reply related

* Re: [PATCH net-next] net/mlxbf_gige: use eth_zero_addr() to clear mac address
From: patchwork-bot+netdevbpf @ 2022-05-17  9:40 UTC (permalink / raw)
  To: Lu Wei
  Cc: davem, edumazet, kuba, pabeni, asmaa, davthompson, limings,
	cai.huoqing, arnd, netdev, linux-kernel
In-Reply-To: <20220516033343.329178-1-luwei32@huawei.com>

Hello:

This patch was applied to netdev/net-next.git (master)
by Paolo Abeni <pabeni@redhat.com>:

On Mon, 16 May 2022 11:33:43 +0800 you wrote:
> Use eth_zero_addr() to clear mac address instead of memset().
> 
> Signed-off-by: Lu Wei <luwei32@huawei.com>
> ---
>  drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Here is the summary with links:
  - [net-next] net/mlxbf_gige: use eth_zero_addr() to clear mac address
    https://git.kernel.org/netdev/net-next/c/bcdcf2c466d3

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH] ethernet/ti: delete if NULL check befort devm_kfree
From: patchwork-bot+netdevbpf @ 2022-05-17  9:40 UTC (permalink / raw)
  To: Bernard Zhao
  Cc: davem, kuba, pabeni, 11115066, netdev, linux-kernel,
	zhaojunkui2008
In-Reply-To: <20220516015208.6526-1-bernard@vivo.com>

Hello:

This patch was applied to netdev/net-next.git (master)
by Paolo Abeni <pabeni@redhat.com>:

On Sun, 15 May 2022 18:52:05 -0700 you wrote:
> devm_kfree check the pointer, there is no need to check before
> devm_kfree call.
> This change is to cleanup the code a bit.
> 
> Signed-off-by: Bernard Zhao <bernard@vivo.com>
> ---
>  drivers/net/ethernet/ti/am65-cpsw-qos.c | 13 ++++---------
>  1 file changed, 4 insertions(+), 9 deletions(-)

Here is the summary with links:
  - ethernet/ti: delete if NULL check befort devm_kfree
    https://git.kernel.org/netdev/net-next/c/1588f5a91b16

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH nf] netfilter: flowtable: fix TCP flow teardown
From: Oz Shlomo @ 2022-05-17  9:36 UTC (permalink / raw)
  To: Pablo Neira Ayuso, netfilter-devel; +Cc: fw, sven.auhagen, nbd, netdev
In-Reply-To: <20220517091830.7276-1-pablo@netfilter.org>



On 5/17/2022 12:18 PM, Pablo Neira Ayuso wrote:
> This patch addresses three possible problems:
> 
> 1. ct gc may race to undo the timeout adjustment of the packet path, leaving
>     the conntrack entry in place with the internal offload timeout (one day).
> 
> 2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE
>     timeout is reached before the flow offload del.
> 
> 3. tcp ct is always set to ESTABLISHED with a very long timeout
>     in flow offload teardown/delete even though the state might be already
>     CLOSED. Also as a remark we cannot assume that the FIN or RST packet
>     is hitting flow table teardown as the packet might get bumped to the
>     slow path in nftables.
> 
> This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so
> conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN
> state transition.
> 
> Moreover, teturn the connection's ownership to conntrack upon teardown
> by clearing the offload flag and fixing the established timeout value.
> The flow table GC thread will asynchonrnously free the flow table and
> hardware offload entries.
> 
> Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows on
> which is also misleading since the flow is back to classic conntrack
> path.
> 
> If nf_ct_delete() removes the entry from the conntrack table, then it
> calls nf_ct_put() which decrements the refcnt. This is not a problem
> because the flowtable holds a reference to the conntrack object from
> flow_offload_alloc() path which is released via flow_offload_free().
> 
> This patch also updates nft_flow_offload to skip packets in SYN_RECV
> state. Since we might miss or bump packets to slow path, we do not know
> what will happen there while we are still in SYN_RECV, this patch
> postpones offload up to the next packet which also aligns to the
> existing behaviour in tc-ct.
> 
> flow_offload_teardown() does not reset the existing tcp state from
> flow_offload_fixup_tcp() to ESTABLISHED anymore, packets bump to slow
> path might have already update the state to CLOSE/FIN.
> 
> Joint work with Oz and Sven.
> 
> Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race")
> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
> ---
> Everyone happy with this? Please add your Signed-off-by tag.
> 
> I'm showing as the author in this patch, but this is basically a mix and
> match of Oz's and Sven's patches.
> 
> Thanks a lot for your patience !
> 
>   net/netfilter/nf_flow_table_core.c | 33 +++++++-----------------------
>   net/netfilter/nft_flow_offload.c   |  3 ++-
>   2 files changed, 9 insertions(+), 27 deletions(-)
> 
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index 20b4a14e5d4e..ebdf5332e838 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
>   
>   static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
>   {
> -	tcp->state = TCP_CONNTRACK_ESTABLISHED;
>   	tcp->seen[0].td_maxwin = 0;
>   	tcp->seen[1].td_maxwin = 0;
>   }
>   
> -static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
> +static void flow_offload_fixup_ct(struct nf_conn *ct)
>   {
>   	struct net *net = nf_ct_net(ct);
>   	int l4num = nf_ct_protonum(ct);
> @@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
>   	if (l4num == IPPROTO_TCP) {
>   		struct nf_tcp_net *tn = nf_tcp_pernet(net);
>   
> -		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
> +		flow_offload_fixup_tcp(&ct->proto.tcp);
> +
> +		timeout = tn->timeouts[ct->proto.tcp.state];
>   		timeout -= tn->offload_timeout;
>   	} else if (l4num == IPPROTO_UDP) {
>   		struct nf_udp_net *tn = nf_udp_pernet(net);
> @@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
>   		WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
>   }
>   
> -static void flow_offload_fixup_ct_state(struct nf_conn *ct)
> -{
> -	if (nf_ct_protonum(ct) == IPPROTO_TCP)
> -		flow_offload_fixup_tcp(&ct->proto.tcp);
> -}
> -
> -static void flow_offload_fixup_ct(struct nf_conn *ct)
> -{
> -	flow_offload_fixup_ct_state(ct);
> -	flow_offload_fixup_ct_timeout(ct);
> -}
> -
>   static void flow_offload_route_release(struct flow_offload *flow)
>   {
>   	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
> @@ -361,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
>   	rhashtable_remove_fast(&flow_table->rhashtable,
>   			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
>   			       nf_flow_offload_rhash_params);
> -
> -	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
> -
> -	if (nf_flow_has_expired(flow))
> -		flow_offload_fixup_ct(flow->ct);
> -	else
> -		flow_offload_fixup_ct_timeout(flow->ct);
> -
>   	flow_offload_free(flow);
>   }
>   
>   void flow_offload_teardown(struct flow_offload *flow)
>   {
> +	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
>   	set_bit(NF_FLOW_TEARDOWN, &flow->flags);
> -
> -	flow_offload_fixup_ct_state(flow->ct);
> +	flow_offload_fixup_ct(flow->ct);
>   }
>   EXPORT_SYMBOL_GPL(flow_offload_teardown);
>   
> @@ -466,7 +447,7 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
>   	if (nf_flow_has_expired(flow) ||
>   	    nf_ct_is_dying(flow->ct) ||
>   	    nf_flow_has_stale_dst(flow))
> -		set_bit(NF_FLOW_TEARDOWN, &flow->flags);
> +		flow_offload_teardown(flow);
>   
>   	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
>   		if (test_bit(NF_FLOW_HW, &flow->flags)) {
> diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
> index 187b8cb9a510..6612ad8f1565 100644
> --- a/net/netfilter/nft_flow_offload.c
> +++ b/net/netfilter/nft_flow_offload.c
> @@ -298,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>   	case IPPROTO_TCP:
>   		tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
>   					  sizeof(_tcph), &_tcph);
> -		if (unlikely(!tcph || tcph->fin || tcph->rst))
> +		if (unlikely(!tcph || tcph->fin || tcph->rst ||
> +			     !nf_conntrack_tcp_established(&ct->proto.tcp)))

net/netfilter/nft_flow_offload.c:297:39: error: passing argument 1 of 
‘nf_conntrack_tcp_established’ from incompatible pointer type 
[-Werror=incompatible-pointer-types]
          !nf_conntrack_tcp_established(&ct->proto.tcp)))
                                       ^~~~~~~~~~~~~~

I assume this should be:
nf_conntrack_tcp_established(ct)


>   			goto out;
>   		break;
>   	case IPPROTO_UDP:

^ permalink raw reply

* Re: [PATCH wpan-next v2 05/11] net: mac802154: Bring the hability to hold the transmit queue
From: Miquel Raynal @ 2022-05-17  9:27 UTC (permalink / raw)
  To: Alexander Aring
  Cc: Stefan Schmidt, linux-wpan - ML, David S. Miller, Jakub Kicinski,
	Paolo Abeni, open list:NETWORKING [GENERAL], David Girault,
	Romuald Despres, Frederic Blain, Nicolas Schodet,
	Thomas Petazzoni
In-Reply-To: <CAB_54W605SGbkNHhOLG5WEKsvccUvJ=rBnHErcyrte8_H=rY+g@mail.gmail.com>

Hi Alex,

> > @@ -84,7 +118,7 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
> >                                       hw->phy->sifs_period * NSEC_PER_USEC,
> >                                       HRTIMER_MODE_REL);
> >         } else {
> > -               ieee802154_wake_queue(hw);
> > +               ieee802154_release_queue(local);
> >         }
> >
> >         dev_consume_skb_any(skb);
> > @@ -98,7 +132,7 @@ void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb,
> >         struct ieee802154_local *local = hw_to_local(hw);
> >
> >         local->tx_result = reason;
> > -       ieee802154_wake_queue(hw);
> > +       ieee802154_release_queue(local);
> >         dev_kfree_skb_any(skb);
> >         atomic_dec(&hw->phy->ongoing_txs);  
> 
> I am pretty sure that will end in a scheduling while atomic warning
> with hwsim. If you don't hit it you have the wrong config, you need to
> enable such warnings and have the right preemption model setting.

I was using the "desktop" kernel preemption model (voluntary), I've
switched to CONFIG_PREEMPT ("Preemptible kernel (Low-latency)"),
and enabled CONFIG_DEBUG_ATOMIC_SLEEP. You are right that we should use
a spinlock instead of a mutex here. However I don't think disabling
IRQs is necessary, so I'll switch to spin_(un)lock() calls.

> These calls xmit complete/error should even be allowed to be called
> from a hardware irq context, however I _think_ we don't have a driver
> which currently does that, but the mutex will break stuff here in the
> xmit_do() callback of netdev which hwsim is calling it from.
> 
> Please check again...

Will perform a new set of test runs with the new configuration, yes.

Thanks,
Miquèl

^ permalink raw reply

* [PATCH net v3] net: ftgmac100: Disable hardware checksum on AST2600
From: Joel Stanley @ 2022-05-17  9:22 UTC (permalink / raw)
  To: Andrew Lunn, David S . Miller, Jakub Kicinski, Andrew Jeffery
  Cc: netdev, linux-aspeed, David Wilder, Dylan Hung

The AST2600 when using the i210 NIC over NC-SI has been observed to
produce incorrect checksum results with specific MTU values. This was
first observed when sending data across a long distance set of networks.

On a local network, the following test was performed using a 1MB file of
random data.

On the receiver run this script:

 #!/bin/bash
 while [ 1 ]; do
        # Zero the stats
        nstat -r  > /dev/null
        nc -l 9899 > test-file
        # Check for checksum errors
        TcpInCsumErrors=$(nstat | grep TcpInCsumErrors)
        if [ -z "$TcpInCsumErrors" ]; then
                echo No TcpInCsumErrors
        else
                echo TcpInCsumErrors = $TcpInCsumErrors
        fi
 done

On an AST2600 system:

 # nc <IP of  receiver host> 9899 < test-file

The test was repeated with various MTU values:

 # ip link set mtu 1410 dev eth0

The observed results:

 1500 - good
 1434 - bad
 1400 - good
 1410 - bad
 1420 - good

The test was repeated after disabling tx checksumming:

 # ethtool -K eth0 tx-checksumming off

And all MTU values tested resulted in transfers without error.

An issue with the driver cannot be ruled out, however there has been no
bug discovered so far.

David has done the work to take the original bug report of slow data
transfer between long distance connections and triaged it down to this
test case.

The vendor suspects that this is a hardware issue when using NC-SI. The
fixes line refers to the patch that introduced AST2600 support.

Reported-by: David Wilder <wilder@us.ibm.com>
Reviewed-by: Dylan Hung <dylan_hung@aspeedtech.com>
Signed-off-by: Joel Stanley <joel@jms.id.au>
---
v3 modifies the wrapping of the commit message.

v2 updates the commit message with confirmation from the vendor that
this is a hardware issue, and clarifies why the commit used in the fixes
tag was chosen.

 drivers/net/ethernet/faraday/ftgmac100.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index caf48023f8ea..5231818943c6 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1928,6 +1928,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
 	/* AST2400  doesn't have working HW checksum generation */
 	if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac")))
 		netdev->hw_features &= ~NETIF_F_HW_CSUM;
+
+	/* AST2600 tx checksum with NCSI is broken */
+	if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac"))
+		netdev->hw_features &= ~NETIF_F_HW_CSUM;
+
 	if (np && of_get_property(np, "no-hw-checksum", NULL))
 		netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM);
 	netdev->features |= netdev->hw_features;
-- 
2.35.1

^ permalink raw reply related

* Re: [PATCH v5 1/3] dt-bindings: net: adin: document phy clock output properties
From: Krzysztof Kozlowski @ 2022-05-17  9:20 UTC (permalink / raw)
  To: Josua Mayer, netdev
  Cc: alvaro.karsz, Michael Hennerich, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Rob Herring, Krzysztof Kozlowski,
	Alexandru Ardelean
In-Reply-To: <20220517085431.3895-1-josua@solid-run.com>

On 17/05/2022 10:54, Josua Mayer wrote:
> The ADIN1300 supports generating certain clocks on its GP_CLK pin, as
> well as providing the reference clock on CLK25_REF.
> 
> Add DT properties to configure both pins.
> 
> Technically the phy also supports a recovered 125MHz clock for
> synchronous ethernet. However SyncE should be configured dynamically at
> runtime, so it is explicitly omitted in this binding.
> 
> Signed-off-by: Josua Mayer <josua@solid-run.com>


Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>

No need to remove my tag, please keep it.


Best regards,
Krzysztof

^ permalink raw reply

* Re: [PATCH net v2] net: ftgmac100: Disable hardware checksum on AST2600
From: Joel Stanley @ 2022-05-17  9:20 UTC (permalink / raw)
  To: Dylan Hung
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni,
	Benjamin Herrenschmidt, David Wilder, openbmc@lists.ozlabs.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	David Wilder
In-Reply-To: <HK0PR06MB28341F811AD74F52ACA5D19B9CCA9@HK0PR06MB2834.apcprd06.prod.outlook.com>

On Fri, 13 May 2022 at 01:46, Dylan Hung <dylan_hung@aspeedtech.com> wrote:
>
> > -----Original Message-----
> > From: joel.stan@gmail.com [mailto:joel.stan@gmail.com] On Behalf Of Joel
> > Stanley
> > Sent: Friday, May 13, 2022 7:20 AM
> > To: David S . Miller <davem@davemloft.net>; Jakub Kicinski
> > <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; Benjamin
> > Herrenschmidt <benh@kernel.crashing.org>; Dylan Hung
> > <dylan_hung@aspeedtech.com>; David Wilder <dwilder@us.ibm.com>
> > Cc: openbmc@lists.ozlabs.org; netdev@vger.kernel.org;
> > linux-kernel@vger.kernel.org; David Wilder <wilder@us.ibm.com>
> > Subject: [PATCH net v2] net: ftgmac100: Disable hardware checksum on
> > AST2600
> >
> > The AST2600 when using the i210 NIC over NC-SI has been observed to
> > produce incorrect checksum results with specific MTU values. This was first
> > observed when sending data across a long distance set of networks.
> >
> > On a local network, the following test was performed using a 1MB file of
> > random data.
> >
> > On the receiver run this script:
> >
> >  #!/bin/bash
> >  while [ 1 ]; do
> >         # Zero the stats
> >         nstat -r  > /dev/null
> >         nc -l 9899 > test-file
> >         # Check for checksum errors
> >         TcpInCsumErrors=$(nstat | grep TcpInCsumErrors)
> >         if [ -z "$TcpInCsumErrors" ]; then
> >                 echo No TcpInCsumErrors
> >         else
> >                 echo TcpInCsumErrors = $TcpInCsumErrors
> >         fi
> >  done
> >
> > On an AST2600 system:
> >
> >  # nc <IP of  receiver host> 9899 < test-file
> >
> > The test was repeated with various MTU values:
> >
> >  # ip link set mtu 1410 dev eth0
> >
> > The observed results:
> >
> >  1500 - good
> >  1434 - bad
> >  1400 - good
> >  1410 - bad
> >  1420 - good
> >
> > The test was repeated after disabling tx checksumming:
> >
> >  # ethtool -K eth0 tx-checksumming off
> >
> > And all MTU values tested resulted in transfers without error.
> >
> > An issue with the driver cannot be ruled out, however there has been no bug
> > discovered so far.
> >
> > David has done the work to take the original bug report of slow data transfer
> > between long distance connections and triaged it down to this test case.
> >
> > The vendor suspects this this is a hardware issue when using NC-SI. The fixes
> > line refers to the patch that introduced AST2600 support.
> >
> > Fixes: 39bfab8844a0 ("net: ftgmac100: Add support for DT phy-handle
> > property")
> > Reported-by: David Wilder <wilder@us.ibm.com>
> > Signed-off-by: Joel Stanley <joel@jms.id.au>

> Reviewed-by: Dylan Hung <dylan_hung@aspeedtech.com>

Thank you Dylan. I've added your r-b to v3, as the only changes are to
the wrapping of the commit message.

^ permalink raw reply

* Re: [PATCHv2 ipsec] xfrm: set dst dev to blackhole_netdev instead of loopback_dev in ifdown
From: patchwork-bot+netdevbpf @ 2022-05-17  9:20 UTC (permalink / raw)
  To: Xin Long; +Cc: netdev, davem, kuba, edumazet, steffen.klassert
In-Reply-To: <e8c87482998ca6fcdab214f5a9d582899ec0c648.1652665047.git.lucien.xin@gmail.com>

Hello:

This patch was applied to netdev/net.git (master)
by Paolo Abeni <pabeni@redhat.com>:

On Sun, 15 May 2022 21:37:27 -0400 you wrote:
> The global blackhole_netdev has replaced pernet loopback_dev to become the
> one given to the object that holds an netdev when ifdown in many places of
> ipv4 and ipv6 since commit 8d7017fd621d ("blackhole_netdev: use
> blackhole_netdev to invalidate dst entries").
> 
> Especially after commit faab39f63c1f ("net: allow out-of-order netdev
> unregistration"), it's no longer safe to use loopback_dev that may be
> freed before other netdev.
> 
> [...]

Here is the summary with links:
  - [PATCHv2,ipsec] xfrm: set dst dev to blackhole_netdev instead of loopback_dev in ifdown
    https://git.kernel.org/netdev/net/c/4d33ab08c0af

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net v2] net: ftgmac100: Disable hardware checksum on AST2600
From: Joel Stanley @ 2022-05-17  9:19 UTC (permalink / raw)
  To: Paul Menzel
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni,
	Benjamin Herrenschmidt, Dylan Hung, David Wilder, Networking,
	OpenBMC Maillist, David Wilder, Linux Kernel Mailing List
In-Reply-To: <b6da2e5a-eb85-d3cf-d4c3-ca9c0f0c04a4@molgen.mpg.de>

On Fri, 13 May 2022 at 05:11, Paul Menzel <pmenzel@molgen.mpg.de> wrote:
>
> Dear Joel,
>
>
> Am 13.05.22 um 01:19 schrieb Joel Stanley:
> > The AST2600 when using the i210 NIC over NC-SI has been observed to
> > produce incorrect checksum results with specific MTU values. This was
> > first observed when sending data across a long distance set of networks.
> >
> > On a local network, the following test was performed using a 1MB file of
> > random data.
> >
> > On the receiver run this script:
> >
> >   #!/bin/bash
> >   while [ 1 ]; do
> >          # Zero the stats
> >          nstat -r  > /dev/null
> >          nc -l 9899 > test-file
> >          # Check for checksum errors
> >          TcpInCsumErrors=$(nstat | grep TcpInCsumErrors)
> >          if [ -z "$TcpInCsumErrors" ]; then
> >                  echo No TcpInCsumErrors
> >          else
> >                  echo TcpInCsumErrors = $TcpInCsumErrors
> >          fi
> >   done
> >
> > On an AST2600 system:
> >
> >   # nc <IP of  receiver host> 9899 < test-file
> >
> > The test was repeated with various MTU values:
> >
> >   # ip link set mtu 1410 dev eth0
> >
> > The observed results:
> >
> >   1500 - good
> >   1434 - bad
> >   1400 - good
> >   1410 - bad
> >   1420 - good
>
> Sort the values? As some MTUs are good, should a allow list for these
> values be added?

No.

>
> > The test was repeated after disabling tx checksumming:
> >
> >   # ethtool -K eth0 tx-checksumming off
> >
> > And all MTU values tested resulted in transfers without error.
> >
> > An issue with the driver cannot be ruled out, however there has been no
> > bug discovered so far.
> >
> > David has done the work to take the original bug report of slow data
> > transfer between long distance connections and triaged it down to this
> > test case.
> >
> > The vendor suspects this is a hardware issue when using NC-SI. The fixes line refers
> > to the patch that introduced AST2600 support.
>
> Please wrap the line after 75 characters.
>
> Can the problem be reproduced with QEMU?

It can not. If you wanted to try you could modify the model to corrupt
tx checksums, but I would consider this of limited value.

>
> > Fixes: 39bfab8844a0 ("net: ftgmac100: Add support for DT phy-handle property")
> > Reported-by: David Wilder <wilder@us.ibm.com>
> > Signed-off-by: Joel Stanley <joel@jms.id.au>
>
> Should the intel-wired-lan folks be put in Cc?

No, as all evidence points towards this being an AST2600 problem.

ASPEED did not report the issue relates to the i210 doing anything
wrong. The issue is not seen from the host PCIe interface, and the
i210 is in widespread use with other BMCs without issue.

>
> > ---
> > v2 updates the commit message with confirmation form the vendor that
>
> from
>
> > this is a hardware issue, and clarifes why the commit used in the fixes
>
> clarifies
>
> > tag was chosen.
> >
> >   drivers/net/ethernet/faraday/ftgmac100.c | 5 +++++
> >   1 file changed, 5 insertions(+)
> >
> > diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
> > index caf48023f8ea..5231818943c6 100644
> > --- a/drivers/net/ethernet/faraday/ftgmac100.c
> > +++ b/drivers/net/ethernet/faraday/ftgmac100.c
> > @@ -1928,6 +1928,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
> >       /* AST2400  doesn't have working HW checksum generation */
> >       if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac")))
> >               netdev->hw_features &= ~NETIF_F_HW_CSUM;
> > +
> > +     /* AST2600 tx checksum with NC-SI is broken */
>
> Does ASPEED have an internal bug for this, so should there be new
> revisions of the AST2600, the bug can be fixed?

There are no plans to fix it that I'm aware of.

>
> > +     if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac"))
> > +             netdev->hw_features &= ~NETIF_F_HW_CSUM;
> > +
>
> I would fancy a note or even warning about this hardware issue.

I don't see the need to clutter up the kernel logs.

We've had a similar workaround for the 2400 since support was added
for the aspeed part. It doesn't affect the operation of the system; in
fact it improves it as without this we see degraded throughput due to
retransmissions.

We have git history for detailed notes on why a change was made.

>
> >       if (np && of_get_property(np, "no-hw-checksum", NULL))
> >               netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM);
> >       netdev->features |= netdev->hw_features;
>
>
> Kind regards,
>
> Paul

^ permalink raw reply

* Re: [PATCH v5 15/15] samples/landlock: adds network demo
From: Mickaël Salaün @ 2022-05-17  9:19 UTC (permalink / raw)
  To: Konstantin Meskhidze
  Cc: willemdebruijn.kernel, linux-security-module, netdev,
	netfilter-devel, yusongping, anton.sirazetdinov
In-Reply-To: <20220516152038.39594-16-konstantin.meskhidze@huawei.com>



On 16/05/2022 17:20, Konstantin Meskhidze wrote:
> This commit adds a network demo. It's possible to
> allow a sandboxer to bind/connect to a list of
> particular ports, restricting network actions on
> the rest of the ports.
> 
> Signed-off-by: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
> ---
> 
> Changes since v4:
> * Adds ENV_TCP_BIND_NAME "LL_TCP_BIND" and
> ENV_TCP_CONNECT_NAME "LL_TCP_CONNECT" variables
> to insert TCP ports.
> * Renames populate_ruleset() to populate_ruleset_fs().
> * Adds populate_ruleset_net() and parse_port_num() helpers.
> * Refactoring main() to support network sandboxing.
> 
> ---
>   samples/landlock/sandboxer.c | 105 +++++++++++++++++++++++++++++++----
>   security/landlock/ruleset.h  |   4 +-
>   2 files changed, 95 insertions(+), 14 deletions(-)
> 
> diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c
> index 3e404e51ec64..4006c42eec1c 100644
> --- a/samples/landlock/sandboxer.c
> +++ b/samples/landlock/sandboxer.c
> @@ -51,6 +51,8 @@ static inline int landlock_restrict_self(const int ruleset_fd,
> 
>   #define ENV_FS_RO_NAME "LL_FS_RO"
>   #define ENV_FS_RW_NAME "LL_FS_RW"
> +#define ENV_TCP_BIND_NAME "LL_TCP_BIND"
> +#define ENV_TCP_CONNECT_NAME "LL_TCP_CONNECT"
>   #define ENV_PATH_TOKEN ":"
> 
>   static int parse_path(char *env_path, const char ***const path_list)
> @@ -71,6 +73,20 @@ static int parse_path(char *env_path, const char ***const path_list)
>   	return num_paths;
>   }
> 
> +static int parse_port_num(char *env_port)
> +{
> +	int i, num_ports = 0;
> +
> +	if (env_port) {
> +		num_ports++;
> +		for (i = 0; env_port[i]; i++) {
> +			if (env_port[i] == ENV_PATH_TOKEN[0])
> +				num_ports++;
> +		}
> +	}
> +	return num_ports;
> +}
> +
>   /* clang-format off */
> 
>   #define ACCESS_FILE ( \
> @@ -80,7 +96,7 @@ static int parse_path(char *env_path, const char ***const path_list)
> 
>   /* clang-format on */
> 
> -static int populate_ruleset(const char *const env_var, const int ruleset_fd,
> +static int populate_ruleset_fs(const char *const env_var, const int ruleset_fd,
>   			    const __u64 allowed_access)
>   {
>   	int num_paths, i, ret = 1;
> @@ -142,6 +158,49 @@ static int populate_ruleset(const char *const env_var, const int ruleset_fd,
>   	return ret;
>   }
> 
> +static int populate_ruleset_net(const char *const env_var,
> +				const int ruleset_fd,
> +				const __u64 allowed_access)
> +{
> +	int num_ports, i, ret = 1;
> +	char *env_port_name;
> +	struct landlock_net_service_attr net_service = {
> +		.allowed_access = 0,
> +		.port = 0,
> +	};
> +
> +	env_port_name = getenv(env_var);
> +	if (!env_port_name) {
> +		/* Prevents users to forget a setting. */
> +		fprintf(stderr, "Missing environment variable %s\n", env_var);
> +		return 1;

I think network ports should be optional to be able to test without that 
(and not break compatibility). You can pass &ruleset_attr as argument to 
update it accordingly:
- without environment variable: no network restriction;
- with empty environment variable: all connect (or bind) denied;
- otherwise: only allow the listed ports.


> +	}
> +	env_port_name = strdup(env_port_name);
> +	unsetenv(env_var);
> +	num_ports = parse_port_num(env_port_name);
> +
> +	if (num_ports == 1 && (strtok(env_port_name, ENV_PATH_TOKEN) == NULL)) {
> +		ret = 0;
> +		goto out_free_name;
> +	}
> +
> +	for (i = 0; i < num_ports; i++) {
> +		net_service.allowed_access = allowed_access;
> +		net_service.port = atoi(strsep(&env_port_name, ENV_PATH_TOKEN));
> +		if (landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_SERVICE,
> +					&net_service, 0)) {
> +			fprintf(stderr, "Failed to update the ruleset with port \"%d\": %s\n",
> +					net_service.port, strerror(errno));
> +			goto out_free_name;
> +		}
> +	}
> +	ret = 0;
> +
> +out_free_name:
> +	free(env_port_name);
> +	return ret;
> +}
> +
>   /* clang-format off */
> 
>   #define ACCESS_FS_ROUGHLY_READ ( \
> @@ -173,19 +232,24 @@ int main(const int argc, char *const argv[], char *const *const envp)
>   	char *const *cmd_argv;
>   	int ruleset_fd, abi;
>   	__u64 access_fs_ro = ACCESS_FS_ROUGHLY_READ,
> -	      access_fs_rw = ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE;
> +	      access_fs_rw = ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE,
> +	      access_net_tcp = LANDLOCK_ACCESS_NET_BIND_TCP |
> +					LANDLOCK_ACCESS_NET_CONNECT_TCP;
>   	struct landlock_ruleset_attr ruleset_attr = {
>   		.handled_access_fs = access_fs_rw,
> +		.handled_access_net = access_net_tcp,
>   	};
> 
>   	if (argc < 2) {
>   		fprintf(stderr,
> -			"usage: %s=\"...\" %s=\"...\" %s <cmd> [args]...\n\n",
> -			ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
> +			"usage: %s=\"...\" %s=\"...\" %s=\"...\" %s=\"...\"%s "
> +			"<cmd> [args]...\n\n", ENV_FS_RO_NAME, ENV_FS_RW_NAME,
> +			ENV_TCP_BIND_NAME, ENV_TCP_CONNECT_NAME, argv[0]);
>   		fprintf(stderr,
>   			"Launch a command in a restricted environment.\n\n");
> -		fprintf(stderr, "Environment variables containing paths, "
> -				"each separated by a colon:\n");
> +		fprintf(stderr,
> +			"Environment variables containing paths and ports "
> +			"each separated by a colon:\n");
>   		fprintf(stderr,
>   			"* %s: list of paths allowed to be used in a read-only way.\n",
>   			ENV_FS_RO_NAME);
> @@ -193,11 +257,19 @@ int main(const int argc, char *const argv[], char *const *const envp)
>   			"* %s: list of paths allowed to be used in a read-write way.\n",
>   			ENV_FS_RW_NAME);
>   		fprintf(stderr,
> -			"\nexample:\n"
> +			"* %s: list of ports allowed to bind (server).\n",
> +			ENV_TCP_BIND_NAME);
> +		fprintf(stderr,
> +			"* %s: list of ports allowed to connect (client).\n",
> +			ENV_TCP_CONNECT_NAME);

This is good and will be better with clang-format. ;)

> +		fprintf(stderr, "\nexample:\n"
>   			"%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" "
>   			"%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" "
> +			"%s=\"15000:16000\" "

Bind ports example should reference unprivileged ports such as "9418" 
(git, not well-known but OK).


> +			"%s=\"10000:12000\" "

Connect ports example should reference well-known ports such as "80:443".

>   			"%s bash -i\n",
> -			ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
> +			ENV_FS_RO_NAME, ENV_FS_RW_NAME, ENV_TCP_BIND_NAME,
> +			ENV_TCP_CONNECT_NAME, argv[0]);
>   		return 1;
>   	}
> 
> @@ -234,16 +306,25 @@ int main(const int argc, char *const argv[], char *const *const envp)
> 
>   	ruleset_fd =
>   		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
> +

Why?


>   	if (ruleset_fd < 0) {
>   		perror("Failed to create a ruleset");
>   		return 1;
>   	}
> -	if (populate_ruleset(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro)) {
> +	if (populate_ruleset_fs(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro))
>   		goto err_close_ruleset;
> -	}

Why? I know that checkpatch.pl prints a warning for that but I
deliberately chose to use curly braces even for "if" statements with one
line because it is safer. This code may be copied/pasted and I'd like
others to avoid introducing goto-fail-like issues.



> -	if (populate_ruleset(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw)) {
> +
> +	if (populate_ruleset_fs(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw))
>   		goto err_close_ruleset;
> -	}
> +
> +	if (populate_ruleset_net(ENV_TCP_BIND_NAME, ruleset_fd,
> +				 LANDLOCK_ACCESS_NET_BIND_TCP))

So please use curly braces here too.

> +		goto err_close_ruleset;
> +
> +	if (populate_ruleset_net(ENV_TCP_CONNECT_NAME, ruleset_fd,
> +				 LANDLOCK_ACCESS_NET_CONNECT_TCP))
> +		goto err_close_ruleset;
> +
>   	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
>   		perror("Failed to restrict privileges");
>   		goto err_close_ruleset;
> diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
> index 916b30b31c06..e1ff40f238a6 100644
> --- a/security/landlock/ruleset.h
> +++ b/security/landlock/ruleset.h
> @@ -19,7 +19,7 @@
>   #include "limits.h"
>   #include "object.h"
> 
> -typedef u16 access_mask_t;
> +typedef u32 access_mask_t;

What‽


> 
>   /* Makes sure all filesystem access rights can be stored. */
>   static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_ACCESS_FS);
> @@ -157,7 +157,7 @@ struct landlock_ruleset {
>   			 * layers are set once and never changed for the
>   			 * lifetime of the ruleset.
>   			 */
> -			u32 access_masks[];
> +			access_mask_t access_masks[];
>   		};
>   	};
>   };
> --
> 2.25.1
> 

^ permalink raw reply

* Re: [PATCH bpf-next 1/2] cpuidle/rcu: Making arch_cpu_idle and rcu_idle_exit noinstr
From: kernel test robot @ 2022-05-17  9:19 UTC (permalink / raw)
  To: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Masami Hiramatsu, Paul E. McKenney
  Cc: kbuild-all, netdev, bpf, lkml, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Steven Rostedt
In-Reply-To: <20220515203653.4039075-1-jolsa@kernel.org>

Hi Jiri,

I love your patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Jiri-Olsa/cpuidle-rcu-Making-arch_cpu_idle-and-rcu_idle_exit-noinstr/20220516-043752
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-randconfig-a014-20220516 (https://download.01.org/0day-ci/archive/20220517/202205171711.hqxFhp5l-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.2.0-20) 11.2.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/0b6fee32d730f621f2bfc4d8d9f0729814398415
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Jiri-Olsa/cpuidle-rcu-Making-arch_cpu_idle-and-rcu_idle_exit-noinstr/20220516-043752
        git checkout 0b6fee32d730f621f2bfc4d8d9f0729814398415
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   vmlinux.o: warning: objtool: vmx_l1d_flush()+0x13: call to static_key_count() leaves .noinstr.text section
   vmlinux.o: warning: objtool: vmx_vcpu_enter_exit()+0x29: call to static_key_count() leaves .noinstr.text section
   vmlinux.o: warning: objtool: vmx_update_host_rsp()+0x3e: call to static_key_count() leaves .noinstr.text section
   vmlinux.o: warning: objtool: arch_cpu_idle()+0xb: call to {dynamic}() leaves .noinstr.text section
>> vmlinux.o: warning: objtool: rcu_idle_exit()+0x25: call to trace_hardirqs_on() leaves .noinstr.text section
   vmlinux.o: warning: objtool: enter_from_user_mode()+0x1c: call to __kcsan_check_access() leaves .noinstr.text section
   vmlinux.o: warning: objtool: syscall_enter_from_user_mode()+0x21: call to __kcsan_check_access() leaves .noinstr.text section
   vmlinux.o: warning: objtool: syscall_enter_from_user_mode_prepare()+0x1c: call to __kcsan_check_access() leaves .noinstr.text section
   vmlinux.o: warning: objtool: exit_to_user_mode()+0x1b: call to static_key_count() leaves .noinstr.text section
   vmlinux.o: warning: objtool: syscall_exit_to_user_mode()+0x36: call to static_key_count() leaves .noinstr.text section
   vmlinux.o: warning: objtool: irqentry_enter_from_user_mode()+0x1c: call to __kcsan_check_access() leaves .noinstr.text section
   vmlinux.o: warning: objtool: irqentry_exit_to_user_mode()+0x22: call to static_key_count() leaves .noinstr.text section

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply

* [PATCH nf] netfilter: flowtable: fix TCP flow teardown
From: Pablo Neira Ayuso @ 2022-05-17  9:18 UTC (permalink / raw)
  To: netfilter-devel; +Cc: ozsh, fw, sven.auhagen, nbd, netdev

This patch addresses three possible problems:

1. ct gc may race to undo the timeout adjustment of the packet path, leaving
   the conntrack entry in place with the internal offload timeout (one day).

2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE
   timeout is reached before the flow offload del.

3. tcp ct is always set to ESTABLISHED with a very long timeout
   in flow offload teardown/delete even though the state might be already
   CLOSED. Also as a remark we cannot assume that the FIN or RST packet
   is hitting flow table teardown as the packet might get bumped to the
   slow path in nftables.

This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so
conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN
state transition.

Moreover, return the connection's ownership to conntrack upon teardown
by clearing the offload flag and fixing the established timeout value.
The flow table GC thread will asynchronously free the flow table and
hardware offload entries.

Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows,
which is also misleading since the flow is back to the classic conntrack
path.

If nf_ct_delete() removes the entry from the conntrack table, then it
calls nf_ct_put() which decrements the refcnt. This is not a problem
because the flowtable holds a reference to the conntrack object from
flow_offload_alloc() path which is released via flow_offload_free().

This patch also updates nft_flow_offload to skip packets in SYN_RECV
state. Since we might miss or bump packets to slow path, we do not know
what will happen there while we are still in SYN_RECV, this patch
postpones offload up to the next packet which also aligns to the
existing behaviour in tc-ct.

flow_offload_teardown() does not reset the existing tcp state from
flow_offload_fixup_tcp() to ESTABLISHED anymore, packets bumped to the slow
path might have already updated the state to CLOSE/FIN.

Joint work with Oz and Sven.

Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
Everyone happy with this? Please add your Signed-off-by tag.

I'm showing as the author in this patch, but this is basically a mix and
match of Oz's and Sven's patches.

Thanks a lot for your patience !

 net/netfilter/nf_flow_table_core.c | 33 +++++++-----------------------
 net/netfilter/nft_flow_offload.c   |  3 ++-
 2 files changed, 9 insertions(+), 27 deletions(-)

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 20b4a14e5d4e..ebdf5332e838 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);

 static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 {
-	tcp->state = TCP_CONNTRACK_ESTABLISHED;
 	tcp->seen[0].td_maxwin = 0;
 	tcp->seen[1].td_maxwin = 0;
 }

-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	int l4num = nf_ct_protonum(ct);
@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 	if (l4num == IPPROTO_TCP) {
 		struct nf_tcp_net *tn = nf_tcp_pernet(net);

-		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+		flow_offload_fixup_tcp(&ct->proto.tcp);
+
+		timeout = tn->timeouts[ct->proto.tcp.state];
 		timeout -= tn->offload_timeout;
 	} else if (l4num == IPPROTO_UDP) {
 		struct nf_udp_net *tn = nf_udp_pernet(net);
@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 		WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
 }

-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
-{
-	if (nf_ct_protonum(ct) == IPPROTO_TCP)
-		flow_offload_fixup_tcp(&ct->proto.tcp);
-}
-
-static void flow_offload_fixup_ct(struct nf_conn *ct)
-{
-	flow_offload_fixup_ct_state(ct);
-	flow_offload_fixup_ct_timeout(ct);
-}
-
 static void flow_offload_route_release(struct flow_offload *flow)
 {
 	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -361,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 	rhashtable_remove_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
 			       nf_flow_offload_rhash_params);
-
-	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
-
-	if (nf_flow_has_expired(flow))
-		flow_offload_fixup_ct(flow->ct);
-	else
-		flow_offload_fixup_ct_timeout(flow->ct);
-
 	flow_offload_free(flow);
 }

 void flow_offload_teardown(struct flow_offload *flow)
 {
+	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
 	set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
-	flow_offload_fixup_ct_state(flow->ct);
+	flow_offload_fixup_ct(flow->ct);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);

@@ -466,7 +447,7 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
 	if (nf_flow_has_expired(flow) ||
 	    nf_ct_is_dying(flow->ct) ||
 	    nf_flow_has_stale_dst(flow))
-		set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+		flow_offload_teardown(flow);

 	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
 		if (test_bit(NF_FLOW_HW, &flow->flags)) {
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 187b8cb9a510..6612ad8f1565 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -298,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 	case IPPROTO_TCP:
 		tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
 					  sizeof(_tcph), &_tcph);
-		if (unlikely(!tcph || tcph->fin || tcph->rst))
+		if (unlikely(!tcph || tcph->fin || tcph->rst ||
+			     !nf_conntrack_tcp_established(&ct->proto.tcp)))
 			goto out;
 		break;
 	case IPPROTO_UDP:
-- 
2.30.2

^ permalink raw reply related

* Re: [PATCH wpan-next 06/11] net: mac802154: Hold the transmit queue when relevant
From: Miquel Raynal @ 2022-05-17  9:13 UTC (permalink / raw)
  To: Alexander Aring
  Cc: Alexander Aring, Stefan Schmidt, linux-wpan, David S. Miller,
	Jakub Kicinski, Network Development, David Girault,
	Romuald Despres, Frederic Blain, Nicolas Schodet,
	Thomas Petazzoni
In-Reply-To: <CAK-6q+h07LM1-Cu_mkxAZWN2kG9LLxoKvXxUiQ5DPSYwRkbXZw@mail.gmail.com>

Hi Alex,

aahringo@redhat.com wrote on Thu, 12 May 2022 10:44:35 -0400:

> Hi,
> 
> On Thu, May 12, 2022 at 10:33 AM Miquel Raynal
> <miquel.raynal@bootlin.com> wrote:
> >
> > Hi Alexander,
> >
> > aahringo@redhat.com wrote on Wed, 11 May 2022 09:09:40 -0400:
> >  
> > > Hi,
> > >
> > > On Tue, May 10, 2022 at 10:52 AM Miquel Raynal
> > > <miquel.raynal@bootlin.com> wrote:  
> > > >
> > > > Hi Alex,
> > > >  
> > > > > > --- a/net/mac802154/tx.c
> > > > > > +++ b/net/mac802154/tx.c
> > > > > > @@ -106,6 +106,21 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
> > > > > >         return NETDEV_TX_OK;
> > > > > >  }
> > > > > >
> > > > > > +void ieee802154_hold_queue(struct ieee802154_local *local)
> > > > > > +{
> > > > > > +       atomic_inc(&local->phy->hold_txs);
> > > > > > +}
> > > > > > +
> > > > > > +void ieee802154_release_queue(struct ieee802154_local *local)
> > > > > > +{
> > > > > > +       atomic_dec(&local->phy->hold_txs);
> > > > > > +}
> > > > > > +
> > > > > > +bool ieee802154_queue_is_held(struct ieee802154_local *local)
> > > > > > +{
> > > > > > +       return atomic_read(&local->phy->hold_txs);
> > > > > > +}  
> > > > >
> > > > > I am not getting this, should the release_queue() function not do
> > > > > something like:
> > > > >
> > > > > if (atomic_dec_and_test(hold_txs))
> > > > >       ieee802154_wake_queue(local);
> > > > >
> > > > > I think we don't need the test of "ieee802154_queue_is_held()" here,
> > > > > then we need to replace all stop_queue/wake_queue with hold and
> > > > > release?  
> > > >
> > > > That's actually a good idea. I've implemented it and it looks nice too.
> > > > I'll clean this up and share a new version with:
> > > > - The wake call checked everytime hold_txs gets decremented
> > > > - The removal of the _queue_is_held() helper
> > > > - _wake/stop_queue() turned static
> > > > - _hold/release_queue() used everywhere
> > > >  
> > >
> > > I think there is also a lock necessary for atomic inc/dec hitting zero
> > > and the stop/wake call afterwards...  
> >
> > Mmmh that is true, it can race. I've introduced a mutex (I think it's
> > safe but it can be turned into a spinlock if proven necessary) to
> > secure these increment/decrement+wakeup operations.
> >  
> 
> be aware that you might call these functions from different contexts,
> test your patches with PROVE_LOCKING enabled.

Right, I've added it to my .config, let's see what it tells me.

> > > ,there are also a lot of
> > > optimization techniques to only hold the lock for hitting zero cases
> > > in such areas. However we will see...  
> >
> > I am not aware of technical solutions to avoid the locking in these
> > cases, what do you have in mind? Otherwise I propose just to come up
> > with a working and hopefully solid solution and then we'll see how to
> > optimize.  
> 
> Yes, it's not so important...
> 
> - Alex
> 

Thanks,
Miquèl

^ permalink raw reply

* [PATCH net-next v2] net: txgbe: Add build support for txgbe
From: Jiawen Wu @ 2022-05-17  9:21 UTC (permalink / raw)
  To: netdev; +Cc: Jiawen Wu

Add doc build infrastructure for txgbe driver.
Initialize PCI memory space for WangXun 10 Gigabit Ethernet devices.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 .../device_drivers/ethernet/index.rst         |   1 +
 .../device_drivers/ethernet/wangxun/txgbe.rst |  19 +
 MAINTAINERS                                   |   7 +
 drivers/net/ethernet/Kconfig                  |   1 +
 drivers/net/ethernet/Makefile                 |   1 +
 drivers/net/ethernet/wangxun/Kconfig          |  34 ++
 drivers/net/ethernet/wangxun/Makefile         |   6 +
 drivers/net/ethernet/wangxun/txgbe/Makefile   |   9 +
 drivers/net/ethernet/wangxun/txgbe/txgbe.h    |  76 ++++
 .../net/ethernet/wangxun/txgbe/txgbe_main.c   | 332 ++++++++++++++++++
 .../net/ethernet/wangxun/txgbe/txgbe_type.h   |  87 +++++
 11 files changed, 573 insertions(+)
 create mode 100644 Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst
 create mode 100644 drivers/net/ethernet/wangxun/Kconfig
 create mode 100644 drivers/net/ethernet/wangxun/Makefile
 create mode 100644 drivers/net/ethernet/wangxun/txgbe/Makefile
 create mode 100644 drivers/net/ethernet/wangxun/txgbe/txgbe.h
 create mode 100644 drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
 create mode 100644 drivers/net/ethernet/wangxun/txgbe/txgbe_type.h

diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 6b5dc203da2b..4766ac9d260e 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -52,6 +52,7 @@ Contents:
    ti/am65_nuss_cpsw_switchdev
    ti/tlan
    toshiba/spider_net
+   wangxun/txgbe
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst b/Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst
new file mode 100644
index 000000000000..de5661ba0603
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================================================
+Linux Base Driver for WangXun(R) 10 Gigabit PCI Express Adapters
+================================================================
+
+WangXun 10 Gigabit Linux driver.
+Copyright (c) 2015 - 2022 Beijing WangXun Technology Co., Ltd.
+
+
+Contents
+========
+
+- Support
+
+
+Support
+=======
+If you have any problems, contact the Wangxun support team via support@trustnetic.com
diff --git a/MAINTAINERS b/MAINTAINERS
index b7b1dfba707c..37d1043d1926 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21207,6 +21207,13 @@ L:	linux-input@vger.kernel.org
 S:	Maintained
 F:	drivers/input/tablet/wacom_serial4.c
 
+WANGXUN ETHERNET DRIVER
+M:	Jiawen Wu <jiawenwu@trustnetic.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst
+F:	drivers/net/ethernet/wangxun/
+
 WATCHDOG DEVICE DRIVERS
 M:	Wim Van Sebroeck <wim@linux-watchdog.org>
 M:	Guenter Roeck <linux@roeck-us.net>
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 827993022386..e505cb1c171b 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -84,6 +84,7 @@ source "drivers/net/ethernet/huawei/Kconfig"
 source "drivers/net/ethernet/i825xx/Kconfig"
 source "drivers/net/ethernet/ibm/Kconfig"
 source "drivers/net/ethernet/intel/Kconfig"
+source "drivers/net/ethernet/wangxun/Kconfig"
 source "drivers/net/ethernet/xscale/Kconfig"
 
 config JME
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 8ef43e0c33c0..82db3b15e421 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -96,6 +96,7 @@ obj-$(CONFIG_NET_VENDOR_TOSHIBA) += toshiba/
 obj-$(CONFIG_NET_VENDOR_TUNDRA) += tundra/
 obj-$(CONFIG_NET_VENDOR_VERTEXCOM) += vertexcom/
 obj-$(CONFIG_NET_VENDOR_VIA) += via/
+obj-$(CONFIG_NET_VENDOR_WANGXUN) += wangxun/
 obj-$(CONFIG_NET_VENDOR_WIZNET) += wiznet/
 obj-$(CONFIG_NET_VENDOR_XILINX) += xilinx/
 obj-$(CONFIG_NET_VENDOR_XIRCOM) += xircom/
diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig
new file mode 100644
index 000000000000..251f222f84db
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/Kconfig
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Wangxun network device configuration
+#
+
+config NET_VENDOR_WANGXUN
+	bool "Wangxun devices"
+	default y
+	help
+	  If you have a network (Ethernet) card belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Wangxun cards. If you say Y, you will be asked for
+	  your specific card in the following questions.
+
+if NET_VENDOR_WANGXUN
+
+config TXGBE
+	tristate "Wangxun(R) 10GbE PCI Express adapters support"
+	depends on PCI
+	depends on PTP_1588_CLOCK_OPTIONAL
+	select PHYLIB
+	help
+	  This driver supports Wangxun(R) 10GbE PCI Express family of
+	  adapters.
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called txgbe.
+
+endif # NET_VENDOR_WANGXUN
diff --git a/drivers/net/ethernet/wangxun/Makefile b/drivers/net/ethernet/wangxun/Makefile
new file mode 100644
index 000000000000..c34db1bead25
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Wangxun network device drivers.
+#
+
+obj-$(CONFIG_TXGBE) += txgbe/
diff --git a/drivers/net/ethernet/wangxun/txgbe/Makefile b/drivers/net/ethernet/wangxun/txgbe/Makefile
new file mode 100644
index 000000000000..725aa1f721f6
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/txgbe/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd.
+#
+# Makefile for the Wangxun(R) 10GbE PCI Express ethernet driver
+#
+
+obj-$(CONFIG_TXGBE) += txgbe.o
+
+txgbe-objs := txgbe_main.o
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe.h b/drivers/net/ethernet/wangxun/txgbe/txgbe.h
new file mode 100644
index 000000000000..d51b5a2c5356
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd. */
+
+#ifndef _TXGBE_H_
+#define _TXGBE_H_
+
+#include "txgbe_type.h"
+
+#ifndef MAX_REQUEST_SIZE
+#define MAX_REQUEST_SIZE 256
+#endif
+
+#define TXGBE_MAX_FDIR_INDICES          63
+
+#define MAX_TX_QUEUES   (TXGBE_MAX_FDIR_INDICES + 1)
+
+/* board specific private data structure */
+struct txgbe_adapter {
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+
+	unsigned long state;
+
+	/* structs defined in txgbe_hw.h */
+	struct txgbe_hw hw;
+	u16 msg_enable;
+
+	u8 __iomem *io_addr;    /* Mainly for iounmap use */
+};
+
+enum txgbe_state_t {
+	__TXGBE_TESTING,
+	__TXGBE_RESETTING,
+	__TXGBE_DOWN,
+	__TXGBE_HANGING,
+	__TXGBE_DISABLED,
+	__TXGBE_REMOVING,
+	__TXGBE_SERVICE_SCHED,
+	__TXGBE_SERVICE_INITED,
+	__TXGBE_IN_SFP_INIT,
+	__TXGBE_PTP_RUNNING,
+	__TXGBE_PTP_TX_IN_PROGRESS,
+};
+
+#define TXGBE_NAME "txgbe"
+
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+	return &pdev->dev;
+}
+
+#define txgbe_dev_info(format, arg...) \
+	dev_info(&adapter->pdev->dev, format, ## arg)
+#define txgbe_dev_warn(format, arg...) \
+	dev_warn(&adapter->pdev->dev, format, ## arg)
+#define txgbe_dev_err(format, arg...) \
+	dev_err(&adapter->pdev->dev, format, ## arg)
+#define txgbe_dev_notice(format, arg...) \
+	dev_notice(&adapter->pdev->dev, format, ## arg)
+#define txgbe_dbg(msglvl, format, arg...) \
+	netif_dbg(adapter, msglvl, adapter->netdev, format, ## arg)
+#define txgbe_info(msglvl, format, arg...) \
+	netif_info(adapter, msglvl, adapter->netdev, format, ## arg)
+#define txgbe_err(msglvl, format, arg...) \
+	netif_err(adapter, msglvl, adapter->netdev, format, ## arg)
+#define txgbe_warn(msglvl, format, arg...) \
+	netif_warn(adapter, msglvl, adapter->netdev, format, ## arg)
+#define txgbe_crit(msglvl, format, arg...) \
+	netif_crit(adapter, msglvl, adapter->netdev, format, ## arg)
+
+#define TXGBE_FAILED_READ_CFG_DWORD 0xffffffffU
+#define TXGBE_FAILED_READ_CFG_WORD  0xffffU
+#define TXGBE_FAILED_READ_CFG_BYTE  0xffU
+
+#endif /* _TXGBE_H_ */
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
new file mode 100644
index 000000000000..17a30629f76a
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd. */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/aer.h>
+#include <linux/etherdevice.h>
+
+#include "txgbe.h"
+
+char txgbe_driver_name[32] = TXGBE_NAME;
+static const char txgbe_driver_string[] =
+			"WangXun 10 Gigabit PCI Express Network Driver";
+
+static const char txgbe_copyright[] =
+	"Copyright (c) 2015 -2017 Beijing WangXun Technology Co., Ltd";
+
+/* txgbe_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id txgbe_pci_tbl[] = {
+	{ PCI_VDEVICE(TRUSTNETIC, TXGBE_DEV_ID_SP1000), 0},
+	{ PCI_VDEVICE(TRUSTNETIC, TXGBE_DEV_ID_WX1820), 0},
+	/* required last entry */
+	{ .device = 0 }
+};
+MODULE_DEVICE_TABLE(pci, txgbe_pci_tbl);
+
+MODULE_AUTHOR("Beijing WangXun Technology Co., Ltd, <software@trustnetic.com>");
+MODULE_DESCRIPTION("WangXun(R) 10 Gigabit PCI Express Network Driver");
+MODULE_LICENSE("GPL");
+
+#define DEFAULT_DEBUG_LEVEL_SHIFT 3
+
+static struct workqueue_struct *txgbe_wq;
+
+static bool txgbe_check_cfg_remove(struct txgbe_hw *hw, struct pci_dev *pdev);
+
+/* Mark the adapter as gone: forget the register mapping in @hw and log the
+ * event once. Does nothing when hw->hw_addr is already NULL.
+ */
+static void txgbe_remove_adapter(struct txgbe_hw *hw)
+{
+	/* "adapter" is what the txgbe_dev_err() macro logs against */
+	struct txgbe_adapter *adapter = hw->back;
+
+	if (hw->hw_addr) {
+		hw->hw_addr = NULL;
+		txgbe_dev_err("Adapter removed\n");
+	}
+}
+
+/**
+ * txgbe_sw_init - Initialize general software structures (struct txgbe_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * Records the PCI identity of the device (vendor, device, revision and
+ * subsystem IDs) in adapter->hw.
+ *
+ * Returns 0 on success. Returns -ENODEV when the revision ID reads back as
+ * all ones and txgbe_check_cfg_remove() confirms the device is gone, or when
+ * the subsystem ID reads back as all ones.
+ **/
+static int txgbe_sw_init(struct txgbe_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct txgbe_hw *hw = &adapter->hw;
+
+	/* PCI config space info */
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	if (hw->revision_id == TXGBE_FAILED_READ_CFG_BYTE &&
+	    txgbe_check_cfg_remove(hw, pdev)) {
+		txgbe_err(probe, "read of revision id failed\n");
+		return -ENODEV;
+	}
+
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+
+	pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &hw->subsystem_id);
+	if (hw->subsystem_id == TXGBE_FAILED_READ_CFG_WORD) {
+		txgbe_err(probe, "read of subsystem id failed\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/* Quiesce the device on behalf of txgbe_shutdown().
+ * @pdev: PCI device being shut down
+ * @enable_wake: out parameter, whether the caller should arm wake-up
+ *
+ * Detaches the net device and disables the PCI device unless it has already
+ * been marked __TXGBE_DISABLED. Nothing here configures a wake-up source, so
+ * *enable_wake is always reported as false.
+ *
+ * Returns 0.
+ */
+static int __txgbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
+{
+	struct txgbe_adapter *adapter = pci_get_drvdata(pdev);
+
+	/* Always hand a defined value back; the caller passes it on to
+	 * pci_wake_from_d3().
+	 */
+	*enable_wake = false;
+
+	/* Without driver data there is no adapter state to quiesce */
+	if (!adapter)
+		return 0;
+
+	netif_device_detach(adapter->netdev);
+
+	if (!test_and_set_bit(__TXGBE_DISABLED, &adapter->state))
+		pci_disable_device(pdev);
+
+	return 0;
+}
+
+/* PCI .shutdown callback: quiesce the device and, when the system is
+ * powering off, put it into D3hot.
+ */
+static void txgbe_shutdown(struct pci_dev *pdev)
+{
+	/* Start from "no wake-up" so pci_wake_from_d3() never sees an
+	 * indeterminate value if __txgbe_shutdown() leaves it untouched.
+	 */
+	bool wake = false;
+
+	__txgbe_shutdown(pdev, &wake);
+
+	if (system_state == SYSTEM_POWER_OFF) {
+		pci_wake_from_d3(pdev, wake);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+}
+
+/**
+ * txgbe_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in txgbe_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * txgbe_probe initializes an adapter identified by a pci_dev structure:
+ * it enables the device, selects a DMA mask, claims the memory BARs,
+ * allocates the net_device together with its private txgbe_adapter, maps
+ * BAR 0 and fills in the software state. On success the adapter is stored
+ * as PCI driver data so that txgbe_remove() and txgbe_shutdown() can find
+ * it; on failure everything acquired so far is released again.
+ **/
+static int txgbe_probe(struct pci_dev *pdev,
+		       const struct pci_device_id __always_unused *ent)
+{
+	struct net_device *netdev;
+	struct txgbe_adapter *adapter = NULL;
+	struct txgbe_hw *hw;
+	int err, pci_using_dac;
+	unsigned int indices = MAX_TX_QUEUES;
+	bool disable_dev = false;
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	/* Prefer 64-bit DMA; fall back to 32-bit. Streaming and coherent
+	 * masks are always set together so they cannot disagree.
+	 */
+	if (!dma_set_mask_and_coherent(pci_dev_to_dev(pdev),
+				       DMA_BIT_MASK(64))) {
+		pci_using_dac = 1;
+	} else {
+		err = dma_set_mask_and_coherent(pci_dev_to_dev(pdev),
+						DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(pci_dev_to_dev(pdev),
+				"No usable DMA configuration, aborting\n");
+			goto err_dma;
+		}
+		pci_using_dac = 0;
+	}
+
+	err = pci_request_selected_regions(pdev,
+					   pci_select_bars(pdev, IORESOURCE_MEM),
+					   txgbe_driver_name);
+	if (err) {
+		dev_err(pci_dev_to_dev(pdev),
+			"pci_request_selected_regions failed 0x%x\n", err);
+		goto err_pci_reg;
+	}
+
+	pci_enable_pcie_error_reporting(pdev);
+	pci_set_master(pdev);
+	/* errata 16 */
+	if (MAX_REQUEST_SIZE == 512) {
+		/* Max_Read_Request_Size field = 010b (512 bytes) */
+		pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
+						   PCI_EXP_DEVCTL_READRQ,
+						   0x2000);
+	} else {
+		/* Max_Read_Request_Size field = 001b (256 bytes) */
+		pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
+						   PCI_EXP_DEVCTL_READRQ,
+						   0x1000);
+	}
+
+	netdev = alloc_etherdev_mq(sizeof(struct txgbe_adapter), indices);
+	if (!netdev) {
+		err = -ENOMEM;
+		goto err_alloc_etherdev;
+	}
+
+	SET_NETDEV_DEV(netdev, pci_dev_to_dev(pdev));
+
+	adapter = netdev_priv(netdev);
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	hw = &adapter->hw;
+	hw->back = adapter;
+	adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
+
+	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+			      pci_resource_len(pdev, 0));
+	/* Kept separately because hw->hw_addr may be cleared on removal */
+	adapter->io_addr = hw->hw_addr;
+	if (!hw->hw_addr) {
+		err = -EIO;
+		goto err_ioremap;
+	}
+
+	/* strscpy() always NUL-terminates and truncates if needed */
+	strscpy(netdev->name, pci_name(pdev), sizeof(netdev->name));
+
+	/* setup the private structure */
+	err = txgbe_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	if (pci_using_dac)
+		netdev->features |= NETIF_F_HIGHDMA;
+
+	/* Publish the adapter for txgbe_remove() and txgbe_shutdown() */
+	pci_set_drvdata(pdev, adapter);
+
+	return 0;
+
+err_sw_init:
+	iounmap(adapter->io_addr);
+err_ioremap:
+	disable_dev = !test_and_set_bit(__TXGBE_DISABLED, &adapter->state);
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_disable_pcie_error_reporting(pdev);
+	pci_release_selected_regions(pdev,
+				     pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_reg:
+err_dma:
+	/* adapter is only compared against NULL here, never dereferenced */
+	if (!adapter || disable_dev)
+		pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * txgbe_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * txgbe_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void txgbe_remove(struct pci_dev *pdev)
+{
+	struct txgbe_adapter *adapter = pci_get_drvdata(pdev);
+	struct net_device *netdev;
+	bool disable_dev;
+
+	/* if !adapter then we already cleaned up in probe */
+	if (!adapter)
+		return;
+
+	netdev = adapter->netdev;
+
+	/* io_addr rather than hw.hw_addr: the latter may have been cleared */
+	iounmap(adapter->io_addr);
+	pci_release_selected_regions(pdev,
+				     pci_select_bars(pdev, IORESOURCE_MEM));
+
+	/* read the state bit before free_netdev(): adapter lives inside
+	 * netdev's private area
+	 */
+	disable_dev = !test_and_set_bit(__TXGBE_DISABLED, &adapter->state);
+	free_netdev(netdev);
+
+	pci_disable_pcie_error_reporting(pdev);
+
+	if (disable_dev)
+		pci_disable_device(pdev);
+}
+
+/* Re-read the PCI vendor ID to tell whether the device is still present.
+ * An all-ones result marks the adapter as removed (txgbe_remove_adapter())
+ * and returns true; any other value returns false.
+ */
+static bool txgbe_check_cfg_remove(struct txgbe_hw *hw, struct pci_dev *pdev)
+{
+	u16 vendor;
+
+	pci_read_config_word(pdev, PCI_VENDOR_ID, &vendor);
+	if (vendor != TXGBE_FAILED_READ_CFG_WORD)
+		return false;
+
+	txgbe_remove_adapter(hw);
+	return true;
+}
+
+/* PCI driver glue: ties the device IDs in txgbe_pci_tbl to this file's
+ * probe, remove and shutdown callbacks.
+ */
+static struct pci_driver txgbe_driver = {
+	.name     = txgbe_driver_name,
+	.id_table = txgbe_pci_tbl,
+	.probe    = txgbe_probe,
+	.remove   = txgbe_remove,
+	.shutdown = txgbe_shutdown,
+};
+
+/**
+ * txgbe_init_module - Driver Registration Routine
+ *
+ * txgbe_init_module is the first routine called when the driver is
+ * loaded. It creates the driver workqueue and registers with the PCI
+ * subsystem.
+ *
+ * Returns 0 on success, -ENOMEM if the workqueue cannot be created, or the
+ * error returned by pci_register_driver().
+ **/
+static int __init txgbe_init_module(void)
+{
+	int ret;
+
+	pr_info("%s\n", txgbe_driver_string);
+	pr_info("%s\n", txgbe_copyright);
+
+	txgbe_wq = create_singlethread_workqueue(txgbe_driver_name);
+	if (!txgbe_wq) {
+		pr_err("%s: Failed to create workqueue\n", txgbe_driver_name);
+		return -ENOMEM;
+	}
+
+	ret = pci_register_driver(&txgbe_driver);
+	if (ret) {
+		/* the exit routine never runs after a failed init, so the
+		 * workqueue has to be released here
+		 */
+		destroy_workqueue(txgbe_wq);
+		txgbe_wq = NULL;
+	}
+
+	return ret;
+}
+
+module_init(txgbe_init_module);
+
+/**
+ * txgbe_exit_module - Driver Exit Cleanup Routine
+ *
+ * txgbe_exit_module is called just before the driver is removed
+ * from memory. It unregisters the PCI driver and then destroys the
+ * workqueue created by txgbe_init_module().
+ **/
+static void __exit txgbe_exit_module(void)
+{
+	/* unregister first so no device is bound while the workqueue goes */
+	pci_unregister_driver(&txgbe_driver);
+	if (txgbe_wq)
+		destroy_workqueue(txgbe_wq);
+}
+
+module_exit(txgbe_exit_module);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
new file mode 100644
index 000000000000..ba9306982317
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2015 - 2017 Beijing WangXun Technology Co., Ltd. */
+
+#ifndef _TXGBE_TYPE_H_
+#define _TXGBE_TYPE_H_
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+
+/* __le16/__le32/__le64 and __be16/__be32/__be64 are typedefs provided by
+ * <linux/types.h>, included above. They must not be given macro fallbacks
+ * to plain u16/u32/u64: #ifndef cannot see typedef names, so such fallbacks
+ * would always take effect and override the kernel's endian-annotated types.
+ */
+
+/************ txgbe_register.h ************/
+/* Vendor ID */
+#ifndef PCI_VENDOR_ID_TRUSTNETIC
+#define PCI_VENDOR_ID_TRUSTNETIC                0x8088
+#endif
+
+/* Device IDs */
+#define TXGBE_DEV_ID_SP1000                     0x1001
+#define TXGBE_DEV_ID_WX1820                     0x2001
+
+/* Subsystem IDs */
+/* SFP */
+#define TXGBE_ID_SP1000_SFP                     0x0000
+#define TXGBE_ID_WX1820_SFP                     0x2000
+#define TXGBE_ID_SFP                            0x00
+
+/* copper */
+#define TXGBE_ID_SP1000_XAUI                    0x1010
+#define TXGBE_ID_WX1820_XAUI                    0x2010
+#define TXGBE_ID_XAUI                           0x10
+#define TXGBE_ID_SP1000_SGMII                   0x1020
+#define TXGBE_ID_WX1820_SGMII                   0x2020
+#define TXGBE_ID_SGMII                          0x20
+/* backplane */
+#define TXGBE_ID_SP1000_KR_KX_KX4               0x1030
+#define TXGBE_ID_WX1820_KR_KX_KX4               0x2030
+#define TXGBE_ID_KR_KX_KX4                      0x30
+/* MAC Interface */
+#define TXGBE_ID_SP1000_MAC_XAUI                0x1040
+#define TXGBE_ID_WX1820_MAC_XAUI                0x2040
+#define TXGBE_ID_MAC_XAUI                       0x40
+#define TXGBE_ID_SP1000_MAC_SGMII               0x1060
+#define TXGBE_ID_WX1820_MAC_SGMII               0x2060
+#define TXGBE_ID_MAC_SGMII                      0x60
+
+#define TXGBE_NCSI_SUP                          0x8000
+#define TXGBE_NCSI_MASK                         0x8000
+#define TXGBE_WOL_SUP                           0x4000
+#define TXGBE_WOL_MASK                          0x4000
+#define TXGBE_DEV_MASK                          0xf0
+
+/* Combined interface*/
+#define TXGBE_ID_SFI_XAUI			0x50
+
+/* Revision ID */
+#define TXGBE_SP_MPW  1
+
+/* Hardware-facing state, embedded in struct txgbe_adapter as "hw" */
+struct txgbe_hw {
+	/* register mapping of BAR 0 set up in txgbe_probe(); reset to NULL
+	 * by txgbe_remove_adapter()
+	 */
+	u8 __iomem *hw_addr;
+	/* owning struct txgbe_adapter */
+	void *back;
+	/* PCI identity, filled in by txgbe_sw_init() */
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+	/* value read from PCI_SUBSYSTEM_ID in config space */
+	u16 subsystem_id;
+};
+
+#endif /* _TXGBE_TYPE_H_ */
-- 
2.27.0




^ permalink raw reply related

* Re: [PATCH bpf-next v3 2/4] bpf_trace: support 32-bit kernels in bpf_kprobe_multi_link_attach
From: Jiri Olsa @ 2022-05-17  9:12 UTC (permalink / raw)
  To: Eugene Syromiatnikov
  Cc: Masami Hiramatsu, Steven Rostedt, Ingo Molnar, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, netdev, bpf,
	linux-kernel, Shuah Khan, linux-kselftest
In-Reply-To: <525b99881dc144b986e381eb23b12617a311f243.1652772731.git.esyr@redhat.com>

On Tue, May 17, 2022 at 09:36:26AM +0200, Eugene Syromiatnikov wrote:
> It seems that there is no reason not to support 32-bit architectures;
> doing so requires a bit of rework with respect to cookies handling,
> however, as the current code implicitly assumes
> that sizeof(long) == sizeof(u64).
> 
> Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
> ---
>  kernel/trace/bpf_trace.c | 17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 9c041be..a93a54f 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2435,16 +2435,12 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
>  	struct bpf_link_primer link_primer;
>  	void __user *ucookies;
>  	unsigned long *addrs;
> -	u32 flags, cnt, size;
> +	u32 flags, cnt, size, cookies_size;
>  	void __user *uaddrs;
>  	u64 *cookies = NULL;
>  	void __user *usyms;
>  	int err;
>  
> -	/* no support for 32bit archs yet */
> -	if (sizeof(u64) != sizeof(void *))
> -		return -EOPNOTSUPP;
> -
>  	if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI)
>  		return -EINVAL;
>  
> @@ -2454,6 +2450,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
>  
>  	uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs);
>  	usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms);
> +	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
>  	if (!!uaddrs == !!usyms)
>  		return -EINVAL;
>  
> @@ -2461,8 +2458,11 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
>  	if (!cnt)
>  		return -EINVAL;
>  
> -	if (check_mul_overflow(cnt, (u32)sizeof(*addrs), &size))
> +	if (check_mul_overflow(cnt, (u32)sizeof(*addrs), &size) ||
> +	    (ucookies &&
> +	     check_mul_overflow(cnt, (u32)sizeof(*cookies), &cookies_size))) {
>  		return -EOVERFLOW;
> +	}
>  	addrs = kvmalloc(size, GFP_KERNEL);
>  	if (!addrs)
>  		return -ENOMEM;
> @@ -2486,14 +2486,13 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
>  			goto error;
>  	}
>  
> -	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
>  	if (ucookies) {

could we check all that in here? so the ucookies checks are all in
one place.. also you would not need cookies_size

jirka

> -		cookies = kvmalloc(size, GFP_KERNEL);
> +		cookies = kvmalloc(cookies_size, GFP_KERNEL);
>  		if (!cookies) {
>  			err = -ENOMEM;
>  			goto error;
>  		}
> -		if (copy_from_user(cookies, ucookies, size)) {
> +		if (copy_from_user(cookies, ucookies, cookies_size)) {
>  			err = -EFAULT;
>  			goto error;
>  		}
> -- 
> 2.1.4
> 

^ permalink raw reply

* Re: [PATCH bpf-next v3 1/4] bpf_trace: check size for overflow in bpf_kprobe_multi_link_attach
From: Jiri Olsa @ 2022-05-17  9:12 UTC (permalink / raw)
  To: Eugene Syromiatnikov
  Cc: Masami Hiramatsu, Steven Rostedt, Ingo Molnar, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, netdev, bpf,
	linux-kernel, Shuah Khan, linux-kselftest
In-Reply-To: <9e4171972a3d75e656073e0c25cd4071a6f652e4.1652772731.git.esyr@redhat.com>

On Tue, May 17, 2022 at 09:36:15AM +0200, Eugene Syromiatnikov wrote:
> Check that size would not overflow before calculation (and return
> -EOVERFLOW if it will), to prevent potential out-of-bounds write
> with the following copy_from_user.  Use kvmalloc_array
> in copy_user_syms to prevent out-of-bounds write into syms
> (and especially buf) as well.
> 
> Fixes: 0dcac272540613d4 ("bpf: Add multi kprobe link")
> Cc: <stable@vger.kernel.org> # 5.18
> Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>

Acked-by: Jiri Olsa <jolsa@kernel.org>

thanks,
jirka

> ---
>  kernel/trace/bpf_trace.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 7141ca8..9c041be 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2261,11 +2261,11 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32
>  	int err = -ENOMEM;
>  	unsigned int i;
>  
> -	syms = kvmalloc(cnt * sizeof(*syms), GFP_KERNEL);
> +	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
>  	if (!syms)
>  		goto error;
>  
> -	buf = kvmalloc(cnt * KSYM_NAME_LEN, GFP_KERNEL);
> +	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
>  	if (!buf)
>  		goto error;
>  
> @@ -2461,7 +2461,8 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
>  	if (!cnt)
>  		return -EINVAL;
>  
> -	size = cnt * sizeof(*addrs);
> +	if (check_mul_overflow(cnt, (u32)sizeof(*addrs), &size))
> +		return -EOVERFLOW;
>  	addrs = kvmalloc(size, GFP_KERNEL);
>  	if (!addrs)
>  		return -ENOMEM;
> -- 
> 2.1.4
> 

^ permalink raw reply

* RE: UDP receive performance drop since 3.10
From: David Laight @ 2022-05-17  9:12 UTC (permalink / raw)
  To: 'Paolo Abeni', netdev@vger.kernel.org
In-Reply-To: <ca1ade8ae0f20695c687580b2e1fbb75bf8a5d4b.camel@redhat.com>

From: Paolo Abeni
> Sent: 16 May 2022 15:29
> 
> On Mon, 2022-05-16 at 12:58 +0000, David Laight wrote:
> > I've noticed a doubling in the cpu cost of udp processing
> > between a RHEL 3.10 kernel and a 5.18-rc6 one.
> >
> > This is (probably) all within ip_rcv().
> >
> > I'm testing very high rate UDP receive of RTP audio.
> > (The target is 500000 udp/sec.)
> > I've enabled RPS so that ip_rcv() runs on different multiple
> > cpus from the ethernet code.
> > (RSS on the BCM5720 (tg3) doesn't seem to work very well.)
> >
> > On the 3.10 kernel the 'RPS' cpu show about 5% 'soft int' time.
> > With 5.10 this has doubled to 10% for much the same test.
> >
...
> >
> > Now I know the cost of ftrace is significant (and seems to be
> > higher in 5.18) but there also seems to be a lot more code.
> > As well as the extra rcu locks (which are probably mostly ftrace
> > overhead), a few other things stick out:
> >
> > 1) The sock_net_uid(net, NULL) calls.
> >    These are make_kuid(net->user_ns, 0) - so pretty much constant.
> >    They seem to end up in a loop in map_id_range_down_base().
> >    All looks expensive in the default network namespace where
> >    0 maps to 0.
> >
> > 2) Extra code in fib_lookup().
> >
> > 3) A lot more locking in ep_poll_callback().
> >
> > The 5.18 kernel also seems to have CONFIG_DEBUG_PREEMPT set.
> > I can't find the Kconfig entry for it.
> > It doesn't exist in the old .config at all.
> > So I'm not sure why 'make oldconfig' picked it up.
> >
> > The other possibility is that the extra code is tick_nohz_idle_exit().
> > The 3.10 trace is from a non-RPS config so I can't compare it.
> >
> > I'm going to disable CONFIG_DEBUG_PREEMPT to see how much
> > difference it makes.
> > Any idea if any other debug options will have got picked up?
> 
> Do you have CONFIG_PREEMPT_DYNAMIC in your config? That was not
> available in 3.10 and apparently it pulls quite a bit of stuff, which
> in the end should be quite measurable. The preempt count alone adds
> ~7us to the above sample.

That was enabled, I've now turned it off.
But the timings from a full ftrace are dominated by ftrace itself.
So the ~7us is a massive overstatement.

I will do some timings for just ip_rcv() and probably ep_poll_callback()
to see if I can isolate the increase.
I'll need to go into the office to boot the old kernel.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply

* Re: [PATCH bpf-next v3 4/4] bpf_trace: pass array of u64 values in kprobe_multi.addrs
From: Jiri Olsa @ 2022-05-17  9:12 UTC (permalink / raw)
  To: Eugene Syromiatnikov
  Cc: Masami Hiramatsu, Steven Rostedt, Ingo Molnar, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, netdev, bpf,
	linux-kernel, Shuah Khan, linux-kselftest
In-Reply-To: <6ef675aeeea442fa8fc168cd1cb4e4e474f65a3f.1652772731.git.esyr@redhat.com>

On Tue, May 17, 2022 at 09:36:47AM +0200, Eugene Syromiatnikov wrote:
> With the interface as defined, it is impossible to pass 64-bit kernel
> addresses from a 32-bit userspace process in BPF_LINK_TYPE_KPROBE_MULTI,
> which severely limits the usability of the interface; change the ABI
> to accept an array of u64 values instead of (kernel? user?) longs.
> Interestingly, the rest of the libbpf infrastructure uses 64-bit values
> for kallsyms addresses already, so this patch also eliminates
> the sym_addr cast in tools/lib/bpf/libbpf.c:resolve_kprobe_multi_cb().

so the problem is when we have 32bit user space on 64bit kernel, right?

I think we should keep addrs as longs in uapi and have kernel to figure out
if it needs to read u32 or u64, like you did for symbols in previous patch

we'll need to fix also bpf_kprobe_multi_cookie_swap because it assumes
64bit user space pointers

would be great if we could have a selftest for this

thanks,
jirka

> 
> Fixes: 0dcac272540613d4 ("bpf: Add multi kprobe link")
> Fixes: 5117c26e877352bc ("libbpf: Add bpf_link_create support for multi kprobes")
> Fixes: ddc6b04989eb0993 ("libbpf: Add bpf_program__attach_kprobe_multi_opts function")
> Fixes: f7a11eeccb111854 ("selftests/bpf: Add kprobe_multi attach test")
> Fixes: 9271a0c7ae7a9147 ("selftests/bpf: Add attach test for bpf_program__attach_kprobe_multi_opts")
> Fixes: 2c6401c966ae1fbe ("selftests/bpf: Add kprobe_multi bpf_cookie test")
> Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
> ---
>  kernel/trace/bpf_trace.c                           | 25 ++++++++++++++++++----
>  tools/lib/bpf/bpf.h                                |  2 +-
>  tools/lib/bpf/libbpf.c                             |  8 +++----
>  tools/lib/bpf/libbpf.h                             |  2 +-
>  .../testing/selftests/bpf/prog_tests/bpf_cookie.c  |  2 +-
>  .../selftests/bpf/prog_tests/kprobe_multi_test.c   |  8 +++----
>  6 files changed, 32 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 9d3028a..30a15b3 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2454,7 +2454,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
>  	void __user *ucookies;
>  	unsigned long *addrs;
>  	u32 flags, cnt, size, cookies_size;
> -	void __user *uaddrs;
> +	u64 __user *uaddrs;
>  	u64 *cookies = NULL;
>  	void __user *usyms;
>  	int err;
> @@ -2486,9 +2486,26 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
>  		return -ENOMEM;
>  
>  	if (uaddrs) {
> -		if (copy_from_user(addrs, uaddrs, size)) {
> -			err = -EFAULT;
> -			goto error;
> +		if (sizeof(*addrs) == sizeof(*uaddrs)) {
> +			if (copy_from_user(addrs, uaddrs, size)) {
> +				err = -EFAULT;
> +				goto error;
> +			}
> +		} else {
> +			u32 i;
> +			u64 addr;
> +
> +			for (i = 0; i < cnt; i++) {
> +				if (get_user(addr, uaddrs + i)) {
> +					err = -EFAULT;
> +					goto error;
> +				}
> +				if (addr > ULONG_MAX) {
> +					err = -EINVAL;
> +					goto error;
> +				}
> +				addrs[i] = addr;
> +			}
>  		}
>  	} else {
>  		struct user_syms us;
> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> index 2e0d373..da9c6037 100644
> --- a/tools/lib/bpf/bpf.h
> +++ b/tools/lib/bpf/bpf.h
> @@ -418,7 +418,7 @@ struct bpf_link_create_opts {
>  			__u32 flags;
>  			__u32 cnt;
>  			const char **syms;
> -			const unsigned long *addrs;
> +			const __u64 *addrs;
>  			const __u64 *cookies;
>  		} kprobe_multi;
>  		struct {
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index ef7f302..35fa9c5 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -10737,7 +10737,7 @@ static bool glob_match(const char *str, const char *pat)
>  
>  struct kprobe_multi_resolve {
>  	const char *pattern;
> -	unsigned long *addrs;
> +	__u64 *addrs;
>  	size_t cap;
>  	size_t cnt;
>  };
> @@ -10752,12 +10752,12 @@ resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
>  	if (!glob_match(sym_name, res->pattern))
>  		return 0;
>  
> -	err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
> +	err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(__u64),
>  				res->cnt + 1);
>  	if (err)
>  		return err;
>  
> -	res->addrs[res->cnt++] = (unsigned long) sym_addr;
> +	res->addrs[res->cnt++] = sym_addr;
>  	return 0;
>  }
>  
> @@ -10772,7 +10772,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
>  	};
>  	struct bpf_link *link = NULL;
>  	char errmsg[STRERR_BUFSIZE];
> -	const unsigned long *addrs;
> +	const __u64 *addrs;
>  	int err, link_fd, prog_fd;
>  	const __u64 *cookies;
>  	const char **syms;
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index 9e9a3fd..76e171d 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -489,7 +489,7 @@ struct bpf_kprobe_multi_opts {
>  	/* array of function symbols to attach */
>  	const char **syms;
>  	/* array of function addresses to attach */
> -	const unsigned long *addrs;
> +	const __u64 *addrs;
>  	/* array of user-provided values fetchable through bpf_get_attach_cookie */
>  	const __u64 *cookies;
>  	/* number of elements in syms/addrs/cookies arrays */
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> index 83ef55e3..e843840 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> @@ -140,7 +140,7 @@ static void kprobe_multi_link_api_subtest(void)
>  	cookies[6] = 7;
>  	cookies[7] = 8;
>  
> -	opts.kprobe_multi.addrs = (const unsigned long *) &addrs;
> +	opts.kprobe_multi.addrs = (const __u64 *) &addrs;
>  	opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
>  	opts.kprobe_multi.cookies = (const __u64 *) &cookies;
>  	prog_fd = bpf_program__fd(skel->progs.test_kprobe);
> diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> index 586dc52..7646112 100644
> --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> @@ -108,7 +108,7 @@ static void test_link_api_addrs(void)
>  	GET_ADDR("bpf_fentry_test7", addrs[6]);
>  	GET_ADDR("bpf_fentry_test8", addrs[7]);
>  
> -	opts.kprobe_multi.addrs = (const unsigned long*) addrs;
> +	opts.kprobe_multi.addrs = (const __u64 *) addrs;
>  	opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
>  	test_link_api(&opts);
>  }
> @@ -186,7 +186,7 @@ static void test_attach_api_addrs(void)
>  	GET_ADDR("bpf_fentry_test7", addrs[6]);
>  	GET_ADDR("bpf_fentry_test8", addrs[7]);
>  
> -	opts.addrs = (const unsigned long *) addrs;
> +	opts.addrs = (const __u64 *) addrs;
>  	opts.cnt = ARRAY_SIZE(addrs);
>  	test_attach_api(NULL, &opts);
>  }
> @@ -244,7 +244,7 @@ static void test_attach_api_fails(void)
>  		goto cleanup;
>  
>  	/* fail_2 - both addrs and syms set */
> -	opts.addrs = (const unsigned long *) addrs;
> +	opts.addrs = (const __u64 *) addrs;
>  	opts.syms = syms;
>  	opts.cnt = ARRAY_SIZE(syms);
>  	opts.cookies = NULL;
> @@ -258,7 +258,7 @@ static void test_attach_api_fails(void)
>  		goto cleanup;
>  
>  	/* fail_3 - pattern and addrs set */
> -	opts.addrs = (const unsigned long *) addrs;
> +	opts.addrs = (const __u64 *) addrs;
>  	opts.syms = NULL;
>  	opts.cnt = ARRAY_SIZE(syms);
>  	opts.cookies = NULL;
> -- 
> 2.1.4
> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox