Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v1 3/4] net: phy: broadcom: Hook up the PTP PHY functions
From: Jonathan Lemon @ 2022-04-24  2:23 UTC (permalink / raw)
  To: f.fainelli, bcm-kernel-feedback-list, andrew, hkallweit1, linux,
	richardcochran
  Cc: netdev, kernel-team
In-Reply-To: <20220424022356.587949-1-jonathan.lemon@gmail.com>

Add a 'struct bcm_ptp_private' pointer to bcm54xx_phy_priv, which points
to an optional PTP structure attached to the PHY.  This is allocated
on probe, if PHY PTP support is configured, and if the PHY has
PTP hardware supported by the driver.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/phy/broadcom.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index e36809aa6d30..a7722599b5f9 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -27,6 +27,11 @@ MODULE_DESCRIPTION("Broadcom PHY driver");
 MODULE_AUTHOR("Maciej W. Rozycki");
 MODULE_LICENSE("GPL");
 
+struct bcm54xx_phy_priv {
+	u64	*stats;
+	struct bcm_ptp_private *ptp;
+};
+
 static int bcm54xx_config_clock_delay(struct phy_device *phydev)
 {
 	int rc, val;
@@ -313,6 +318,14 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
 		bcm_phy_write_shadow(phydev, BCM54XX_SHD_APD, val);
 }
 
+static void bcm54xx_ptp_config_init(struct phy_device *phydev)
+{
+	struct bcm54xx_phy_priv *priv = phydev->priv;
+
+	if (priv->ptp)
+		bcm_ptp_config_init(phydev);
+}
+
 static int bcm54xx_config_init(struct phy_device *phydev)
 {
 	int reg, err, val;
@@ -390,6 +403,8 @@ static int bcm54xx_config_init(struct phy_device *phydev)
 		bcm_phy_write_exp(phydev, BCM_EXP_MULTICOLOR, val);
 	}
 
+	bcm54xx_ptp_config_init(phydev);
+
 	return 0;
 }
 
@@ -741,10 +756,6 @@ static irqreturn_t brcm_fet_handle_interrupt(struct phy_device *phydev)
 	return IRQ_HANDLED;
 }
 
-struct bcm54xx_phy_priv {
-	u64	*stats;
-};
-
 static int bcm54xx_phy_probe(struct phy_device *phydev)
 {
 	struct bcm54xx_phy_priv *priv;
@@ -761,6 +772,10 @@ static int bcm54xx_phy_probe(struct phy_device *phydev)
 	if (!priv->stats)
 		return -ENOMEM;
 
+	priv->ptp = bcm_ptp_probe(phydev);
+	if (IS_ERR(priv->ptp))
+		return PTR_ERR(priv->ptp);
+
 	return 0;
 }
 
-- 
2.31.1


^ permalink raw reply related

* [PATCH net-next v1 2/4] net: phy: broadcom: Add Broadcom PTP hooks to bcm-phy-lib
From: Jonathan Lemon @ 2022-04-24  2:23 UTC (permalink / raw)
  To: f.fainelli, bcm-kernel-feedback-list, andrew, hkallweit1, linux,
	richardcochran
  Cc: netdev, kernel-team
In-Reply-To: <20220424022356.587949-1-jonathan.lemon@gmail.com>

Add the public bcm_ptp_probe() and bcm_ptp_config_init() functions
to the bcm-phy library.  The PTP functions are contained in a separate
file for clarity, and also to simplify the PTP clock dependencies.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/phy/bcm-phy-lib.c | 13 +++++++++++++
 drivers/net/phy/bcm-phy-lib.h |  3 +++
 2 files changed, 16 insertions(+)

diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index 287cccf8f7f4..b9d2d1d48402 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -816,6 +816,19 @@ int bcm_phy_cable_test_get_status_rdb(struct phy_device *phydev,
 }
 EXPORT_SYMBOL_GPL(bcm_phy_cable_test_get_status_rdb);
 
+#if !IS_ENABLED(CONFIG_BCM_NET_PHYPTP)
+struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev)
+{
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(bcm_ptp_probe);
+
+void bcm_ptp_config_init(struct phy_device *phydev)
+{
+}
+EXPORT_SYMBOL_GPL(bcm_ptp_config_init);
+#endif
+
 MODULE_DESCRIPTION("Broadcom PHY Library");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Broadcom Corporation");
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index c3842f87c33b..66fa731554a3 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -87,4 +87,7 @@ int bcm_phy_cable_test_start_rdb(struct phy_device *phydev);
 int bcm_phy_cable_test_start(struct phy_device *phydev);
 int bcm_phy_cable_test_get_status(struct phy_device *phydev, bool *finished);
 
+struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev);
+void bcm_ptp_config_init(struct phy_device *phydev);
+
 #endif /* _LINUX_BCM_PHY_LIB_H */
-- 
2.31.1


^ permalink raw reply related

* [PATCH net-next v1 1/4] net: phy: broadcom: Add PTP support for some Broadcom PHYs.
From: Jonathan Lemon @ 2022-04-24  2:23 UTC (permalink / raw)
  To: f.fainelli, bcm-kernel-feedback-list, andrew, hkallweit1, linux,
	richardcochran
  Cc: netdev, kernel-team
In-Reply-To: <20220424022356.587949-1-jonathan.lemon@gmail.com>

This adds PTP support for BCM54210E Broadcom PHYs, in particular,
the BCM54213PE, as used in the Raspberry Pi CM4.  It has only been
tested on that hardware.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/phy/bcm-phy-ptp.c | 736 ++++++++++++++++++++++++++++++++++
 1 file changed, 736 insertions(+)
 create mode 100644 drivers/net/phy/bcm-phy-ptp.c

diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c
new file mode 100644
index 000000000000..64c2c96dcad4
--- /dev/null
+++ b/drivers/net/phy/bcm-phy-ptp.c
@@ -0,0 +1,736 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Meta Platforms Inc.
+ * Copyright (C) 2022 Jonathan Lemon <jonathan.lemon@gmail.com>
+ */
+
+#include <asm/unaligned.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+#include <linux/ptp_classify.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
+#include <linux/netdevice.h>
+
+#include "bcm-phy-lib.h"
+
+/* IEEE 1588 Expansion registers */
+#define SLICE_CTRL		0x0810
+#define  SLICE_TX_EN			BIT(0)
+#define  SLICE_RX_EN			BIT(8)
+#define TX_EVENT_MODE		0x0811
+#define  MODE_TX_UPDATE_CF		BIT(0)
+#define  MODE_TX_REPLACE_TS_CF		BIT(1)
+#define  MODE_TX_REPLACE_TS		GENMASK(1, 0)
+#define RX_EVENT_MODE		0x0819
+#define  MODE_RX_UPDATE_CF		BIT(0)
+#define  MODE_RX_INSERT_TS_48		BIT(1)
+#define  MODE_RX_INSERT_TS_64		GENMASK(1, 0)
+
+#define MODE_EVT_SHIFT_SYNC		0
+#define MODE_EVT_SHIFT_DELAY_REQ	2
+#define MODE_EVT_SHIFT_PDELAY_REQ	4
+#define MODE_EVT_SHIFT_PDELAY_RESP	6
+
+#define MODE_SEL_SHIFT_PORT		0
+#define MODE_SEL_SHIFT_CPU		8
+
+#define rx_mode(sel, evt, act) \
+	(((MODE_RX_##act) << (MODE_EVT_SHIFT_##evt)) << (MODE_SEL_SHIFT_##sel))
+
+#define tx_mode(sel, evt, act) \
+	(((MODE_TX_##act) << (MODE_EVT_SHIFT_##evt)) << (MODE_SEL_SHIFT_##sel))
+
+/* needs global TS capture first */
+#define TX_TS_CAPTURE		0x0821
+#define  TX_TS_CAP_EN			BIT(0)
+#define RX_TS_CAPTURE		0x0822
+#define  RX_TS_CAP_EN			BIT(0)
+
+#define TIME_CODE_0		0x0854
+#define TIME_CODE_1		0x0855
+#define TIME_CODE_2		0x0856
+#define TIME_CODE_3		0x0857
+#define TIME_CODE_4		0x0858
+
+#define DPLL_SELECT		0x085b
+#define  DPLL_HB_MODE2			BIT(6)
+#define SHADOW_CTRL		0x085c
+#define SHADOW_LOAD		0x085d
+#define  TIME_CODE_LOAD			BIT(10)
+#define  SYNC_OUT_LOAD			BIT(9)
+#define  NCO_TIME_LOAD			BIT(7)
+#define  FREQ_LOAD			BIT(6)
+#define INTR_MASK		0x085e
+#define INTR_STATUS		0x085f
+#define  INTC_FSYNC			BIT(0)
+#define  INTC_SOP			BIT(1)
+
+#define FREQ_REG_LSB		0x0873
+#define FREQ_REG_MSB		0x0874
+
+#define NCO_TIME_0		0x0875
+#define NCO_TIME_1		0x0876
+#define NCO_TIME_2_CTRL		0x0877
+#define  FREQ_MDIO_SEL			BIT(14)
+
+#define SYNC_OUT_0		0x0878
+#define SYNC_OUT_1		0x0879
+#define SYNC_OUT_2		0x087a
+
+#define TS_READ_CTRL		0x0885
+#define  TS_READ_START			BIT(0)
+#define  TS_READ_END			BIT(1)
+
+#define TIMECODE_CTRL		0x08c3
+#define  TX_TIMECODE_SEL		GENMASK(7, 0)
+#define  RX_TIMECODE_SEL		GENMASK(15, 8)
+
+#define TS_REG_0		0x0889
+#define TS_REG_1		0x088a
+#define TS_REG_2		0x088b
+#define TS_REG_3		0x08c4
+#define TS_INFO_0		0x088c
+#define TS_INFO_1		0x088d
+
+#define HB_REG_0		0x0886
+#define HB_REG_1		0x0887
+#define HB_REG_2		0x0888
+#define HB_REG_3		0x08ec
+#define HB_REG_4		0x08ed
+#define HB_STAT_CTRL		0x088e
+#define  HB_READ_START			BIT(10)
+#define  HB_READ_END			BIT(11)
+#define  HB_READ_MASK			GENMASK(11, 10)
+
+#define NSE_CTRL		0x087f
+#define  NSE_GMODE_EN			GENMASK(15, 14)
+#define  NSE_CAPTURE_EN			BIT(13)
+#define  NSE_INIT			BIT(12)
+#define  NSE_CPU_FRAMESYNC		BIT(5)
+#define  NSE_FRAMESYNC_MASK		GENMASK(5, 2)
+#define  NSE_PEROUT_EN			BIT(1)
+#define  NSE_SYNC_OUT_MASK		GENMASK(1, 0)
+
+#define TIME_SYNC		0x0ff5
+#define  TIME_SYNC_EN			BIT(0)
+
+struct bcm_ptp_private {
+	struct phy_device *phydev;
+	struct mii_timestamper mii_ts;
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_info;
+	struct mutex mutex;
+	struct sk_buff_head tx_queue;
+	int tx_type;
+	bool hwts_rx;
+	u16 nse_ctrl;
+};
+
+struct bcm_ptp_skb_cb {
+	unsigned long timeout;
+	u16 seq_id;
+	u8 msgtype;
+	bool discard;
+};
+
+struct bcm_ptp_capture {
+	ktime_t	hwtstamp;
+	u16 seq_id;
+	u8 msgtype;
+	bool tx_dir;
+};
+
+#define BCM_SKB_CB(skb)		((struct bcm_ptp_skb_cb *)(skb)->cb)
+#define SKB_TS_TIMEOUT		10			/* jiffies */
+
+#define BCM_MAX_PULSE_8NS	((1U << 9) - 1)
+#define BCM_MAX_PERIOD_8NS	((1U << 30) - 1)
+
+#define BRCM_PHY_MODEL(phydev) \
+	((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask)
+
+static struct bcm_ptp_private *mii2priv(struct mii_timestamper *mii_ts)
+{
+	return container_of(mii_ts, struct bcm_ptp_private, mii_ts);
+}
+
+static struct bcm_ptp_private *ptp2priv(struct ptp_clock_info *info)
+{
+	return container_of(info, struct bcm_ptp_private, ptp_info);
+}
+
+static int bcm_ptp_framesync(struct phy_device *phydev,
+			     struct ptp_system_timestamp *sts,
+			     u16 ctrl)
+{
+	u16 reg;
+	int i;
+
+	/* prep for framesync */
+	bcm_phy_write_exp(phydev, NSE_CTRL, ctrl);
+
+	ptp_read_system_prets(sts);
+
+	/* trigger framesync */
+	bcm_phy_write_exp(phydev, NSE_CTRL, ctrl | NSE_CPU_FRAMESYNC);
+
+	ptp_read_system_postts(sts);
+
+	if ((ctrl & NSE_CAPTURE_EN) == 0)
+		return 0;
+
+	/* poll for FSYNC interrupt from TS capture */
+	for (i = 0; i < 10; i++) {
+		reg = bcm_phy_read_exp(phydev, INTR_STATUS);
+		if (reg & INTC_FSYNC)
+			break;
+	}
+
+	return reg & INTC_FSYNC ? 0 : -ETIMEDOUT;
+}
+
+static int bcm_ptp_gettime_locked(struct bcm_ptp_private *priv,
+				  struct timespec64 *ts,
+				  struct ptp_system_timestamp *sts)
+{
+	struct phy_device *phydev = priv->phydev;
+	u16 hb[4], ctrl;
+	int err;
+
+	ctrl = priv->nse_ctrl;
+	err = bcm_ptp_framesync(phydev, sts, ctrl | NSE_CAPTURE_EN);
+	if (err)
+		return err;
+
+	bcm_phy_write_exp(phydev, HB_STAT_CTRL, HB_READ_START);
+
+	hb[0] = bcm_phy_read_exp(phydev, HB_REG_0);
+	hb[1] = bcm_phy_read_exp(phydev, HB_REG_1);
+	hb[2] = bcm_phy_read_exp(phydev, HB_REG_2);
+	hb[3] = bcm_phy_read_exp(phydev, HB_REG_3);
+
+	bcm_phy_write_exp(phydev, HB_STAT_CTRL, HB_READ_END);
+	bcm_phy_write_exp(phydev, HB_STAT_CTRL, 0);
+
+	ts->tv_sec = (hb[3] << 16) | hb[2];
+	ts->tv_nsec = (hb[1] << 16) | hb[0];
+
+	return 0;
+}
+
+static int bcm_ptp_gettimex(struct ptp_clock_info *info,
+			    struct timespec64 *ts,
+			    struct ptp_system_timestamp *sts)
+{
+	struct bcm_ptp_private *priv = ptp2priv(info);
+	int err;
+
+	mutex_lock(&priv->mutex);
+	err = bcm_ptp_gettime_locked(priv, ts, sts);
+	mutex_unlock(&priv->mutex);
+
+	return err;
+}
+
+static int bcm_ptp_settime_locked(struct bcm_ptp_private *priv,
+				  const struct timespec64 *ts)
+{
+	struct phy_device *phydev = priv->phydev;
+	u16 ctrl;
+
+	/* set up time code */
+	bcm_phy_write_exp(phydev, TIME_CODE_0, ts->tv_nsec);
+	bcm_phy_write_exp(phydev, TIME_CODE_1, ts->tv_nsec >> 16);
+	bcm_phy_write_exp(phydev, TIME_CODE_2, ts->tv_sec);
+	bcm_phy_write_exp(phydev, TIME_CODE_3, ts->tv_sec >> 16);
+	bcm_phy_write_exp(phydev, TIME_CODE_4, ts->tv_sec >> 32);
+
+	/* zero out NCO counter */
+	bcm_phy_write_exp(phydev, NCO_TIME_0, 0);
+	bcm_phy_write_exp(phydev, NCO_TIME_1, 0);
+	bcm_phy_write_exp(phydev, NCO_TIME_2_CTRL, 0);
+
+	/* set up load on next frame sync */
+	bcm_phy_write_exp(phydev, SHADOW_LOAD, TIME_CODE_LOAD | NCO_TIME_LOAD);
+
+	ctrl = priv->nse_ctrl;
+	return bcm_ptp_framesync(phydev, NULL, ctrl | NSE_INIT);
+}
+
+static int bcm_ptp_settime(struct ptp_clock_info *info,
+			   const struct timespec64 *ts)
+{
+	struct bcm_ptp_private *priv = ptp2priv(info);
+	int err;
+
+	mutex_lock(&priv->mutex);
+	err = bcm_ptp_settime_locked(priv, ts);
+	mutex_unlock(&priv->mutex);
+
+	return err;
+}
+
+static int bcm_ptp_adjtime_locked(struct bcm_ptp_private *priv,
+				  s64 delta_ns)
+{
+	struct timespec64 ts;
+	int err;
+
+	err = bcm_ptp_gettime_locked(priv, &ts, NULL);
+	if (!err) {
+		timespec64_add_ns(&ts, delta_ns);
+		err = bcm_ptp_settime_locked(priv, &ts);
+	}
+	return err;
+}
+
+static int bcm_ptp_adjtime(struct ptp_clock_info *info, s64 delta_ns)
+{
+	struct bcm_ptp_private *priv = ptp2priv(info);
+	int err;
+
+	mutex_lock(&priv->mutex);
+	err = bcm_ptp_adjtime_locked(priv, delta_ns);
+	mutex_unlock(&priv->mutex);
+
+	return err;
+}
+
+/* A 125Mhz clock should adjust 8ns per pulse.
+ * The frequency adjustment base is 0x8000 0000, or 8*2^28.
+ *
+ * Frequency adjustment is
+ * adj = scaled_ppm * 8*2^28 / (10^6 * 2^16)
+ *   which simplifies to:
+ * adj = scaled_ppm * 2^9 / 5^6
+ */
+static int bcm_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm)
+{
+	struct bcm_ptp_private *priv = ptp2priv(info);
+	int neg_adj = 0;
+	u32 diff, freq;
+	u64 adj;
+
+	if (scaled_ppm < 0) {
+		neg_adj = 1;
+		scaled_ppm = -scaled_ppm;
+	}
+
+	adj = scaled_ppm << 9;
+	diff = div_u64(adj, 15625);
+	freq = (8 << 28) + (neg_adj ? -diff : diff);
+
+	mutex_lock(&priv->mutex);
+
+	bcm_phy_write_exp(priv->phydev, FREQ_REG_LSB, freq);
+	bcm_phy_write_exp(priv->phydev, FREQ_REG_MSB, freq >> 16);
+
+	bcm_phy_write_exp(priv->phydev, NCO_TIME_2_CTRL, FREQ_MDIO_SEL);
+
+	/* load on next framesync */
+	bcm_phy_write_exp(priv->phydev, SHADOW_LOAD, FREQ_LOAD);
+
+	bcm_ptp_framesync(priv->phydev, NULL, priv->nse_ctrl);
+
+	mutex_unlock(&priv->mutex);
+
+	return 0;
+}
+
+static int bcm_ptp_perout_locked(struct bcm_ptp_private *priv,
+				 struct ptp_perout_request *req, int on)
+{
+	u64 period, pulse;
+	u16 val;
+
+	if (!on) {
+		priv->nse_ctrl &= ~NSE_SYNC_OUT_MASK;
+		bcm_phy_write_exp(priv->phydev, NSE_CTRL, priv->nse_ctrl);
+		return 0;
+	}
+
+	if (req->flags & PTP_PEROUT_PHASE)
+		return -EOPNOTSUPP;
+
+	period = ktime_to_ns(ktime_set(req->period.sec, req->period.nsec));
+	if (req->flags & PTP_PEROUT_DUTY_CYCLE)
+		pulse = ktime_to_ns(ktime_set(req->on.sec, req->on.nsec));
+	else
+		pulse = min(period / 2, (u64)BCM_MAX_PULSE_8NS << 3);
+
+	/* convert to 8ns units */
+	pulse >>= 3;
+	period >>= 3;
+
+	if (!pulse || !period)
+		return -EINVAL;
+
+	if (pulse > period)
+		return -EINVAL;
+
+	if (pulse > BCM_MAX_PULSE_8NS || period > BCM_MAX_PERIOD_8NS)
+		return -EINVAL;
+
+	bcm_phy_write_exp(priv->phydev, SYNC_OUT_0, period);
+
+	val = ((pulse & 0x3) << 14) | ((period >> 16) & 0x3fff);
+	bcm_phy_write_exp(priv->phydev, SYNC_OUT_1, val);
+
+	val = (pulse >> 2) & 0x7f;
+	bcm_phy_write_exp(priv->phydev, SYNC_OUT_2, val);
+
+	/* load values on next framesync */
+	bcm_phy_write_exp(priv->phydev, SHADOW_LOAD, SYNC_OUT_LOAD);
+
+	priv->nse_ctrl |= NSE_PEROUT_EN;
+	return bcm_ptp_framesync(priv->phydev, NULL, priv->nse_ctrl);
+}
+
+static int bcm_ptp_enable(struct ptp_clock_info *info,
+			  struct ptp_clock_request *rq, int on)
+{
+	struct bcm_ptp_private *priv = ptp2priv(info);
+	int err = 0;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_PEROUT:
+		mutex_lock(&priv->mutex);
+		err = bcm_ptp_perout_locked(priv, &rq->perout, on);
+		mutex_unlock(&priv->mutex);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return err;
+}
+
+static bool bcm_ptp_rxtstamp(struct mii_timestamper *mii_ts,
+			     struct sk_buff *skb, int type)
+{
+	struct bcm_ptp_private *priv = mii2priv(mii_ts);
+	struct skb_shared_hwtstamps *hwts;
+	struct ptp_header *header;
+	u32 sec, nsec;
+	u8 *data;
+
+	if (!priv->hwts_rx)
+		return false;
+
+	header = ptp_parse_header(skb, type);
+	if (!header)
+		return false;
+
+	data = (u8 *)(header + 1);
+	sec = get_unaligned_be32(data);
+	nsec = get_unaligned_be32(data + 4);
+
+	hwts = skb_hwtstamps(skb);
+	hwts->hwtstamp = ktime_set(sec, nsec);
+
+	return false;
+}
+
+static bool bcm_ptp_get_tstamp(struct bcm_ptp_private *priv,
+			       struct bcm_ptp_capture *capts)
+{
+	struct phy_device *phydev = priv->phydev;
+	u16 ts[4], reg;
+	u32 sec, nsec;
+
+	mutex_lock(&priv->mutex);
+
+	reg = bcm_phy_read_exp(phydev, INTR_STATUS);
+	if ((reg & INTC_SOP) == 0) {
+		mutex_unlock(&priv->mutex);
+		return false;
+	}
+
+	bcm_phy_write_exp(phydev, TS_READ_CTRL, TS_READ_START);
+
+	ts[0] = bcm_phy_read_exp(phydev, TS_REG_0);
+	ts[1] = bcm_phy_read_exp(phydev, TS_REG_1);
+	ts[2] = bcm_phy_read_exp(phydev, TS_REG_2);
+	ts[3] = bcm_phy_read_exp(phydev, TS_REG_3);
+
+	/* not in be32 format for some reason */
+	capts->seq_id = bcm_phy_read_exp(priv->phydev, TS_INFO_0);
+
+	reg = bcm_phy_read_exp(phydev, TS_INFO_1);
+	capts->msgtype = reg >> 12;
+	capts->tx_dir = !!(reg & BIT(11));
+
+	bcm_phy_write_exp(phydev, TS_READ_CTRL, TS_READ_END);
+	bcm_phy_write_exp(phydev, TS_READ_CTRL, 0);
+
+	mutex_unlock(&priv->mutex);
+
+	sec = (ts[3] << 16) | ts[2];
+	nsec = (ts[1] << 16) | ts[0];
+	capts->hwtstamp = ktime_set(sec, nsec);
+
+	return true;
+}
+
+static void bcm_ptp_match_tstamp(struct bcm_ptp_private *priv,
+				 struct bcm_ptp_capture *capts)
+{
+	struct skb_shared_hwtstamps hwts;
+	struct sk_buff *skb, *ts_skb;
+	unsigned long flags;
+	bool first = false;
+
+	ts_skb = NULL;
+	spin_lock_irqsave(&priv->tx_queue.lock, flags);
+	skb_queue_walk(&priv->tx_queue, skb) {
+		if (BCM_SKB_CB(skb)->seq_id == capts->seq_id &&
+		    BCM_SKB_CB(skb)->msgtype == capts->msgtype) {
+			first = skb_queue_is_first(&priv->tx_queue, skb);
+			__skb_unlink(skb, &priv->tx_queue);
+			ts_skb = skb;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&priv->tx_queue.lock, flags);
+
+	/* TX captures one-step packets, discard them if needed. */
+	if (ts_skb) {
+		if (BCM_SKB_CB(ts_skb)->discard) {
+			kfree_skb(ts_skb);
+		} else {
+			memset(&hwts, 0, sizeof(hwts));
+			hwts.hwtstamp = capts->hwtstamp;
+			skb_complete_tx_timestamp(ts_skb, &hwts);
+		}
+	}
+
+	/* not first match, try and expire entries */
+	if (!first) {
+		while ((skb = skb_dequeue(&priv->tx_queue))) {
+			if (!time_after(jiffies, BCM_SKB_CB(skb)->timeout)) {
+				skb_queue_head(&priv->tx_queue, skb);
+				break;
+			}
+			kfree_skb(skb);
+		}
+	}
+}
+
+static long bcm_ptp_do_aux_work(struct ptp_clock_info *info)
+{
+	struct bcm_ptp_private *priv = ptp2priv(info);
+	struct bcm_ptp_capture capts;
+	bool reschedule = false;
+
+	while (!skb_queue_empty_lockless(&priv->tx_queue)) {
+		if (!bcm_ptp_get_tstamp(priv, &capts)) {
+			reschedule = true;
+			break;
+		}
+		bcm_ptp_match_tstamp(priv, &capts);
+	}
+
+	return reschedule ? 1 : -1;
+}
+
+static const struct ptp_clock_info bcm_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.name		= KBUILD_MODNAME,
+	.max_adj	= 100000000,
+	.gettimex64	= bcm_ptp_gettimex,
+	.settime64	= bcm_ptp_settime,
+	.adjtime	= bcm_ptp_adjtime,
+	.adjfine	= bcm_ptp_adjfine,
+	.enable		= bcm_ptp_enable,
+	.do_aux_work	= bcm_ptp_do_aux_work,
+	.n_per_out	= 1,
+};
+
+static void bcm_ptp_txtstamp(struct mii_timestamper *mii_ts,
+			     struct sk_buff *skb, int type)
+{
+	struct bcm_ptp_private *priv = mii2priv(mii_ts);
+	struct ptp_header *hdr;
+	bool discard = false;
+	int msgtype;
+
+	hdr = ptp_parse_header(skb, type);
+	if (!hdr)
+		goto out;
+	msgtype = ptp_get_msgtype(hdr, type);
+
+	switch (priv->tx_type) {
+	case HWTSTAMP_TX_ONESTEP_P2P:
+		if (msgtype == PTP_MSGTYPE_PDELAY_RESP)
+			discard = true;
+		fallthrough;
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		if (msgtype == PTP_MSGTYPE_SYNC)
+			discard = true;
+		fallthrough;
+	case HWTSTAMP_TX_ON:
+		BCM_SKB_CB(skb)->timeout = jiffies + SKB_TS_TIMEOUT;
+		BCM_SKB_CB(skb)->seq_id = be16_to_cpu(hdr->sequence_id);
+		BCM_SKB_CB(skb)->msgtype = msgtype;
+		BCM_SKB_CB(skb)->discard = discard;
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		skb_queue_tail(&priv->tx_queue, skb);
+		ptp_schedule_worker(priv->ptp_clock, 0);
+		return;
+	default:
+		break;
+	}
+
+out:
+	kfree_skb(skb);
+}
+
+static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts,
+			    struct ifreq *ifr)
+{
+	struct bcm_ptp_private *priv = mii2priv(mii_ts);
+	struct hwtstamp_config cfg;
+	u16 mode, ctrl;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		priv->hwts_rx = false;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		priv->hwts_rx = true;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	priv->tx_type = cfg.tx_type;
+
+	ctrl  = priv->hwts_rx ? SLICE_RX_EN : 0;
+	ctrl |= priv->tx_type != HWTSTAMP_TX_OFF ? SLICE_TX_EN : 0;
+
+	mode = tx_mode(PORT, SYNC, REPLACE_TS) |
+	       tx_mode(PORT, DELAY_REQ, REPLACE_TS) |
+	       tx_mode(PORT, PDELAY_REQ, REPLACE_TS) |
+	       tx_mode(PORT, PDELAY_RESP, REPLACE_TS);
+
+	bcm_phy_write_exp(priv->phydev, TX_EVENT_MODE, mode);
+
+	mode = rx_mode(PORT, SYNC, INSERT_TS_64) |
+	       rx_mode(PORT, DELAY_REQ, INSERT_TS_64) |
+	       rx_mode(PORT, PDELAY_REQ, INSERT_TS_64) |
+	       rx_mode(PORT, PDELAY_RESP, INSERT_TS_64);
+
+	bcm_phy_write_exp(priv->phydev, RX_EVENT_MODE, mode);
+
+	bcm_phy_write_exp(priv->phydev, SLICE_CTRL, ctrl);
+
+	if (ctrl & SLICE_TX_EN)
+		bcm_phy_write_exp(priv->phydev, TX_TS_CAPTURE, TX_TS_CAP_EN);
+	else
+		ptp_cancel_worker_sync(priv->ptp_clock);
+
+	/* purge existing data */
+	skb_queue_purge(&priv->tx_queue);
+
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+static int bcm_ptp_ts_info(struct mii_timestamper *mii_ts,
+			   struct ethtool_ts_info *ts_info)
+{
+	struct bcm_ptp_private *priv = mii2priv(mii_ts);
+
+	ts_info->phc_index = ptp_clock_index(priv->ptp_clock);
+	ts_info->so_timestamping =
+		SOF_TIMESTAMPING_TX_HARDWARE |
+		SOF_TIMESTAMPING_RX_HARDWARE |
+		SOF_TIMESTAMPING_RAW_HARDWARE;
+	ts_info->tx_types =
+		BIT(HWTSTAMP_TX_ON) |
+		BIT(HWTSTAMP_TX_OFF) |
+		BIT(HWTSTAMP_TX_ONESTEP_SYNC) |
+		BIT(HWTSTAMP_TX_ONESTEP_P2P);
+	ts_info->rx_filters =
+		BIT(HWTSTAMP_FILTER_NONE) |
+		BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+		BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT);
+
+	return 0;
+}
+
+void bcm_ptp_config_init(struct phy_device *phydev)
+{
+	/* init network sync engine */
+	bcm_phy_write_exp(phydev, NSE_CTRL, NSE_GMODE_EN | NSE_INIT);
+
+	/* enable time sync (TX/RX SOP capture) */
+	bcm_phy_write_exp(phydev, TIME_SYNC, TIME_SYNC_EN);
+
+	/* use sec.nsec heartbeat capture */
+	bcm_phy_write_exp(phydev, DPLL_SELECT, DPLL_HB_MODE2);
+
+	/* use 64 bit timecode for in TX */
+	bcm_phy_write_exp(phydev, TIMECODE_CTRL, TX_TIMECODE_SEL);
+}
+EXPORT_SYMBOL_GPL(bcm_ptp_config_init);
+
+static void bcm_ptp_init(struct bcm_ptp_private *priv)
+{
+	priv->nse_ctrl = NSE_GMODE_EN;
+
+	mutex_init(&priv->mutex);
+	skb_queue_head_init(&priv->tx_queue);
+
+	priv->mii_ts.rxtstamp = bcm_ptp_rxtstamp;
+	priv->mii_ts.txtstamp = bcm_ptp_txtstamp;
+	priv->mii_ts.hwtstamp = bcm_ptp_hwtstamp;
+	priv->mii_ts.ts_info = bcm_ptp_ts_info;
+
+	priv->phydev->mii_ts = &priv->mii_ts;
+}
+
+struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev)
+{
+	struct bcm_ptp_private *priv;
+	struct ptp_clock *clock;
+
+	switch (BRCM_PHY_MODEL(phydev)) {
+	case PHY_ID_BCM54210E:
+#ifdef PHY_ID_BCM54213PE
+	case PHY_ID_BCM54213PE:
+#endif
+		break;
+	default:
+		return NULL;
+	}
+
+	priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	priv->ptp_info = bcm_ptp_clock_info;
+
+	clock = ptp_clock_register(&priv->ptp_info, &phydev->mdio.dev);
+	if (IS_ERR(clock))
+		return (void *)clock;
+	priv->ptp_clock = clock;
+
+	priv->phydev = phydev;
+	bcm_ptp_init(priv);
+
+	return priv;
+}
+EXPORT_SYMBOL_GPL(bcm_ptp_probe);
+
+MODULE_LICENSE("GPL");
-- 
2.31.1


^ permalink raw reply related

* [PATCH v3] brcmfmac: of: introduce new property to allow disable PNO
From: Hermes Zhang @ 2022-04-24  2:22 UTC (permalink / raw)
  To: Arend van Spriel, Franky Lin, Hante Meuleman, Kalle Valo,
	David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: kernel, Hermes Zhang, linux-wireless, brcm80211-dev-list.pdl,
	SHA-cyfmac-dev-list, netdev, linux-kernel

From: Hermes Zhang <chenhuiz@axis.com>

Some versions of the Broadcom firmware for this chip seem to hang
if the PNO feature is enabled when connecting to a dummy or
non-existent AP.
Add a new property to allow the disabling of PNO for devices with
this specific firmware.

Signed-off-by: Hermes Zhang <chenhuiz@axis.com>
---

Notes:
    Comments update

 drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
index 8623bde5eb70..121a195e4054 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
@@ -11,6 +11,7 @@
 #include "core.h"
 #include "common.h"
 #include "of.h"
+#include "feature.h"
 
 static int brcmf_of_get_country_codes(struct device *dev,
 				      struct brcmf_mp_device *settings)
@@ -102,6 +103,9 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type,
 	if (bus_type != BRCMF_BUSTYPE_SDIO)
 		return;
 
+	if (of_find_property(np, "brcm,pno-disable", NULL))
+		settings->feature_disable |= BIT(BRCMF_FEAT_PNO);
+
 	if (of_property_read_u32(np, "brcm,drive-strength", &val) == 0)
 		sdio->drive_strength = val;
 
-- 
2.30.2


^ permalink raw reply related

* Re: [PATCH iproute2-next 1/3] libbpf: Use bpf_object__load instead of bpf_object__load_xattr
From: Hangbin Liu @ 2022-04-24  1:56 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, stephen, toke, Paul Chaignon
In-Reply-To: <20220423152300.16201-2-dsahern@kernel.org>

Hi David,

This patch reverts c04e45d0 ("lib/bpf: fix verbose flag when using libbpf").
Should we set prog->log_level directly before it is loaded, like
bpf_program__set_log_level() does?

Thanks
Hangbin
On Sat, Apr 23, 2022 at 09:22:58AM -0600, David Ahern wrote:
> bpf_object__load_xattr is deprecated as of v0.8+; remove it
> in favor of bpf_object__load.
> 
> Signed-off-by: David Ahern <dsahern@kernel.org>
> ---
>  lib/bpf_libbpf.c | 7 +------
>  1 file changed, 1 insertion(+), 6 deletions(-)
> 
> diff --git a/lib/bpf_libbpf.c b/lib/bpf_libbpf.c
> index f4f98caa1e58..f723f6310c28 100644
> --- a/lib/bpf_libbpf.c
> +++ b/lib/bpf_libbpf.c
> @@ -248,7 +248,6 @@ static int handle_legacy_maps(struct bpf_object *obj)
>  
>  static int load_bpf_object(struct bpf_cfg_in *cfg)
>  {
> -	struct bpf_object_load_attr attr = {};
>  	struct bpf_program *p, *prog = NULL;
>  	struct bpf_object *obj;
>  	char root_path[PATH_MAX];
> @@ -305,11 +304,7 @@ static int load_bpf_object(struct bpf_cfg_in *cfg)
>  	if (ret)
>  		goto unload_obj;
>  
> -	attr.obj = obj;
> -	if (cfg->verbose)
> -		attr.log_level = 2;
> -
> -	ret = bpf_object__load_xattr(&attr);
> +	ret = bpf_object__load(obj);
>  	if (ret)
>  		goto unload_obj;
>  
> -- 
> 2.24.3 (Apple Git-128)
> 


^ permalink raw reply

* Re: [PATCH net-next v2 1/2] rtnetlink: add extack support in fdb del handlers
From: Nikolay Aleksandrov @ 2022-04-23 23:00 UTC (permalink / raw)
  To: Alaa Mohamed, netdev; +Cc: outreachy, roopa, roopa.prabhu, jdenham, sbrivio
In-Reply-To: <6a77eca533b7048b85bf0ffe0c3904d36045c320.1650754231.git.eng.alaamohamedsoliman.am@gmail.com>

On 4/24/22 01:54, Alaa Mohamed wrote:
> Add extack support to .ndo_fdb_del in netdevice.h and
> all related methods.
> 
> Signed-off-by: Alaa Mohamed <eng.alaamohamedsoliman.am@gmail.com>
> ---

Please CC all patch-related maintainers next time. One comment below.

>   drivers/net/ethernet/intel/ice/ice_main.c        | 3 +--
>   drivers/net/ethernet/mscc/ocelot_net.c           | 4 ++--
>   drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +-
>   drivers/net/macvlan.c                            | 2 +-
>   drivers/net/vxlan/vxlan_core.c                   | 2 +-
>   include/linux/netdevice.h                        | 2 +-
>   net/bridge/br_fdb.c                              | 2 +-
>   net/bridge/br_private.h                          | 2 +-
>   net/core/rtnetlink.c                             | 4 ++--
>   9 files changed, 11 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> index d768925785ca..5f9cb4830956 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -5678,10 +5678,9 @@ ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
>   static int
>   ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
>   	    struct net_device *dev, const unsigned char *addr,
> -	    __always_unused u16 vid)
> +	    __always_unused u16 vid, struct netlink_ext_ack *extack)
>   {
>   	int err;
> -

I don't think you should remove this blank line.

>   	if (ndm->ndm_state & NUD_PERMANENT) {
>   		netdev_err(dev, "FDB only supports static addresses\n");
>   		return -EINVAL;

^ permalink raw reply

* net: stmmac: dwmac-imx: half duplex crash
From: Marcel Ziswiler @ 2022-04-23 22:58 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: alexandre.torgue@foss.st.com, davem@davemloft.net,
	kernel@pengutronix.de, linux-imx@nxp.com,
	linux-stm32@st-md-mailman.stormreply.com, festevam@gmail.com,
	linux-kernel@vger.kernel.org, shawnguo@kernel.org,
	linux-arm-kernel@lists.infradead.org, s.hauer@pengutronix.de,
	mcoquelin.stm32@gmail.com, pabeni@redhat.com,
	peppe.cavallaro@st.com, joabreu@synopsys.com, kuba@kernel.org

Hi there

We recently tried operating the IMX8MPEVK ENET_QOS imx-dwmac driver in half-duplex mode, which crashes as
follows:

root@imx8mpevk:~# uname -a
Linux imx8mpevk 5.18.0-rc2-next-20220413-00006-gc741306ff2ed #4 SMP PREEMPT Wed Apr 13 15:08:36 CEST 2022
aarch64 aarch64 aarch64 GNU/Linux
root@imx8mpevk:~# ethtool -s eth1 advertise 0x004
[  469.685304] imx-dwmac 30bf0000.ethernet eth1: Link is Down
[  469.703528] kauditd_printk_skb: 1 callbacks suppressed
[  469.703539] audit: type=1334 audit(1650754238.319:23): prog-id=17 op=LOAD
[  469.715602] audit: type=1334 audit(1650754238.327:24): prog-id=18 op=LOAD
[  472.737884] imx-dwmac 30bf0000.ethernet eth1: Link is Up - 100Mbps/Half - flow control off
[  472.746205] IPv6: ADDRCONF(NETDEV_CHANGE): eth1: link becomes ready
[  478.080481] ------------[ cut here ]------------
[  478.085134] NETDEV WATCHDOG: eth1 (imx-dwmac): transmit queue 1 timed out
[  478.091985] WARNING: CPU: 3 PID: 0 at net/sched/sch_generic.c:529 dev_watchdog+0x200/0x210
[  478.100269] Modules linked in: 8021q garp mrp stp llc overlay bluetooth ecdh_generic ecc rfkill caam_jr
caamhash_desc caamalg_desc crypto_engine rng_core authenc libdes dwmac_imx stmmac_platform imx_sdma
crct10dif_ce fsl_imx8_ddr_perf stmmac pcs_xpcs etnaviv gpu_sched flexcan caam snvs_pwrkey error can_dev
rtc_snvs imx_cpufreq_dt imx8mm_thermal fuse drm ipv6
[  478.132142] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.18.0-rc2-next-20220413-00006-gc741306ff2ed #4
[  478.141364] Hardware name: NXP i.MX8MPlus EVK board (DT)
[  478.146676] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[  478.153644] pc : dev_watchdog+0x200/0x210
[  478.157662] lr : dev_watchdog+0x200/0x210
[  478.161680] sp : ffff80000a3b3a70
[  478.164992] x29: ffff80000a3b3a70 x28: 0000000000000005 x27: ffff800008e57600
[  478.172140] x26: ffff800009ef79c0 x25: ffff00017f3c7fe8 x24: ffff80000a3b3b40
[  478.179283] x23: ffff800009ef7000 x22: 0000000000000001 x21: ffff0000c4cc039c
[  478.186428] x20: ffff0000c4cc0000 x19: ffff0000c4cc0448 x18: 0000000000000030
[  478.193571] x17: ffff800175a13000 x16: ffff80000a2e4000 x15: ffffffffffffffff
[  478.200713] x14: ffff800009f12388 x13: 00000000000004ec x12: 00000000000001a4
[  478.207860] x11: 712074696d736e61 x10: ffff800009f6a388 x9 : 00000000fffff000
[  478.215003] x8 : ffff800009f12388 x7 : 0000000000000003 x6 : 0000000000000000
[  478.222146] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
[  478.229294] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c01a0000
[  478.236439] Call trace:
[  478.238886]  dev_watchdog+0x200/0x210
[  478.242556]  call_timer_fn.constprop.0+0x24/0x80
[  478.247182]  __run_timers.part.0+0x1f4/0x23c
[  478.251454]  run_timer_softirq+0x3c/0x7c
[  478.255384]  __do_softirq+0x124/0x2a0
[  478.259047]  __irq_exit_rcu+0xe4/0x100
[  478.262804]  irq_exit_rcu+0x10/0x1c
[  478.266300]  el1_interrupt+0x38/0x70
[  478.269881]  el1h_64_irq_handler+0x18/0x24
[  478.273988]  el1h_64_irq+0x64/0x68
[  478.277393]  arch_cpu_idle+0x18/0x2c
[  478.280969]  default_idle_call+0x24/0x6c
[  478.284900]  do_idle+0x22c/0x29c
[  478.288133]  cpu_startup_entry+0x28/0x30
[  478.292065]  secondary_start_kernel+0x140/0x164
[  478.296600]  __secondary_switched+0xa0/0xa4
[  478.300789] ---[ end trace 0000000000000000 ]---
[  478.305451] imx-dwmac 30bf0000.ethernet eth1: Reset adapter.
[  478.332901] imx-dwmac 30bf0000.ethernet eth1: FPE workqueue stop
[  478.339233] imx-dwmac 30bf0000.ethernet eth1: Timeout accessing MAC_VLAN_Tag_Filter
[  478.346962] imx-dwmac 30bf0000.ethernet eth1: failed to kill vid 0081/0
[  478.556494] imx-dwmac 30bf0000.ethernet eth1: PHY [stmmac-1:01] driver [RTL8211F Gigabit Ethernet]
(irq=POLL)
[  478.736560] imx-dwmac 30bf0000.ethernet eth1: Register MEM_TYPE_PAGE_POOL RxQ-0
[  478.744388] imx-dwmac 30bf0000.ethernet eth1: Register MEM_TYPE_PAGE_POOL RxQ-1
[  478.752222] imx-dwmac 30bf0000.ethernet eth1: Register MEM_TYPE_PAGE_POOL RxQ-2
[  478.760126] imx-dwmac 30bf0000.ethernet eth1: Register MEM_TYPE_PAGE_POOL RxQ-3
[  478.767951] imx-dwmac 30bf0000.ethernet eth1: Register MEM_TYPE_PAGE_POOL RxQ-4
[  478.784520] imx-dwmac 30bf0000.ethernet eth1: No Safety Features support found
[  478.791787] imx-dwmac 30bf0000.ethernet eth1: IEEE 1588-2008 Advanced Timestamp supported
[  478.800227] imx-dwmac 30bf0000.ethernet eth1: registered PTP clock
[  478.806610] imx-dwmac 30bf0000.ethernet eth1: FPE workqueue start
[  478.812774] imx-dwmac 30bf0000.ethernet eth1: configuring for phy/rgmii-id link mode
[  478.848739] 8021q: adding VLAN 0 to HW filter on device eth1

Does anybody have any experience in running dwmac in half-duplex mode? Any suggestions?

BTW: It also crashes the same way when running NXP's latest downstream LF5.15.5_1.0.0, which I reported here [1].

[1]
https://community.nxp.com/t5/i-MX-Processors/IMX8MPEVK-ENET-QOS-imx-dwmac-Half-Duplex-Crashes/m-p/1448085#M189597

Cheers

Marcel

^ permalink raw reply

* [PATCH net-next v2 2/2] net: vxlan: vxlan_core.c: Add extack support to vxlan_fdb_delete
From: Alaa Mohamed @ 2022-04-23 22:54 UTC (permalink / raw)
  To: netdev
  Cc: outreachy, roopa, roopa.prabhu, jdenham, sbrivio,
	eng.alaamohamedsoliman.am
In-Reply-To: <cover.1650754228.git.eng.alaamohamedsoliman.am@gmail.com>

Add extack to vxlan_fdb_delete and vxlan_fdb_parse

Signed-off-by: Alaa Mohamed <eng.alaamohamedsoliman.am@gmail.com>
---
changes in V2:
	- fix spelling vxlan_fdb_delete
	- add missing braces
	- edit error message
---
 drivers/net/vxlan/vxlan_core.c | 36 +++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index cf2f60037340..4e1886655101 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1129,19 +1129,23 @@ static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
 
 static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
 			   union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
-			   __be32 *vni, u32 *ifindex, u32 *nhid)
+			   __be32 *vni, u32 *ifindex, u32 *nhid, struct netlink_ext_ack *extack)
 {
 	struct net *net = dev_net(vxlan->dev);
 	int err;
 
 	if (tb[NDA_NH_ID] && (tb[NDA_DST] || tb[NDA_VNI] || tb[NDA_IFINDEX] ||
-	    tb[NDA_PORT]))
-		return -EINVAL;
+	    tb[NDA_PORT])){
+			NL_SET_ERR_MSG(extack, "DST, VNI, ifindex and port are mutually exclusive with NH_ID");
+			return -EINVAL;
+		}
 
 	if (tb[NDA_DST]) {
 		err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
-		if (err)
+		if (err){
+			NL_SET_ERR_MSG(extack, "Unsupported address family");
 			return err;
+		}
 	} else {
 		union vxlan_addr *remote = &vxlan->default_dst.remote_ip;
 
@@ -1157,24 +1161,30 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
 	}
 
 	if (tb[NDA_PORT]) {
-		if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
+		if (nla_len(tb[NDA_PORT]) != sizeof(__be16)){
+			NL_SET_ERR_MSG(extack, "Invalid vxlan port");
 			return -EINVAL;
+		}
 		*port = nla_get_be16(tb[NDA_PORT]);
 	} else {
 		*port = vxlan->cfg.dst_port;
 	}
 
 	if (tb[NDA_VNI]) {
-		if (nla_len(tb[NDA_VNI]) != sizeof(u32))
+		if (nla_len(tb[NDA_VNI]) != sizeof(u32)){
+			NL_SET_ERR_MSG(extack, "Invalid vni");
 			return -EINVAL;
+		}	
 		*vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
 	} else {
 		*vni = vxlan->default_dst.remote_vni;
 	}
 
 	if (tb[NDA_SRC_VNI]) {
-		if (nla_len(tb[NDA_SRC_VNI]) != sizeof(u32))
+		if (nla_len(tb[NDA_SRC_VNI]) != sizeof(u32)){
+			NL_SET_ERR_MSG(extack, "Invalid src vni");
 			return -EINVAL;
+		}
 		*src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI]));
 	} else {
 		*src_vni = vxlan->default_dst.remote_vni;
@@ -1183,12 +1193,16 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
 	if (tb[NDA_IFINDEX]) {
 		struct net_device *tdev;
 
-		if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
+		if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)){
+			NL_SET_ERR_MSG(extack, "Invalid ifindex");
 			return -EINVAL;
+		}
 		*ifindex = nla_get_u32(tb[NDA_IFINDEX]);
 		tdev = __dev_get_by_index(net, *ifindex);
-		if (!tdev)
+		if (!tdev){
+			NL_SET_ERR_MSG(extack,"Device not found");
 			return -EADDRNOTAVAIL;
+		}
 	} else {
 		*ifindex = 0;
 	}
@@ -1226,7 +1240,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EINVAL;
 
 	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
-			      &nhid);
+			      &nhid, extack);
 	if (err)
 		return err;
 
@@ -1291,7 +1305,7 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 	int err;
 
 	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
-			      &nhid);
+			      &nhid, extack);
 	if (err)
 		return err;
 
-- 
2.36.0


^ permalink raw reply related

* [PATCH net-next v2 1/2] rtnetlink: add extack support in fdb del handlers
From: Alaa Mohamed @ 2022-04-23 22:54 UTC (permalink / raw)
  To: netdev
  Cc: outreachy, roopa, roopa.prabhu, jdenham, sbrivio,
	eng.alaamohamedsoliman.am
In-Reply-To: <cover.1650754228.git.eng.alaamohamedsoliman.am@gmail.com>

Add extack support to .ndo_fdb_del in netdevice.h and
all related methods.

Signed-off-by: Alaa Mohamed <eng.alaamohamedsoliman.am@gmail.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c        | 3 +--
 drivers/net/ethernet/mscc/ocelot_net.c           | 4 ++--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +-
 drivers/net/macvlan.c                            | 2 +-
 drivers/net/vxlan/vxlan_core.c                   | 2 +-
 include/linux/netdevice.h                        | 2 +-
 net/bridge/br_fdb.c                              | 2 +-
 net/bridge/br_private.h                          | 2 +-
 net/core/rtnetlink.c                             | 4 ++--
 9 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index d768925785ca..5f9cb4830956 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5678,10 +5678,9 @@ ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
 static int
 ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
 	    struct net_device *dev, const unsigned char *addr,
-	    __always_unused u16 vid)
+	    __always_unused u16 vid, struct netlink_ext_ack *extack)
 {
 	int err;
-
 	if (ndm->ndm_state & NUD_PERMANENT) {
 		netdev_err(dev, "FDB only supports static addresses\n");
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 247bc105bdd2..e07c64e3159c 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -774,14 +774,14 @@ static int ocelot_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 
 static int ocelot_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 			       struct net_device *dev,
-			       const unsigned char *addr, u16 vid)
+			       const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack)
 {
 	struct ocelot_port_private *priv = netdev_priv(dev);
 	struct ocelot_port *ocelot_port = &priv->port;
 	struct ocelot *ocelot = ocelot_port->ocelot;
 	int port = priv->chip_port;
 
-	return ocelot_fdb_del(ocelot, port, addr, vid, ocelot_port->bridge);
+	return ocelot_fdb_del(ocelot, port, addr, vid, ocelot_port->bridge, extack);
 }
 
 static int ocelot_port_fdb_dump(struct sk_buff *skb,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index d320567b2cca..51fa23418f6a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -368,7 +368,7 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p)
 
 static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 			struct net_device *netdev,
-			const unsigned char *addr, u16 vid)
+			const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	int err = -EOPNOTSUPP;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 069e8824c264..ffd34d9f7049 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1017,7 +1017,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 
 static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 			   struct net_device *dev,
-			   const unsigned char *addr, u16 vid)
+			   const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	int err = -EINVAL;
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index de97ff98d36e..cf2f60037340 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1280,7 +1280,7 @@ int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
 /* Delete entry (via netlink) */
 static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 			    struct net_device *dev,
-			    const unsigned char *addr, u16 vid)
+			    const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	union vxlan_addr ip;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 28ea4f8269d4..d0d2a8f33c73 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1509,7 +1509,7 @@ struct net_device_ops {
 					       struct nlattr *tb[],
 					       struct net_device *dev,
 					       const unsigned char *addr,
-					       u16 vid);
+					       u16 vid, struct netlink_ext_ack *extack);
 	int			(*ndo_fdb_dump)(struct sk_buff *skb,
 						struct netlink_callback *cb,
 						struct net_device *dev,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6ccda68bd473..5bfce2e9a553 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -1110,7 +1110,7 @@ static int __br_fdb_delete(struct net_bridge *br,
 /* Remove neighbor entry with RTM_DELNEIGH */
 int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 		  struct net_device *dev,
-		  const unsigned char *addr, u16 vid)
+		  const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack)
 {
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_port *p = NULL;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 18ccc3d5d296..95348c1c9ce5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -780,7 +780,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		   const unsigned char *addr, u16 vid, unsigned long flags);
 
 int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
-		  struct net_device *dev, const unsigned char *addr, u16 vid);
+		  struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack);
 int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
 	       const unsigned char *addr, u16 vid, u16 nlh_flags,
 	       struct netlink_ext_ack *extack);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4041b3e2e8ec..99b30ae58a47 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4223,7 +4223,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 		const struct net_device_ops *ops = br_dev->netdev_ops;
 
 		if (ops->ndo_fdb_del)
-			err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
+			err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
 
 		if (err)
 			goto out;
@@ -4235,7 +4235,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (ndm->ndm_flags & NTF_SELF) {
 		if (dev->netdev_ops->ndo_fdb_del)
 			err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr,
-							   vid);
+							   vid, extack);
 		else
 			err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
 
-- 
2.36.0


^ permalink raw reply related

* [PATCH net-next v2 0/2] propagate extack to vxlan_fdb_delete
From: Alaa Mohamed @ 2022-04-23 22:54 UTC (permalink / raw)
  To: netdev
  Cc: outreachy, roopa, roopa.prabhu, jdenham, sbrivio,
	eng.alaamohamedsoliman.am

In order to propagate extack to vxlan_fdb_delete and vxlan_fdb_parse,
add extack to .ndo_fdb_del and edit all fdb del handlers.

Alaa Mohamed (2):
  rtnetlink: add extack support in fdb del handlers
  net: vxlan: vxlan_core.c: Add extack support to vxlan_fdb_delete

 drivers/net/ethernet/intel/ice/ice_main.c     |  3 +-
 drivers/net/ethernet/mscc/ocelot_net.c        |  4 +-
 .../net/ethernet/qlogic/qlcnic/qlcnic_main.c  |  2 +-
 drivers/net/macvlan.c                         |  2 +-
 drivers/net/vxlan/vxlan_core.c                | 38 +++++++++++++------
 include/linux/netdevice.h                     |  2 +-
 net/bridge/br_fdb.c                           |  2 +-
 net/bridge/br_private.h                       |  2 +-
 net/core/rtnetlink.c                          |  4 +-
 9 files changed, 36 insertions(+), 23 deletions(-)

-- 
2.36.0


^ permalink raw reply

* Re: [PATCH net] sctp: check asoc strreset_chunk in sctp_generate_reconf_event
From: patchwork-bot+netdevbpf @ 2022-04-23 21:40 UTC (permalink / raw)
  To: Xin Long; +Cc: netdev, linux-sctp, davem, kuba, marcelo.leitner, nhorman
In-Reply-To: <3000f8b12920ae81b84dceead6dcc90bb00c0403.1650487961.git.lucien.xin@gmail.com>

Hello:

This patch was applied to netdev/net.git (master)
by David S. Miller <davem@davemloft.net>:

On Wed, 20 Apr 2022 16:52:41 -0400 you wrote:
> A null pointer reference issue can be triggered when the response of a
> stream reconf request arrives after the timer is triggered, such as:
> 
>   send Incoming SSN Reset Request --->
>   CPU0:
>    reconf timer is triggered,
>    go to the handler code before hold sk lock
>                             <--- reply with Outgoing SSN Reset Request
>   CPU1:
>    process Outgoing SSN Reset Request,
>    and set asoc->strreset_chunk to NULL
>   CPU0:
>    continue the handler code, hold sk lock,
>    and try to hold asoc->strreset_chunk, crash!
> 
> [...]

Here is the summary with links:
  - [net] sctp: check asoc strreset_chunk in sctp_generate_reconf_event
    https://git.kernel.org/netdev/net/c/165e3e17fe8f

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH iproute2-next 0/3] Address more libbpf deprecations
From: Toke Høiland-Jørgensen @ 2022-04-23 21:35 UTC (permalink / raw)
  To: David Ahern, netdev; +Cc: stephen, haliu, David Ahern
In-Reply-To: <20220423152300.16201-1-dsahern@kernel.org>

David Ahern <dsahern@kernel.org> writes:

> Another round of changes to handle libbpf deprecations. Compiles are
> clean as of libbpf commit 533c7666eb72 ("Fix downloads formats").
>
> David Ahern (3):
>   libbpf: Use bpf_object__load instead of bpf_object__load_xattr
>   libbpf: Remove use of bpf_program__set_priv and bpf_program__priv
>   libbpf: Remove use of bpf_map_is_offload_neutral
>
>  lib/bpf_libbpf.c | 30 +++++++++++++++++-------------
>  1 file changed, 17 insertions(+), 13 deletions(-)

For the series:

Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>


^ permalink raw reply

* Zero-Day bug in VLAN offloading + cooked AF_PACKET
From: alexandre.ferrieux @ 2022-04-23 20:02 UTC (permalink / raw)
  To: netdev

Hi,

I know the subject sounds like this belongs in libpcap bug reports; indeed it 
started there [1]. However, after some digging, it really looks like there's an 
issue in what the kernel itself provides.

TL;DR: outgoing VLAN-tagged traffic to non-offloaded interfaces is captured as 
corrupted in cooked mode, and has been so since at least 3.4...

One popular way of doing captures with libpcap-based tools like tcpdump, is the 
so-called "cooked mode". This is what you get with "tcpdump -i any". The kernel 
API used for this, documented in packet(7), is a socket of family AF_PACKET and 
protocol level SOCK_DGRAM. Contrary to SOCK_RAW, SOCK_DGRAM provides a kind of 
"near L3" abstraction, stripping most of the L2 headers from the original 
packets. For example, when using recvmsg(),

  - the .msg_iov (main payload) of the recvmsg() is the packet starting at the 
L3 header
  - the .msg_name (aka "address") is a sockaddr_ll structure containing some L2 
information: ethertype, source MAC address.
  - the .msg_control (aka metadata, activated with PACKET_AUXDATA sockopt) may 
contain VLAN information: TCI, TPID.

All this works beautifully most of the time, with or without VLAN tags, as the 
ethertype is correctly extracted and conveyed in the sockaddr_ll. This allows 
any consumer of the L3 frame to decode it properly, knowing exactly which L3 it's 
looking at.

However, there's a catch: for outgoing packets, *if* the interface has no 
hardware VLAN offloading, the ethertype gets overwritten by ... the TPID 
(0x8100). As a result, a consumer of the L3 frame has absolutely no way to 
recover its type.

As a demo, here is what the venerable "tcpdump -i any" says of an outgoing ARP 
packet on VLAN interface eth0.24, after VLAN offloading has been disabled via 
"ethtool -K". Two lines are generated, as the packet is seen on both eth0.24 
(first line) and eth0 (second line):

  15:06:37.681328 ARP, Request who-has 1.0.24.3 tell 1.0.24.1, length 28
  15:06:37.681336 ethertype IPv4, IP0

The first line is correct, as the frame is captured before handling by the 8021q 
module. The second is not !!

This is the result of the ethertype being overwritten. The actual value is 
0x8100, which tcpdump decodes as a 802.1Q TPID, thus shifting the L3 beginning 
by 4 bytes, ending up seeing a nonsensical "IPv0" frame.

To prove that this is *not* an issue in libpcap or tcpdump, here are the three 
aforementioned pieces of the packet, gotten by a simple test program doing 
recvmsg() on an AF_PACKET+SOCK_DGRAM capture socket:

  On VLAN interface eth0.24: (the "^^^^" show the ethertype's position)
  --------------------------

   - metadata:     107:8:010000001c0000001c0000000000000000000000
   - sockaddr_ll:  1100080606000000010004060025903285a70000
                       ^^^^
   - L3 frame:     00010800060400010025903285a70100180100000000000001001803

  On parent interface eth0:
  -------------------------

   - metadata:     107:8:010000001c0000001c0000000000000000000000
   - sockaddr_ll:  1100810004000000010004060025903285a70000
                       ^^^^
   - L3 frame:     00010800060400010025903285a70100180100000000000001001803

As is clear above, the second instance contains no trace of the original ARP 
ethertype 0x0806.

By contrast, if we re-enable VLAN offloading,

    - the first instance (on subinterface) is unchanged
    - the second instance (on parent interface) is back to normal, with a 
correct ARP ethertype (^^^^=0806) *and* VLAN info in the metadata (TCI-TPID, 
byte-swapped =1800,0081):

  On parent interface eth0:
  -------------------------

   - metadata:     107:8:510000001c0000001c0000000000000018000081
                                                         TCI-TPID
   - sockaddr_ll:  1100080604000000010004060025903285a70000
                       ^^^^
   - L3 frame:     00010800060400010025903285a70100180100000000000001001803

And sure enough, tcpdump is happy again:

  21:44:18.481331 ARP, Request who-has 1.0.24.3 tell 1.0.24.1, length 28
  21:44:18.481338 ethertype ARP, ARP, Request who-has 1.0.24.3 tell 1.0.24.1, 
length 28

I have found this bug active on an old machine with kernel 3.4.
In the URL below you'll find more details on ftrace-based evidence, hinting at 
the 8021q module.
However, I am *not* familiar enough with the Linux network stack (and special 
cases like offloading) to suggest a fix, sorry.
I hope a knowledgeable person will consider this nasty enough to deserve their 
attention.

Thanks in advance !

-Alex

[1] https://github.com/the-tcpdump-group/libpcap/issues/1105

_________________________________________________________________________________________________________________________

Ce message et ses pieces jointes peuvent contenir des informations confidentielles ou privilegiees et ne doivent donc
pas etre diffuses, exploites ou copies sans autorisation. Si vous avez recu ce message par erreur, veuillez le signaler
a l'expediteur et le detruire ainsi que les pieces jointes. Les messages electroniques etant susceptibles d'alteration,
Orange decline toute responsabilite si ce message a ete altere, deforme ou falsifie. Merci.

This message and its attachments may contain confidential or privileged information that may be protected by law;
they should not be distributed, used or copied without authorisation.
If you have received this email in error, please notify the sender and delete this message and its attachments.
As emails may be altered, Orange is not liable for messages that have been modified, changed or falsified.
Thank you.

^ permalink raw reply

* Re: Accessing XDP packet memory from the end
From: Toke Høiland-Jørgensen @ 2022-04-23 20:05 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: Larysa Zaremba, bpf, netdev, Andrii Nakryiko, Alexei Starovoitov,
	Daniel Borkmann, Jesper Dangaard Brouer, Magnus Karlsson,
	Maciej Fijalkowski, Alexander Lobakin
In-Reply-To: <20220422164137.875143-1-alexandr.lobakin@intel.com>

Alexander Lobakin <alexandr.lobakin@intel.com> writes:

> From: Toke Høiland-Jørgensen <toke@redhat.com>
> Date: Thu, 21 Apr 2022 19:17:11 +0200
>
>> Larysa Zaremba <larysa.zaremba@intel.com> writes:
>> 
>> > Dear all,
>> > Our team has encountered a need of accessing data_meta in a following way:
>> >
>> > int xdp_meta_prog(struct xdp_md *ctx)
>> > {
>> > 	void *data_meta_ptr = (void *)(long)ctx->data_meta;
>> > 	void *data_end = (void *)(long)ctx->data_end;
>> > 	void *data = (void *)(long)ctx->data;
>> > 	u64 data_size = sizeof(u32);
>> > 	u32 magic_meta;
>> > 	u8 offset;
>> >
>> > 	offset = (u8)((s64)data - (s64)data_meta_ptr);
>> > 	if (offset < data_size) {
>> > 		bpf_printk("invalid offset: %ld\n", offset);
>> > 		return XDP_DROP;
>> > 	}
>> >
>> > 	data_meta_ptr += offset;
>> > 	data_meta_ptr -= data_size;
>> >
>> > 	if (data_meta_ptr + data_size > data) {
>> > 		return XDP_DROP;
>> > 	}
>> > 		
>> > 	magic_meta = *((u32 *)data);
>> > 	bpf_printk("Magic: %d\n", magic_meta);
>> > 	return XDP_PASS;
>> > }
>> >
>> > Unfortunately, verifier claims this code attempts to access packet with
>> > an offset of -2 (a constant part) and negative offset is generally forbidden.
>> >
>> > For now we have 2 solutions, one is using bpf_xdp_adjust_meta(),
>> > which is pretty good, but not ideal for the hot path.
>> > The second one is the patch at the end.
>> >
>> > Do you see any other way of accessing memory from the end of data_meta/data?
>> > What do you think about both suggested solutions?
>> 
>> The problem is that the compiler is generating code that the verifier
>> doesn't understand. It's notoriously hard to get LLVM to produce code
>> that preserves the right bounds checks which is why projects like Cilium
>> use helpers with inline ASM to produce the right loads, like in [0].
>> 
>> Adapting that cilium helper to load from the metadata area, your example
>> can be rewritten as follows (which works just fine with no verifier
>> changes):
>> 
>> static __always_inline int
>> xdp_load_meta_bytes(const struct xdp_md *ctx, __u64 off, void *to, const __u64 len)
>> {
>> 	void *from;
>> 	int ret;
>> 	/* LLVM tends to generate code that verifier doesn't understand,
>> 	 * so force it the way we want it in order to open up a range
>> 	 * on the reg.
>> 	 */
>> 	asm volatile("r1 = *(u32 *)(%[ctx] +8)\n\t"
>> 		     "r2 = *(u32 *)(%[ctx] +0)\n\t"
>> 		     "%[off] &= %[offmax]\n\t"
>> 		     "r1 += %[off]\n\t"
>> 		     "%[from] = r1\n\t"
>> 		     "r1 += %[len]\n\t"
>> 		     "if r1 > r2 goto +2\n\t"
>> 		     "%[ret] = 0\n\t"
>> 		     "goto +1\n\t"
>> 		     "%[ret] = %[errno]\n\t"
>> 		     : [ret]"=r"(ret), [from]"=r"(from)
>> 		     : [ctx]"r"(ctx), [off]"r"(off), [len]"ri"(len),
>> 		       [offmax]"i"(__CTX_OFF_MAX), [errno]"i"(-EINVAL)
>> 		     : "r1", "r2");
>> 	if (!ret)
>> 		__builtin_memcpy(to, from, len);
>> 	return ret;
>> }
>> 
>> 
>> SEC("xdp")
>> int xdp_meta_prog(struct xdp_md *ctx)
>> {
>>         void *data_meta_ptr = (void *)(long)ctx->data_meta;
>>         void *data = (void *)(long)ctx->data;
>>         __u32 magic_meta;
>>         __u8 offset;
>> 	int ret;
>> 
>>         offset = (__u8)((__s64)data - (__s64)data_meta_ptr);
>> 	ret = xdp_load_meta_bytes(ctx, offset - 4, &magic_meta, sizeof(magic_meta));
>> 	if (ret) {
>> 		bpf_printk("load bytes failed: %d\n", ret);
>>                 return XDP_DROP;
>> 	}
>> 
>>         bpf_printk("Magic: %d\n", magic_meta);
>>         return XDP_PASS;
>> }
>
> At the moment, we use this (based on Cilium's and your), it works
> just like we want C code to work previously:
>
> #define __CTX_OFF_MAX 0xff
>
> static __always_inline void *
> can_i_access_meta_please(const struct xdp_md *ctx, __u64 off, const __u64 len)
> {
> 	void *ret;
>
> 	/* LLVM tends to generate code that verifier doesn't understand,
> 	 * so force it the way we want it in order to open up a range
> 	 * on the reg.
> 	 */
> 	asm volatile("r1 = *(u32 *)(%[ctx] +8)\n\t"
> 		     "r2 = *(u32 *)(%[ctx] +0)\n\t"
> 		     "%[off] &= %[offmax]\n\t"
> 		     "r1 += %[off]\n\t"
> 		     "%[ret] = r1\n\t"
> 		     "r1 += %[len]\n\t"
> 		     "if r1 > r2 goto +1\n\t"
> 		     "goto +1\n\t"
> 		     "%[ret] = %[null]\n\t"
> 		     : [ret]"=r"(ret)
> 		     : [ctx]"r"(ctx), [off]"r"(off), [len]"ri"(len),
> 		       [offmax]"i"(__CTX_OFF_MAX), [null]"i"(NULL)
> 		     : "r1", "r2");
>
> 	return ret;
> }
>
> SEC("xdp")
> int xdp_prognum_n0_meta(struct xdp_md *ctx)
> {
> 	void *data_meta = (void *)(__s64)ctx->data_meta;
> 	void *data = (void *)(__s64)ctx->data;
> 	struct xdp_meta_generic *md;
> 	__u64 offset;
>
> 	offset = (__u64)((__s64)data - (__s64)data_meta);
>
> 	md = can_i_access_meta_please(ctx, offset, sizeof(*md));
> 	if (__builtin_expect(!md, 0)) {
> 		bpf_printk("No you can't\n");
> 		return XDP_DROP;
> 	}
>
> 	bpf_printk("Magic: 0x%04x\n", md->magic_id);
> 	return XDP_PASS;
> }
>
> Thanks for the help!

Great! You're welcome! :)

> It's a shame LLVM still suck on generating correct object code from C.
> I guess we'll define a helper above in one of the headers to not
> copy-paste it back and forth between each program wanting to access
> only the generic part of the metadata (which is always being placed at
> the end).

Yeah, it would be nice if LLVM could just generate code that works, but
in the meantime we'll just have to define a helper. I suspect we'll need
to define some helper functions to work with xdp-hints style metadata
field anyway, so wrapping the reader into that somewhere would probably
make sense, no?

-Toke


^ permalink raw reply

* Re: [PATCH] net: linkwatch: ignore events for unregistered netdevs
From: Lukas Wunner @ 2022-04-23 19:35 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: Oliver Neukum, David S. Miller, Jakub Kicinski, Jann Horn,
	Oleksij Rempel, Eric Dumazet, netdev, linux-usb, Andrew Lunn,
	Jacky Chou, Willy Tarreau, Lino Sanfilippo, Philipp Rosenberger,
	Heiner Kallweit, Greg Kroah-Hartman
In-Reply-To: <20220423160723.GA20330@wunner.de>

On Sat, Apr 23, 2022 at 06:07:23PM +0200, Lukas Wunner wrote:
> On Thu, Apr 21, 2022 at 10:02:43AM +0200, Paolo Abeni wrote:
> > On Sun, 2022-04-17 at 09:04 +0200, Lukas Wunner wrote:
> > > --- a/net/core/link_watch.c
> > > +++ b/net/core/link_watch.c
> > > @@ -107,7 +107,8 @@ static void linkwatch_add_event(struct net_device *dev)
> > >  	unsigned long flags;
> > >  
> > >  	spin_lock_irqsave(&lweventlist_lock, flags);
> > > -	if (list_empty(&dev->link_watch_list)) {
> > > +	if (list_empty(&dev->link_watch_list) &&
> > > +	    dev->reg_state < NETREG_UNREGISTERED) {
> > >  		list_add_tail(&dev->link_watch_list, &lweventlist);
> > >  		dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
> > >  	
> > 
> > What about testing dev->reg_state in linkwatch_fire_event() before
> > setting the __LINK_STATE_LINKWATCH_PENDING bit, so that we don't leave
> > the device in an unexpected state?

About __LINK_STATE_LINKWATCH_PENDING being set even though the netdev
is not on link_watch_list:

After this patch (which removes one user of __LINK_STATE_LINKWATCH_PENDING)
the only purpose of the flag is a small speed-up of linkwatch_fire_event():
If the netdev is already on link_watch_list, the function skips acquiring
lweventlist_lock.

I don't think this is a hotpath, so the small speed-up is probably not worth
it and the flag could be removed completely in a follow-up patch.

There is a single other (somewhat oddball) user of the flag in
bond_should_notify_peers() in drivers/net/bonding/bond_main.c.
It would be possible to replace it with "!list_empty(&dev->link_watch_list)".
I don't think acquiring lweventlist_lock is necessary for that because
test_bit() is unordered (per Documentation/atomic_bitops.txt) and the
check is racy anyway.

Thanks,

Lukas

^ permalink raw reply

* Re: [PATCH] dt-bindings: can: renesas,rcar-canfd: Document RZ/G2UL support
From: Krzysztof Kozlowski @ 2022-04-23 18:56 UTC (permalink / raw)
  To: Biju Das, Wolfgang Grandegger, Marc Kleine-Budde, David S. Miller,
	Jakub Kicinski, Paolo Abeni, Rob Herring, Krzysztof Kozlowski
  Cc: Fabrizio Castro, linux-can, netdev, devicetree,
	Geert Uytterhoeven, Chris Paterson, Biju Das,
	Prabhakar Mahadev Lad, linux-renesas-soc
In-Reply-To: <20220423130743.123198-1-biju.das.jz@bp.renesas.com>

On 23/04/2022 15:07, Biju Das wrote:
> Add CANFD binding documentation for Renesas R9A07G043 (RZ/G2UL) SoC.
> 
> Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>


Best regards,
Krzysztof

^ permalink raw reply

* Re: [PATCH net-next] 1588 support on bcm54210pe
From: Richard Cochran @ 2022-04-23 18:16 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: Andrew Lunn, Lasse Johnsen, netdev, Gordon Hollingworth,
	Ahmad Byagowi, Heiner Kallweit, Russell King,
	bcm-kernel-feedback-list
In-Reply-To: <01f35484-e8b6-d0bb-dba7-d1e0407c00ca@gmail.com>

On Sat, Apr 23, 2022 at 07:40:49AM -0700, Florian Fainelli wrote:
> I would prefer that we just stick to adding that code to bcm-phy-lib.[ch]
> which all Broadcom PHY drivers can use and we can decide whether we want to
> add a Kconfig option specifically for PTP.

Sounds good.

Thanks,
Richard

^ permalink raw reply

* Re: [PATCH net-next 4/5] net: dt-bindings: Introduce the Qualcomm IPQESS Ethernet controller
From: Krzysztof Kozlowski @ 2022-04-23 17:49 UTC (permalink / raw)
  To: Maxime Chevallier, davem, Rob Herring
  Cc: netdev, linux-kernel, devicetree, thomas.petazzoni, Andrew Lunn,
	Florian Fainelli, Heiner Kallweit, Russell King, linux-arm-kernel,
	Vladimir Oltean, Luka Perkov, Robert Marko
In-Reply-To: <20220422180305.301882-5-maxime.chevallier@bootlin.com>

On 22/04/2022 20:03, Maxime Chevallier wrote:
> Add the DT binding for the IPQESS Ethernet Controller. This is a simple
> controller, only requiring the phy-mode, interrupts, clocks, and
> possibly a MAC address setting.
> 
> Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
> ---
>  .../devicetree/bindings/net/qcom,ipqess.yaml  | 94 +++++++++++++++++++
>  1 file changed, 94 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/net/qcom,ipqess.yaml
> 
> diff --git a/Documentation/devicetree/bindings/net/qcom,ipqess.yaml b/Documentation/devicetree/bindings/net/qcom,ipqess.yaml
> new file mode 100644
> index 000000000000..8fec5633692f
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/qcom,ipqess.yaml
> @@ -0,0 +1,94 @@
> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/net/qcom,ipqess.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: Qualcomm IPQ ESS EDMA Ethernet Controller Device Tree Bindings

s/Device Tree Bindings//

> +
> +allOf:
> +  - $ref: "ethernet-controller.yaml#"

allOf goes after maintainers.

> +
> +maintainers:
> +  - Maxime Chevallier <maxime.chevallier@bootlin.com>
> +
> +properties:
> +  compatible:
> +    const: qcom,ipq4019e-ess-edma
> +
> +  reg:
> +    maxItems: 1
> +
> +  interrupts:
> +    minItems: 2
> +    maxItems: 32
> +    description: One interrupt per tx and rx queue, with up to 16 queues.
> +
> +  clocks:
> +    maxItems: 1
> +
> +  phy-mode: true
> +
> +  fixed-link: true
> +
> +  mac-address: true

You don't need all these three. They come from ethernet-controller and
you use unevaluatedProperties.

> +
> +required:
> +  - compatible
> +  - reg
> +  - interrupts
> +  - clocks
> +  - phy-mode
> +
> +unevaluatedProperties: false
> +
> +examples:
> +  - |
> +    gmac: ethernet@c080000 {
> +        compatible = "qcom,ipq4019-ess-edma";
> +        reg = <0xc080000 0x8000>;
> +        interrupts = <GIC_SPI  65 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  66 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  67 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  68 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  69 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  70 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  71 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  72 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  73 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  74 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  75 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  76 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  77 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  78 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  79 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI  80 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 240 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 241 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 242 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 243 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 244 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 245 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 246 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 247 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 248 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 249 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 250 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 251 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 252 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 253 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 254 IRQ_TYPE_EDGE_RISING>,
> +                     <GIC_SPI 255 IRQ_TYPE_EDGE_RISING>;
> +
> +        status = "okay";

No status in the example.

> +
> +        phy-mode = "internal";
> +        fixed-link {
> +            speed = <1000>;
> +            full-duplex;
> +            pause;
> +            asym-pause;
> +        };
> +    };
> +
> +...


Best regards,
Krzysztof

^ permalink raw reply

* Re: [PATCH net] net: sched: act_mirred: Reset ct info when mirror/redirect skb
From: Eyal Birger @ 2022-04-23 16:08 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner
  Cc: Hangbin Liu, netdev, jhs, xiyou.wangcong, jiri, davem, kuba,
	ahleihel, dcaratti, aconole, roid, Shmulik Ladkani
In-Reply-To: <CALnP8ZY9hkiWyxjrVTdq=NFA0PYjt7f9YbSEJrbt-EQoRAk6gw@mail.gmail.com>

On Fri, Apr 22, 2022 at 4:41 PM Marcelo Ricardo Leitner
<mleitner@redhat.com> wrote:
>
> On Thu, Apr 21, 2022 at 07:00:07PM +0800, Hangbin Liu wrote:
> > Hi Eyal,
> > On Tue, Apr 19, 2022 at 09:14:38PM +0300, Eyal Birger wrote:
> > > > > > On Mon,  9 Aug 2021 15:04:55 +0800 you wrote:
> > > > > > > When mirror/redirect a skb to a different port, the ct info should be reset
> > > > > > > for reclassification. Or the pkts will match unexpected rules. For example,
> > > > > > > with following topology and commands:
> > > > > > >
> > > > > > >     -----------
> > > > > > >               |
> > > > > > >        veth0 -+-------
> > > > > > >               |
> > > > > > >        veth1 -+-------
> > > > > > >               |
> > > > > > >
> > > > > > > [...]
> > > > > >
> > > > > > Here is the summary with links:
> > > > > >   - [net] net: sched: act_mirred: Reset ct info when mirror/redirect skb
> > > > > >     https://git.kernel.org/netdev/net/c/d09c548dbf3b
> > > > >
> > > > > Unfortunately this commit breaks DNAT when performed before going via mirred
> > > > > egress->ingress.
> > > > >
> > > > > The reason is that connection tracking is lost and therefore a new state
> > > > > is created on ingress.
> > > > >
> > > > > This breaks existing setups.
> > > > >
> > > > > See below a simplified script reproducing this issue.
> >
> > I think we have reached a paradoxical state. Some users don't want to keep the
> > previous ct info after mirror, while others would like to keep it. In my
> > understanding, when we receive a pkt from an interface, the skb should be clean
> > and have no ct info at first. But I may be wrong.
>
> Makes sense to me. Moreover, there were a couple of fixes on this on
> mirred around that time frame/area (like f799ada6bf23 ("net: sched:
> act_mirred: drop dst for the direction from egress to ingress")). That's
> because we are seeing that mirred xmit action when switching to
> ingress direction should be as close skb_scrub_packet. OVS needs this
> scrubbing as well, btw. This ct information could be easily stale if
> there were other packet changes after it.

Makes sense to me too. The main reason for bringing this up was that it's a
subtle change and wasn't trivial to figure out.

>
> Point being, if we really need the knob for backwards compatibility
> here, it may have to be a broader one.

FWIW the dst change was ok in our setups.

>
> >
> > Jamal, Wang Cong, Jiri, do you have any comments?
> >
> > > >
> > > > I guess I can understand why the reproducer triggers it, but I fail to
> > > > see the actual use case you have behind it. Can you please elaborate
> > > > on it?
> > >
> > > One use case we use mirred egress->ingress redirect for is when we want to
> > > reroute a packet after applying some change to the packet which would affect
> > > its routing. for example consider a bpf program running on tc ingress (after
> > > mirred) setting the skb->mark based on some criteria.
> > >
> > > So you have something like:
> > >
> > > packet routed to dummy device based on some criteria ->
> > >   mirred redirect to ingress ->
> > >     classification by ebpf logic at tc ingress ->
> > >        packet routed again
> > >
> > > We have a setup where DNAT is performed before this flow in that case the
> > > ebpf logic needs to see the packet after the NAT.
> >
> > Is it possible to check whether it's need to set the skb->mark before DNAT?
> > So we can update it before egress and no need to re-route.

For future reference, we worked around this issue by moving some of the
relevant ebpf functionality to the lwt output hook which allows classification
and rerouting.

Eyal.

^ permalink raw reply

* Re: [PATCH] net: linkwatch: ignore events for unregistered netdevs
From: Lukas Wunner @ 2022-04-23 16:07 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: Oliver Neukum, David S. Miller, Jakub Kicinski, Jann Horn,
	Oleksij Rempel, Eric Dumazet, netdev, linux-usb, Andrew Lunn,
	Jacky Chou, Willy Tarreau, Lino Sanfilippo, Philipp Rosenberger,
	Heiner Kallweit, Greg Kroah-Hartman
In-Reply-To: <9325d344e8a6b1a4720022697792a84e545fef62.camel@redhat.com>

On Thu, Apr 21, 2022 at 10:02:43AM +0200, Paolo Abeni wrote:
> On Sun, 2022-04-17 at 09:04 +0200, Lukas Wunner wrote:
> > --- a/net/core/link_watch.c
> > +++ b/net/core/link_watch.c
> > @@ -107,7 +107,8 @@ static void linkwatch_add_event(struct net_device *dev)
> >  	unsigned long flags;
> >  
> >  	spin_lock_irqsave(&lweventlist_lock, flags);
> > -	if (list_empty(&dev->link_watch_list)) {
> > +	if (list_empty(&dev->link_watch_list) &&
> > +	    dev->reg_state < NETREG_UNREGISTERED) {
> >  		list_add_tail(&dev->link_watch_list, &lweventlist);
> >  		dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
> >  	
> 
> What about testing dev->reg_state in linkwatch_fire_event() before
> setting the __LINK_STATE_LINKWATCH_PENDING bit, so that we don't leave
> the device in an unexpected state?

That would be racy because linkwatch_fire_event() may see a reg_state of
REGISTERED or UNREGISTERING and then add the device to link_watch_list,
even though reg_state may be changed to UNREGISTERED in-between.

That race is avoided by performing the reg_state check under
lweventlist_lock:

Scenario 1:

CPU 1:                                  CPU 2:
                                        linkwatch_add_event(dev);
dev->reg_state = NETREG_UNREGISTERED;
linkwatch_forget_dev(dev);

In this scenario, CPU 2 sees the old value of dev->reg_state and
adds the device to link_watch_list, but CPU 1 will subsequently
delete it from the list.

Scenario 2:

CPU 1:                                  CPU 2:
dev->reg_state = NETREG_UNREGISTERED;
linkwatch_forget_dev(dev);
                                        linkwatch_add_event(dev);

In this scenario, CPU 2 refrains from adding the device to
link_watch_list.  It is guaranteed to see the new reg_state
due to the memory barriers implied by lweventlist_lock,
which is taken both by linkwatch_forget_dev() and
linkwatch_add_event().

Note that an unregistered netdev has been stopped, so the portion
of linkwatch_do_dev() which is constrained to the netdev being IFF_UP
is skipped.  The only portion that's executed is rfc2863_policy(),
which updates the operstate.

I believe that operstate changes are irrelevant and unnecessary after
the netdev has been unregistered.

Same for the fact that __LINK_STATE_LINKWATCH_PENDING may be set even
though the netdev is not on link_watch_list.  That should be irrelevant
for an unregistered netdev.

> Other than that, it looks good to me, but potentially quite risky.

To mitigate risk I suggest letting the patch bake in linux-next
for a couple of weeks.

However I would then have to respin it because the declaration of
linkwatch_run_queue() was moved from include/linux/netdevice.h to
net/core/dev.h by Jakub's net-next commit 6264f58ca0e5 ("net:
extract a few internals from netdevice.h").

Let me know if you want me to respin the patch based on net-next.

> Looking at the original report it looks like the issue could be
> resolved with a more usb-specific change: e.g. it looks like
> usbnet_defer_kevent() is not acquiring a dev reference as it should.
> 
> Have you considered that path?

First of all, the diffstat of the patch shows this is an opportunity
to reduce LoC as well as simplify and speed up device teardown.

Second, the approach you're proposing won't work if a driver calls
netif_carrier_on/off() after unregister_netdev().

It seems prudent to prevent such a misbehavior in *any* driver,
not just usbnet.  usbnet may not be the only one doing it wrong.
Jann pointed out that there are more syzbot reports related
to a UAF in linkwatch:

https://lore.kernel.org/netdev/?q=__linkwatch_run_queue+syzbot

Third, I think an API which schedules work, invisibly to the driver,
is dangerous and misguided.  If it is illegal to call
netif_carrier_on/off() for an unregistered but not yet freed netdev,
catch that in core networking code and don't expect drivers to respect
a rule which isn't even documented.

Thanks,

Lukas

^ permalink raw reply

* Re: [Patch net-next] net: phy: LAN937x: add interrupt support for link detection
From: Andrew Lunn @ 2022-04-23 15:59 UTC (permalink / raw)
  To: Arun Ramadoss
  Cc: linux-kernel, netdev, Paolo Abeni, Jakub Kicinski,
	David S. Miller, Russell King, Heiner Kallweit, UNGLinuxDriver
In-Reply-To: <20220423154727.29052-1-arun.ramadoss@microchip.com>

On Sat, Apr 23, 2022 at 09:17:27PM +0530, Arun Ramadoss wrote:
> Added the config_intr and handle_interrupt for the LAN937x phy which is
> same as the LAN87xx phy.
> 
> Signed-off-by: Arun Ramadoss <arun.ramadoss@microchip.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

While looking at the code, I noticed LAN87XX has PHY_POLL_CABLE_TEST
whereas LAN937X does not. Is this correct?

    Andrew

^ permalink raw reply

* Re: [PATCH net] virtio_net: fix wrong buf address calculation when using xdp
From: Nikolay Aleksandrov @ 2022-04-23 15:55 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: kuba, davem, stable, Jason Wang, Daniel Borkmann,
	Michael S. Tsirkin, virtualization, netdev
In-Reply-To: <1650726113.2334588-1-xuanzhuo@linux.alibaba.com>

On 23/04/2022 18:01, Xuan Zhuo wrote:
> On Sat, 23 Apr 2022 17:58:05 +0300, Nikolay Aleksandrov <razor@blackwall.org> wrote:
>> On 23/04/2022 17:36, Xuan Zhuo wrote:
>>> On Sat, 23 Apr 2022 17:30:11 +0300, Nikolay Aleksandrov <razor@blackwall.org> wrote:
>>>> On 23/04/2022 17:16, Nikolay Aleksandrov wrote:
>>>>> On 23/04/2022 16:31, Xuan Zhuo wrote:
>>>>>> On Sat, 23 Apr 2022 14:26:12 +0300, Nikolay Aleksandrov <razor@blackwall.org> wrote:
[snip]                                   metasize,
>>>>>> -                                                      VIRTIO_XDP_HEADROOM);
>>>>>> +                                                      VIRTIO_XDP_HEADROOM - metasize);
>>>>>>                                 return head_skb;
>>>>>>                         }
>>>>>>                         break;
>>>>>
>>>>> That patch doesn't fix it, as I said with xdp you can move both data and data_meta.
>>>>> So just doing that would take care of the meta, but won't take care of moving data.
>>>>>
>>>>
>>>> Also it doesn't take care of the case where page_to_skb() is called with the original page
>>>> i.e. when we already have headroom, so we hit the next/standard page_to_skb() call (xdp_page == page).
>>>
>>> Yes, you are right.
>>>
>>>>
>>>> The above change guarantees that buf and p will be in the same page
>>>
>>>
>>> How can this be guaranteed?
>>>
>>> 1. For example, we applied for a 32k buffer first, and took away 1500 + hdr_len
>>>    from the allocation.
>>> 2. set xdp
>>> 3. alloc for new buffer
>>>
>>
>> p = page_address(page) + offset;
>> buf = p & PAGE_MASK; // whatever page p lands in is where buf is set
>>
>> => p and buf are always in the same page, no?
> 
> I don't think it is, it's entirely possible to split on two pages.
> 

Ahhh, I completely misinterpreted page_address(). You're right.



^ permalink raw reply

* [Patch net-next] net: phy: LAN937x: add interrupt support for link detection
From: Arun Ramadoss @ 2022-04-23 15:47 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Paolo Abeni, Jakub Kicinski, David S. Miller, Russell King,
	Heiner Kallweit, Andrew Lunn, UNGLinuxDriver, Arun Ramadoss

Add the config_intr and handle_interrupt callbacks for the LAN937x PHY,
which are the same as those of the LAN87xx PHY.

Signed-off-by: Arun Ramadoss <arun.ramadoss@microchip.com>
---
 drivers/net/phy/microchip_t1.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c
index 796fbcb7dafe..d4c93d59bc53 100644
--- a/drivers/net/phy/microchip_t1.c
+++ b/drivers/net/phy/microchip_t1.c
@@ -792,6 +792,8 @@ static struct phy_driver microchip_t1_phy_driver[] = {
 		.flags          = PHY_POLL_CABLE_TEST,
 		.features	= PHY_BASIC_T1_FEATURES,
 		.config_init	= lan87xx_config_init,
+		.config_intr    = lan87xx_phy_config_intr,
+		.handle_interrupt = lan87xx_handle_interrupt,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 		.config_aneg    = lan87xx_config_aneg,

base-commit: cfc1d91a7d78cf9de25b043d81efcc16966d55b3
-- 
2.33.0


^ permalink raw reply related

* Re: [PATCH 2/2] net: dsa: mv88e6xxx: Handle single-chip-address OF property
From: Andrew Lunn @ 2022-04-23 15:41 UTC (permalink / raw)
  To: Nathan Rossi
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	Vladimir Oltean, David S. Miller, Jakub Kicinski, Paolo Abeni
In-Reply-To: <CA+aJhH3EtAxAKy8orC-SU8UnagBCibF3dHXrp78zfjuAzj4vUg@mail.gmail.com>

On Sun, Apr 24, 2022 at 12:41:22AM +1000, Nathan Rossi wrote:
> On Sun, 24 Apr 2022 at 00:07, Andrew Lunn <andrew@lunn.ch> wrote:
> >
> > On Sat, Apr 23, 2022 at 01:14:27PM +0000, Nathan Rossi wrote:
> > > Handle the parsing and use of single chip addressing when the switch has
> > > the single-chip-address property defined. This allows for specifying the
> > > switch as using single chip addressing even when mdio address 0 is used
> > > by another device on the bus. This is a feature of some switches (e.g.
> > > the MV88E6341/MV88E6141) where the switch shares the bus only responding
> > > to the higher 16 addresses.
> >
> > Hi Nathan
> >
> > I think i'm missing something in this explanation:
> >
> > smi.c says:
> >
> > /* The switch ADDR[4:1] configuration pins define the chip SMI device address
> >  * (ADDR[0] is always zero, thus only even SMI addresses can be strapped).
> >  *
> >  * When ADDR is all zero, the chip uses Single-chip Addressing Mode, assuming it
> >  * is the only device connected to the SMI master. In this mode it responds to
> >  * all 32 possible SMI addresses, and thus maps directly the internal devices.
> >  *
> >  * When ADDR is non-zero, the chip uses Multi-chip Addressing Mode, allowing
> >  * multiple devices to share the SMI interface. In this mode it responds to only
> >  * 2 registers, used to indirectly access the internal SMI devices.
> >  *
> >  * Some chips use a different scheme: Only the ADDR4 pin is used for
> >  * configuration, and the device responds to 16 of the 32 SMI
> >  * addresses, allowing two to coexist on the same SMI interface.
> >  */
> >
> > So if ADDR = 0, it takes up the whole bus. And in this case reg = 0.
> > If ADDR != 0, it is in multi chip mode, and DT reg = ADDR.
> >
> > int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
> >                        struct mii_bus *bus, int sw_addr)
> > {
> >         if (chip->info->dual_chip)
> >                 chip->smi_ops = &mv88e6xxx_smi_dual_direct_ops;
> >         else if (sw_addr == 0)
> >                 chip->smi_ops = &mv88e6xxx_smi_direct_ops;
> >         else if (chip->info->multi_chip)
> >                 chip->smi_ops = &mv88e6xxx_smi_indirect_ops;
> >         else
> >                 return -EINVAL;
> >
> > This seems to implement what is above. smi_direct_ops == whole bus,
> > smi_indirect_ops == multi-chip mode.
> >
> > In what situation do you see this not working? What device are you
> > using, what does your DT look like, and what is the ADDR value?
> 
> The device I am using is the MV88E6141, it follows the second scheme
> such that it only responds to the upper 16 of the 32 SMI addresses in
> single chip addressing mode. I am able to define the switch at address
> 0, and everything works. However in the device I am using (Netgate
> SG-3100) the ethernet phys for the non switch ethernet interfaces are
> also on the same mdio bus as the switch. One of those phys is
> configured with address 0. Defining both the ethernet-phy and switch
> as address 0 does not work.
> 
> The device tree I have looks like:
> 
> &mdio {
>     status = "okay";
>     pinctrl-0 = <&mdio_pins>;
>     pinctrl-names = "default";
> 
>     phy0: ethernet-phy@0 {
>         status = "okay";
>         reg = <0>;
>     };
> 
>     phy1: ethernet-phy@1 {
>         status = "okay";
>         reg = <1>;
>     };

So normally, we would have


    switch0: switch0@16 {
        compatible = "marvell,mv88e6141", "marvell,mv88e6085";
        single-chip-address;
        reg = <0>;
        dsa,member = <0 0>;
        status = "okay";

and then I guess you are seeing mdiobus_register_device() returning
-EBUSY because the PHY is also at address 0?

This is what is missing from your explanation. It is always better to
have more than less in the commit message.

So the chip is using addresses 0x10-0x1f, but in order to probe, you
need to put reg = 0, taking up slot 0, clashing with the PHY. Ideally
we want to take up one of the slots in the range 0x10-0x1f. reg=16 on
its own indicates multi-chip mode and the device is using address 16.

O.K, a bit more digging into the datasheet:

For multi-chip mode, for the 6341 family,

The SMI address that is used is determined by the ADDR[3:0]
configuration pins. ADDR[4] must be zero to select the device.

So it can only take the address range 0-f, since ADDR[4] == 0.  So 16
is not even a valid multi-chip address. But it is valid for some other
chips.

So your DT property says: ignore reg, I really am in single chip
mode.

This appears to be a general problem for any device with
.port_base_addr = 0x10.

I'm wondering if a better solution to this is to special-case
reg=16. First try mv88e6xxx_detect() in single chip mode. That will
read register 3. A read should be safe. If we get back a valid ID for
a switch, keep with single chip mode. Otherwise swap to multi-chip
mode. A multi-chip mv88e6xxx_detect() is more dangerous, because that
involves writes.

Looking at the existing DTs, there are only two using multi-chip mode
with reg=16:

arm/boot/dts/armada-370-rd.dts-		reg = <0x10>;
arm/boot/dts/kirkwood-linksys-viper.dts-		reg = <16>;

And I happen to have an armada-370-rd :-)

    Andrew

^ permalink raw reply

* [PATCH memcg v2] net: set proper memcg for net_init hooks allocations
From: Vasily Averin @ 2022-04-23 15:38 UTC (permalink / raw)
  To: Vlastimil Babka, Shakeel Butt
  Cc: kernel, Florian Westphal, linux-kernel, Roman Gushchin,
	Michal Hocko, cgroups, netdev, David S. Miller, Jakub Kicinski,
	Paolo Abeni
In-Reply-To: <202204231806.8O86U791-lkp@intel.com>

__register_pernet_operations() executes init hook of registered
pernet_operation structure in all existing net namespaces.

Typically, these hooks are called by a process associated with
the specified net namespace, and all __GFP_ACCOUNT marked
allocations are accounted to the corresponding container/memcg.

However __register_pernet_operations() calls the hooks in the same
context, and as a result all marked allocations are accounted
to one memcg for all processed net namespaces.

This patch adjusts active memcg for each net namespace and helps
to account memory allocated inside ops_init() into the proper memcg.

Signed-off-by: Vasily Averin <vvs@openvz.org>
---
v2: introduced get/put_net_memcg(),
    new functions are moved under CONFIG_MEMCG_KMEM
    to fix compilation issues reported by Intel's kernel test robot

v1: introduced get_mem_cgroup_from_kmem(), which takes the refcount
    for the found memcg, suggested by Shakeel
---
 include/linux/memcontrol.h | 35 +++++++++++++++++++++++++++++++++++
 net/core/net_namespace.c   |  7 +++++++
 2 files changed, 42 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0abbd685703b..5230d3c5585a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1714,6 +1714,33 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 
 struct mem_cgroup *mem_cgroup_from_obj(void *p);
 
+static inline struct mem_cgroup *get_mem_cgroup_from_kmem(void *p)
+{
+	struct mem_cgroup *memcg;
+
+	rcu_read_lock();
+	do {
+		memcg = mem_cgroup_from_obj(p);
+	} while (memcg && !css_tryget(&memcg->css));
+	rcu_read_unlock();
+	return memcg;
+}
+
+static inline struct mem_cgroup *get_net_memcg(void *p)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = get_mem_cgroup_from_kmem(p);
+
+	if (!memcg)
+		memcg = root_mem_cgroup;
+
+	return memcg;
+}
+static inline void put_net_memcg(struct mem_cgroup *memcg)
+{
+	css_put(&memcg->css);
+}
 #else
 static inline bool mem_cgroup_kmem_disabled(void)
 {
@@ -1766,6 +1793,14 @@ static inline struct mem_cgroup *mem_cgroup_from_obj(void *p)
        return NULL;
 }
 
+static inline struct mem_cgroup *get_net_memcg(void *p)
+{
+	return NULL;
+}
+
+static inline void put_net_memcg(struct mem_cgroup *memcg)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a5b5bb99c644..bf88360b8377 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -26,6 +26,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
+#include <linux/sched/mm.h>
 /*
  *	Our network namespace constructor/destructor lists
  */
@@ -1147,7 +1148,13 @@ static int __register_pernet_operations(struct list_head *list,
 		 * setup_net() and cleanup_net() are not possible.
 		 */
 		for_each_net(net) {
+			struct mem_cgroup *old, *memcg;
+
+			memcg = get_net_memcg(net);
+			old = set_active_memcg(memcg);
 			error = ops_init(ops, net);
+			set_active_memcg(old);
+			put_net_memcg(memcg);
 			if (error)
 				goto out_undo;
 			list_add_tail(&net->exit_list, &net_exit_list);
-- 
2.31.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox