Netdev List
 help / color / mirror / Atom feed
* [PATCH 3/8] can: flexcan: add CAN FD mode support
From: Joakim Zhang @ 2019-07-12  8:02 UTC (permalink / raw)
  To: mkl@pengutronix.de, linux-can@vger.kernel.org
  Cc: wg@grandegger.com, dl-linux-imx, netdev@vger.kernel.org,
	Joakim Zhang
In-Reply-To: <20190712075926.7357-1-qiangqing.zhang@nxp.com>

This patch adds CAN FD mode support to the driver, which means that the
payload size can be extended up to 64 bytes.

When CAN FD with BRS is supported, bit timing is always set in the CBT
register rather than in the CTRL1 register. This extends the range of all
CAN bit timing variables (PRESDIV, PROPSEG, PSEG1, PSEG2 and RJW), which
improves the bit timing accuracy.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
---
 drivers/net/can/flexcan.c | 255 +++++++++++++++++++++++++++++++++-----
 1 file changed, 225 insertions(+), 30 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 5b0a159daa38..23e9407e33ff 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -52,6 +52,7 @@
 #define FLEXCAN_MCR_IRMQ		BIT(16)
 #define FLEXCAN_MCR_LPRIO_EN		BIT(13)
 #define FLEXCAN_MCR_AEN			BIT(12)
+#define FLEXCAN_MCR_FDEN		BIT(11)
 /* MCR_MAXMB: maximum used MBs is MAXMB + 1 */
 #define FLEXCAN_MCR_MAXMB(x)		((x) & 0x7f)
 #define FLEXCAN_MCR_IDAM_A		(0x0 << 8)
@@ -137,6 +138,26 @@
 	 FLEXCAN_ESR_BOFF_INT | FLEXCAN_ESR_ERR_INT | \
 	 FLEXCAN_ESR_WAK_INT)
 
+/* FLEXCAN Bit Timing register (CBT) bits */
+#define FLEXCAN_CBT_BTF		        BIT(31)
+#define FLEXCAN_CBT_EPRESDIV(x)	        (((x) & 0x3ff) << 21)
+#define FLEXCAN_CBT_ERJW(x)		(((x) & 0x1f) << 16)
+#define FLEXCAN_CBT_EPROPSEG(x)	        (((x) & 0x3f) << 10)
+#define FLEXCAN_CBT_EPSEG1(x)		(((x) & 0x1f) << 5)
+#define FLEXCAN_CBT_EPSEG2(x)		((x) & 0x1f)
+
+/* FLEXCAN FD control register (FDCTRL) bits */
+#define FLEXCAN_FDCTRL_FDRATE		BIT(31)
+#define FLEXCAN_FDCTRL_MBDSR1(x)	(((x) & 0x3) << 19)
+#define FLEXCAN_FDCTRL_MBDSR0(x)	(((x) & 0x3) << 16)
+
+/* FLEXCAN FD Bit Timing register (FDCBT) bits */
+#define FLEXCAN_FDCBT_FPRESDIV(x)	(((x) & 0x3ff) << 20)
+#define FLEXCAN_FDCBT_FRJW(x)		(((x) & 0x07) << 16)
+#define FLEXCAN_FDCBT_FPROPSEG(x)	(((x) & 0x1f) << 10)
+#define FLEXCAN_FDCBT_FPSEG1(x)		(((x) & 0x07) << 5)
+#define FLEXCAN_FDCBT_FPSEG2(x)		((x) & 0x07)
+
 /* FLEXCAN interrupt flag register (IFLAG) bits */
 /* Errata ERR005829 step7: Reserve first valid MB */
 #define FLEXCAN_TX_MB_RESERVED_OFF_FIFO		8
@@ -148,6 +169,10 @@
 #define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE	BIT(5)
 
 /* FLEXCAN message buffers */
+#define FLEXCAN_MB_CNT_EDL		BIT(31)
+#define FLEXCAN_MB_CNT_BRS		BIT(30)
+#define FLEXCAN_MB_CNT_ESI		BIT(29)
+
 #define FLEXCAN_MB_CODE_MASK		(0xf << 24)
 #define FLEXCAN_MB_CODE_RX_BUSY_BIT	(0x1 << 24)
 #define FLEXCAN_MB_CODE_RX_INACTIVE	(0x0 << 24)
@@ -192,6 +217,7 @@
 #define FLEXCAN_QUIRK_BROKEN_PERR_STATE	BIT(6) /* No interrupt for error passive */
 #define FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN	BIT(7) /* default to BE register access */
 #define FLEXCAN_QUIRK_SETUP_STOP_MODE		BIT(8) /* Setup stop mode to support wakeup */
+#define FLEXCAN_QUIRK_TIMESTAMP_SUPPORT_FD	BIT(9) /* Use timestamp then support can fd mode */
 
 /* Structure of the message buffer */
 struct flexcan_mb {
@@ -225,7 +251,8 @@ struct flexcan_regs {
 	u32 crcr;		/* 0x44 */
 	u32 rxfgmask;		/* 0x48 */
 	u32 rxfir;		/* 0x4c */
-	u32 _reserved3[12];	/* 0x50 */
+	u32 cbt;                /* 0x50 */
+	u32 _reserved3[11];     /* 0x54 */
 	u8 mb[2][512];		/* 0x80 */
 	/* FIFO-mode:
 	 *			MB
@@ -250,6 +277,9 @@ struct flexcan_regs {
 	u32 rerrdr;		/* 0xaf4 */
 	u32 rerrsynr;		/* 0xaf8 */
 	u32 errsr;		/* 0xafc */
+	u32 _reserved7[64];     /* 0xb00 */
+	u32 fdctrl;             /* 0xc00 */
+	u32 fdcbt;              /* 0xc04 */
 };
 
 struct flexcan_devtype_data {
@@ -337,6 +367,30 @@ static const struct can_bittiming_const flexcan_bittiming_const = {
 	.brp_inc = 1,
 };
 
+static const struct can_bittiming_const flexcan_fd_bittiming_const = {
+	.name = DRV_NAME,
+	.tseg1_min = 2,
+	.tseg1_max = 96,
+	.tseg2_min = 2,
+	.tseg2_max = 32,
+	.sjw_max = 16,
+	.brp_min = 1,
+	.brp_max = 1024,
+	.brp_inc = 1,
+};
+
+static const struct can_bittiming_const flexcan_fd_data_bittiming_const = {
+	.name = DRV_NAME,
+	.tseg1_min = 2,
+	.tseg1_max = 39,
+	.tseg2_min = 2,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 1024,
+	.brp_inc = 1,
+};
+
 /* FlexCAN module is essentially modelled as a little-endian IP in most
  * SoCs, i.e the registers as well as the message buffer areas are
  * implemented in a little-endian fashion.
@@ -612,7 +666,7 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 	u32 can_id;
 	u32 data;
-	u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | (cfd->len << 16);
+	u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | ((can_len2dlc(cfd->len)) << 16);
 	int i;
 
 	if (can_dropped_invalid_skb(dev, skb))
@@ -630,6 +684,9 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de
 	if (cfd->can_id & CAN_RTR_FLAG)
 		ctrl |= FLEXCAN_MB_CNT_RTR;
 
+	if (can_is_canfd_skb(skb))
+		ctrl |= FLEXCAN_MB_CNT_EDL;
+
 	for (i = 0; i < cfd->len; i += sizeof(u32)) {
 		data = be32_to_cpup((__be32 *)&cfd->data[i]);
 		priv->write(data, &priv->tx_mb->data[i / sizeof(u32)]);
@@ -828,8 +885,14 @@ static unsigned int flexcan_mailbox_read(struct can_rx_offload *offload, bool dr
 		reg_ctrl = priv->read(&mb->can_ctrl);
 	}
 
-	if (!drop)
-		*skb = alloc_can_skb(offload->dev, (struct can_frame **)&cfd);
+
+	if (!drop) {
+		if (reg_ctrl & FLEXCAN_MB_CNT_EDL)
+			*skb = alloc_canfd_skb(offload->dev, &cfd);
+		else
+			*skb = alloc_can_skb(offload->dev,
+					     (struct can_frame **)&cfd);
+	}
 
 	if (*skb && cfd) {
 		/* increase timstamp to full 32 bit */
@@ -841,9 +904,20 @@ static unsigned int flexcan_mailbox_read(struct can_rx_offload *offload, bool dr
 		else
 			cfd->can_id = (reg_id >> 18) & CAN_SFF_MASK;
 
-		if (reg_ctrl & FLEXCAN_MB_CNT_RTR)
-			cfd->can_id |= CAN_RTR_FLAG;
-		cfd->len = get_can_dlc((reg_ctrl >> 16) & 0x0F);
+
+		if (reg_ctrl & FLEXCAN_MB_CNT_EDL) {
+			cfd->len = can_dlc2len(get_canfd_dlc((reg_ctrl >> 16) & 0x0F));
+		} else {
+			cfd->len = get_can_dlc((reg_ctrl >> 16) & 0x0F);
+
+			if (reg_ctrl & FLEXCAN_MB_CNT_RTR)
+				cfd->can_id |= CAN_RTR_FLAG;
+		}
+
+		if (reg_ctrl & FLEXCAN_MB_CNT_ESI) {
+			cfd->flags |= CANFD_ESI;
+			netdev_warn(priv->can.dev, "ESI Error\n");
+		}
 
 		for (i = 0; i < cfd->len; i += sizeof(u32)) {
 			__be32 data = cpu_to_be32(priv->read(&mb->data[i / sizeof(u32)]));
@@ -989,27 +1063,14 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
 
 static void flexcan_set_bittiming(struct net_device *dev)
 {
-	const struct flexcan_priv *priv = netdev_priv(dev);
-	const struct can_bittiming *bt = &priv->can.bittiming;
+	struct flexcan_priv *priv = netdev_priv(dev);
+	struct can_bittiming *bt = &priv->can.bittiming;
+	struct can_bittiming *dbt = &priv->can.data_bittiming;
 	struct flexcan_regs __iomem *regs = priv->regs;
-	u32 reg;
+	u32 reg, reg_cbt, reg_fdcbt;
 
 	reg = priv->read(&regs->ctrl);
-	reg &= ~(FLEXCAN_CTRL_PRESDIV(0xff) |
-		 FLEXCAN_CTRL_RJW(0x3) |
-		 FLEXCAN_CTRL_PSEG1(0x7) |
-		 FLEXCAN_CTRL_PSEG2(0x7) |
-		 FLEXCAN_CTRL_PROPSEG(0x7) |
-		 FLEXCAN_CTRL_LPB |
-		 FLEXCAN_CTRL_SMP |
-		 FLEXCAN_CTRL_LOM);
-
-	reg |= FLEXCAN_CTRL_PRESDIV(bt->brp - 1) |
-		FLEXCAN_CTRL_PSEG1(bt->phase_seg1 - 1) |
-		FLEXCAN_CTRL_PSEG2(bt->phase_seg2 - 1) |
-		FLEXCAN_CTRL_RJW(bt->sjw - 1) |
-		FLEXCAN_CTRL_PROPSEG(bt->prop_seg - 1);
-
+	reg &= ~(FLEXCAN_CTRL_LPB | FLEXCAN_CTRL_SMP | FLEXCAN_CTRL_LOM);
 	if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)
 		reg |= FLEXCAN_CTRL_LPB;
 	if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
@@ -1020,9 +1081,102 @@ static void flexcan_set_bittiming(struct net_device *dev)
 	netdev_dbg(dev, "writing ctrl=0x%08x\n", reg);
 	priv->write(reg, &regs->ctrl);
 
-	/* print chip status */
-	netdev_dbg(dev, "%s: mcr=0x%08x ctrl=0x%08x\n", __func__,
-		   priv->read(&regs->mcr), priv->read(&regs->ctrl));
+	if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) {
+		reg_cbt = priv->read(&regs->cbt);
+		reg_cbt &= ~(FLEXCAN_CBT_EPRESDIV(0x3ff) |
+			     FLEXCAN_CBT_EPSEG1(0x1f) |
+			     FLEXCAN_CBT_EPSEG2(0x1f) |
+			     FLEXCAN_CBT_ERJW(0x1f) |
+			     FLEXCAN_CBT_EPROPSEG(0x3f) |
+			     FLEXCAN_CBT_BTF);
+
+		/* CBT[EPSEG1] is 5 bit long and CBT[EPROPSEG] is 6 bit long.
+		 * The can_calc_bittiming tries to divide the tseg1 equally
+		 * between phase_seg1 and prop_seg, which may not fit in CBT
+		 * register. Therefore, if phase_seg1 is more than possible
+		 * value, increase prop_seg and decrease phase_seg1
+		 */
+		if (bt->phase_seg1 > 0x20) {
+			bt->prop_seg += (bt->phase_seg1 - 0x20);
+			bt->phase_seg1 = 0x20;
+		}
+
+		reg_cbt = FLEXCAN_CBT_EPRESDIV(bt->brp - 1) |
+				FLEXCAN_CBT_EPSEG1(bt->phase_seg1 - 1) |
+				FLEXCAN_CBT_EPSEG2(bt->phase_seg2 - 1) |
+				FLEXCAN_CBT_ERJW(bt->sjw - 1) |
+				FLEXCAN_CBT_EPROPSEG(bt->prop_seg - 1) |
+				FLEXCAN_CBT_BTF;
+		priv->write(reg_cbt, &regs->cbt);
+
+		netdev_dbg(dev, "bt: prediv %d seg1 %d seg2 %d rjw %d propseg %d\n",
+			   bt->brp - 1, bt->phase_seg1 - 1, bt->phase_seg2 - 1,
+			   bt->sjw - 1, bt->prop_seg - 1);
+
+		if (priv->can.ctrlmode & CAN_CTRLMODE_FD) {
+			reg_fdcbt = priv->read(&regs->fdcbt);
+			reg_fdcbt &= ~(FLEXCAN_FDCBT_FPRESDIV(0x3ff) |
+				       FLEXCAN_FDCBT_FPSEG1(0x07) |
+				       FLEXCAN_FDCBT_FPSEG2(0x07) |
+				       FLEXCAN_FDCBT_FRJW(0x07) |
+				       FLEXCAN_FDCBT_FPROPSEG(0x1f));
+
+			/* FDCBT[FPSEG1] is 3 bit long and FDCBT[FPROPSEG] is 5 bit long.
+			 * The can_calc_bittiming tries to divide the tseg1 equally
+			 * between phase_seg1 and prop_seg, which may not fit in FDCBT
+			 * register. Therefore, if phase_seg1 is more than possible
+			 * value, increase prop_seg and decrease phase_seg1
+			 */
+			if (dbt->phase_seg1 > 0x8) {
+				dbt->prop_seg += (dbt->phase_seg1 - 0x8);
+				dbt->phase_seg1 = 0x8;
+			}
+
+			reg_fdcbt = FLEXCAN_FDCBT_FPRESDIV(dbt->brp - 1) |
+					FLEXCAN_FDCBT_FPSEG1(dbt->phase_seg1 - 1) |
+					FLEXCAN_FDCBT_FPSEG2(dbt->phase_seg2 - 1) |
+					FLEXCAN_FDCBT_FRJW(dbt->sjw - 1) |
+					FLEXCAN_FDCBT_FPROPSEG(dbt->prop_seg);
+			priv->write(reg_fdcbt, &regs->fdcbt);
+
+			if (bt->brp != dbt->brp)
+				netdev_warn(dev, "Warning!! data brp = %d and brp = %d don't match.\n"
+					    "flexcan may not work. consider using different bitrate or data bitrate\n",
+					    dbt->brp, bt->brp);
+
+			netdev_dbg(dev, "fdbt: prediv %d seg1 %d seg2 %d rjw %d propseg %d\n",
+				   dbt->brp - 1, dbt->phase_seg1 - 1, dbt->phase_seg2 - 1,
+				   dbt->sjw - 1, dbt->prop_seg);
+
+			netdev_dbg(dev, "%s: mcr=0x%08x ctrl=0x%08x cbt=0x%08x fdcbt=0x%08x\n",
+				   __func__, priv->read(&regs->mcr),
+				   priv->read(&regs->ctrl),
+				   priv->read(&regs->cbt),
+				   priv->read(&regs->fdcbt));
+		}
+	} else {
+		reg = priv->read(&regs->ctrl);
+		reg &= ~(FLEXCAN_CTRL_PRESDIV(0xff) |
+			 FLEXCAN_CTRL_RJW(0x3) |
+			 FLEXCAN_CTRL_PSEG1(0x7) |
+			 FLEXCAN_CTRL_PSEG2(0x7) |
+			 FLEXCAN_CTRL_PROPSEG(0x7));
+
+		reg |= FLEXCAN_CTRL_PRESDIV(bt->brp - 1) |
+			FLEXCAN_CTRL_PSEG1(bt->phase_seg1 - 1) |
+			FLEXCAN_CTRL_PSEG2(bt->phase_seg2 - 1) |
+			FLEXCAN_CTRL_RJW(bt->sjw - 1) |
+			FLEXCAN_CTRL_PROPSEG(bt->prop_seg - 1);
+		priv->write(reg, &regs->ctrl);
+
+		netdev_dbg(dev, "bt: prediv %d seg1 %d seg2 %d rjw %d propseg %d\n",
+			   bt->brp - 1, bt->phase_seg1 - 1, bt->phase_seg2 - 1,
+			   bt->sjw - 1, bt->prop_seg - 1);
+
+		/* print chip status */
+		netdev_dbg(dev, "%s: mcr=0x%08x ctrl=0x%08x\n", __func__,
+			   priv->read(&regs->mcr), priv->read(&regs->ctrl));
+	}
 }
 
 /* flexcan_chip_start
@@ -1034,7 +1188,7 @@ static int flexcan_chip_start(struct net_device *dev)
 {
 	struct flexcan_priv *priv = netdev_priv(dev);
 	struct flexcan_regs __iomem *regs = priv->regs;
-	u32 reg_mcr, reg_ctrl, reg_ctrl2, reg_mecr;
+	u32 reg_mcr, reg_ctrl, reg_ctrl2, reg_mecr, reg_fdctrl;
 	u64 reg_imask;
 	int err, i;
 	struct flexcan_mb __iomem *mb;
@@ -1131,6 +1285,26 @@ static int flexcan_chip_start(struct net_device *dev)
 	netdev_dbg(dev, "%s: writing ctrl=0x%08x", __func__, reg_ctrl);
 	priv->write(reg_ctrl, &regs->ctrl);
 
+	/* FDCTRL */
+	if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) {
+		reg_fdctrl = priv->read(&regs->fdctrl) & ~FLEXCAN_FDCTRL_FDRATE;
+		reg_fdctrl &= ~(FLEXCAN_FDCTRL_MBDSR1(0x3) | FLEXCAN_FDCTRL_MBDSR0(0x3));
+		reg_mcr = priv->read(&regs->mcr) & ~FLEXCAN_MCR_FDEN;
+
+		/* support BRS when set CAN FD mode
+		 * 64 bytes payload per MB and 7 MBs per RAM block by default
+		 * enable CAN FD mode
+		 */
+		if (priv->can.ctrlmode & CAN_CTRLMODE_FD) {
+			reg_fdctrl |= FLEXCAN_FDCTRL_FDRATE;
+			reg_fdctrl |= FLEXCAN_FDCTRL_MBDSR1(0x3) | FLEXCAN_FDCTRL_MBDSR0(0x3);
+			reg_mcr |= FLEXCAN_MCR_FDEN;
+		}
+
+		priv->write(reg_fdctrl, &regs->fdctrl);
+		priv->write(reg_mcr, &regs->mcr);
+	}
+
 	if ((priv->devtype_data->quirks & FLEXCAN_QUIRK_ENABLE_EACEN_RRS)) {
 		reg_ctrl2 = priv->read(&regs->ctrl2);
 		reg_ctrl2 |= FLEXCAN_CTRL2_EACEN | FLEXCAN_CTRL2_RRS;
@@ -1255,6 +1429,12 @@ static int flexcan_open(struct net_device *dev)
 	struct flexcan_priv *priv = netdev_priv(dev);
 	int err;
 
+	if ((priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) &&
+	    (priv->can.ctrlmode & CAN_CTRLMODE_FD)) {
+		netdev_err(dev, "three samples mode and fd mode can't be used together\n");
+		return -EINVAL;
+	}
+
 	err = pm_runtime_get_sync(priv->dev);
 	if (err < 0)
 		return err;
@@ -1267,7 +1447,10 @@ static int flexcan_open(struct net_device *dev)
 	if (err)
 		goto out_close;
 
-	priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN;
+	if (priv->can.ctrlmode & CAN_CTRLMODE_FD)
+		priv->mb_size = sizeof(struct flexcan_mb) + CANFD_MAX_DLEN;
+	else
+		priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN;
 	priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) +
 			 (sizeof(priv->regs->mb[1]) / priv->mb_size);
 
@@ -1607,6 +1790,18 @@ static int flexcan_probe(struct platform_device *pdev)
 	priv->devtype_data = devtype_data;
 	priv->reg_xceiver = reg_xceiver;
 
+	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_TIMESTAMP_SUPPORT_FD) {
+		if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
+			priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD;
+			priv->can.bittiming_const = &flexcan_fd_bittiming_const;
+			priv->can.data_bittiming_const = &flexcan_fd_data_bittiming_const;
+		} else {
+			dev_err(&pdev->dev, "can fd mode can't work on fifo mode\n");
+			err = -EINVAL;
+			goto failed_register;
+		}
+	}
+
 	pm_runtime_get_noresume(&pdev->dev);
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
-- 
2.17.1


^ permalink raw reply related

* [PATCH 6/8] can: flexcan: add Transceiver Delay Compensation support
From: Joakim Zhang @ 2019-07-12  8:02 UTC (permalink / raw)
  To: mkl@pengutronix.de, linux-can@vger.kernel.org
  Cc: wg@grandegger.com, dl-linux-imx, netdev@vger.kernel.org,
	Joakim Zhang
In-Reply-To: <20190712075926.7357-1-qiangqing.zhang@nxp.com>

The CAN FD protocol allows the transmission and reception of data at a higher
bit rate than the nominal rate used in the arbitration phase when the message's
BRS bit is set.

The TDC mechanism is effective only during the data phase of FD frames
having BRS bit set. It has no effect either on non-FD frames, or on FD
frames transmitted at normal bit rate.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
---
 drivers/net/can/flexcan.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index daf4f0e88224..2c48151e431f 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -149,8 +149,10 @@
 
 /* FLEXCAN FD control register (FDCTRL) bits */
 #define FLEXCAN_FDCTRL_FDRATE		BIT(31)
+#define FLEXCAN_FDCTRL_TDCEN		BIT(15)
 #define FLEXCAN_FDCTRL_MBDSR1(x)	(((x) & 0x3) << 19)
 #define FLEXCAN_FDCTRL_MBDSR0(x)	(((x) & 0x3) << 16)
+#define FLEXCAN_FDCTRL_TDCOFF(x)	(((x) & 0x1f) << 8)
 
 /* FLEXCAN FD Bit Timing register (FDCBT) bits */
 #define FLEXCAN_FDCBT_FPRESDIV(x)	(((x) & 0x3ff) << 20)
@@ -1075,7 +1077,7 @@ static void flexcan_set_bittiming(struct net_device *dev)
 	struct can_bittiming *bt = &priv->can.bittiming;
 	struct can_bittiming *dbt = &priv->can.data_bittiming;
 	struct flexcan_regs __iomem *regs = priv->regs;
-	u32 reg, reg_cbt, reg_fdcbt;
+	u32 reg, reg_cbt, reg_fdcbt, reg_fdctrl;
 
 	reg = priv->read(&regs->ctrl);
 	reg &= ~(FLEXCAN_CTRL_LPB | FLEXCAN_CTRL_SMP | FLEXCAN_CTRL_LOM);
@@ -1147,6 +1149,19 @@ static void flexcan_set_bittiming(struct net_device *dev)
 					FLEXCAN_FDCBT_FPROPSEG(dbt->prop_seg);
 			priv->write(reg_fdcbt, &regs->fdcbt);
 
+			/* enable transceiver delay compensation(TDC) for fd frame.
+			 * TDC must be disabled when Loop Back mode is enabled.
+			 */
+			reg_fdctrl = priv->read(&regs->fdctrl);
+			if (!(reg & FLEXCAN_CTRL_LPB)) {
+				reg_fdctrl |= FLEXCAN_FDCTRL_TDCEN;
+				reg_fdctrl &= ~FLEXCAN_FDCTRL_TDCOFF(0x1f);
+				/* for the TDC to work reliably, the offset has to use optimal settings */
+				reg_fdctrl |= FLEXCAN_FDCTRL_TDCOFF(((dbt->phase_seg1 - 1) + dbt->prop_seg + 2) *
+								    ((dbt->brp -1) + 1));
+			}
+			priv->write(reg_fdctrl, &regs->fdctrl);
+
 			if (bt->brp != dbt->brp)
 				netdev_warn(dev, "Warning!! data brp = %d and brp = %d don't match.\n"
 					    "flexcan may not work. consider using different bitrate or data bitrate\n",
@@ -1296,6 +1311,7 @@ static int flexcan_chip_start(struct net_device *dev)
 	/* FDCTRL */
 	if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) {
 		reg_fdctrl = priv->read(&regs->fdctrl) & ~FLEXCAN_FDCTRL_FDRATE;
+		reg_fdctrl &= ~FLEXCAN_FDCTRL_TDCEN;
 		reg_fdctrl &= ~(FLEXCAN_FDCTRL_MBDSR1(0x3) | FLEXCAN_FDCTRL_MBDSR0(0x3));
 		reg_mcr = priv->read(&regs->mcr) & ~FLEXCAN_MCR_FDEN;
 		reg_ctrl2 = priv->read(&regs->ctrl2) & ~FLEXCAN_CTRL2_ISOCANFDEN;
-- 
2.17.1


^ permalink raw reply related

* [PATCH 7/8] can: flexcan: add imx8qm support
From: Joakim Zhang @ 2019-07-12  8:02 UTC (permalink / raw)
  To: mkl@pengutronix.de, linux-can@vger.kernel.org
  Cc: wg@grandegger.com, dl-linux-imx, netdev@vger.kernel.org,
	Joakim Zhang
In-Reply-To: <20190712075926.7357-1-qiangqing.zhang@nxp.com>

The Flexcan on i.MX8QM supports CAN FD protocol.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
---
 drivers/net/can/flexcan.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 2c48151e431f..f1fdaae52ef4 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -346,6 +346,12 @@ static const struct flexcan_devtype_data fsl_imx6q_devtype_data = {
 		FLEXCAN_QUIRK_SETUP_STOP_MODE,
 };
 
+static struct flexcan_devtype_data fsl_imx8qm_devtype_data = {
+	.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+		FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+		FLEXCAN_QUIRK_TIMESTAMP_SUPPORT_FD,
+};
+
 static const struct flexcan_devtype_data fsl_vf610_devtype_data = {
 	.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
 		FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP |
@@ -1703,6 +1709,7 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev)
 }
 
 static const struct of_device_id flexcan_of_match[] = {
+	{ .compatible = "fsl,imx8qm-flexcan", .data = &fsl_imx8qm_devtype_data, },
 	{ .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, },
 	{ .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, },
 	{ .compatible = "fsl,imx53-flexcan", .data = &fsl_imx25_devtype_data, },
-- 
2.17.1


^ permalink raw reply related

* [PATCH 8/8] can: flexcan: add lx2160ar1 support
From: Joakim Zhang @ 2019-07-12  8:03 UTC (permalink / raw)
  To: mkl@pengutronix.de, linux-can@vger.kernel.org
  Cc: wg@grandegger.com, dl-linux-imx, netdev@vger.kernel.org,
	Joakim Zhang
In-Reply-To: <20190712075926.7357-1-qiangqing.zhang@nxp.com>

The Flexcan on lx2160ar1 supports CAN FD protocol.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
---
 drivers/net/can/flexcan.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index f1fdaae52ef4..f5c66f284c70 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -358,6 +358,12 @@ static const struct flexcan_devtype_data fsl_vf610_devtype_data = {
 		FLEXCAN_QUIRK_BROKEN_PERR_STATE,
 };
 
+static const struct flexcan_devtype_data fsl_lx2160a_r1_devtype_data = {
+	.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+		FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+		FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_TIMESTAMP_SUPPORT_FD,
+};
+
 static const struct flexcan_devtype_data fsl_ls1021a_r2_devtype_data = {
 	.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
 		FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
@@ -1709,6 +1715,7 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev)
 }
 
 static const struct of_device_id flexcan_of_match[] = {
+	{ .compatible = "fsl,lx2160ar1-flexcan", .data = &fsl_lx2160a_r1_devtype_data, },
 	{ .compatible = "fsl,imx8qm-flexcan", .data = &fsl_imx8qm_devtype_data, },
 	{ .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, },
 	{ .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, },
-- 
2.17.1


^ permalink raw reply related

* [GIT PULL] 9p updates for 5.3
From: Dominique Martinet @ 2019-07-12  8:04 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: v9fs-developer, linux-kernel, netdev

Hi Linus,

Here is a 9p update for 5.3, just a couple of fixes that have been
sitting here for too long as I missed the 5.2 merge window.

I have two more patches that I didn't have time to test early enough for
this, but they are also plain detail fixes; please let me know if you would
prefer having me send a pull request for -rc2 after a week in -next or
if I should just wait until the next window.
There's little risk but I'm usually rather conservative on this.


The following changes since commit 5908e6b738e3357af42c10e1183753c70a0117a9:

  Linux 5.0-rc8 (2019-02-24 16:46:45 -0800)

are available in the git repository at:

  git://github.com/martinetd/linux tags/9p-for-5.3

for you to fetch changes up to 80a316ff16276b36d0392a8f8b2f63259857ae98:

  9p/xen: Add cleanup path in p9_trans_xen_init (2019-05-15 13:00:07
  +0000)

----------------------------------------------------------------
9p pull request for inclusion in 5.3

Two small fixes to properly clean up the 9p transports list if virtio/xen
module initialization fails.
9p might otherwise try to access memory from a module that failed to
register and got freed.

----------------------------------------------------------------
YueHaibing (2):
      9p/virtio: Add cleanup path in p9_virtio_init
      9p/xen: Add cleanup path in p9_trans_xen_init

 net/9p/trans_virtio.c |    8 +++++++-
 net/9p/trans_xen.c    |    8 +++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

^ permalink raw reply

* Re: [PATCH v4 bpf-next 0/4] selftests/bpf: fix compiling loop{1,2,3}.c on s390
From: Ilya Leoshkevich @ 2019-07-12  8:55 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: bpf, Networking, Y Song, daniel, davem, ast
In-Reply-To: <20190711203508.GC16709@mini-arch>

> Am 11.07.2019 um 22:35 schrieb Stanislav Fomichev <sdf@fomichev.me>:
> 
> On 07/11, Ilya Leoshkevich wrote:
>> Use PT_REGS_RC(ctx) instead of ctx->rax, which is not present on s390.
>> 
>> This patch series consists of three preparatory commits, which make it
>> possible to use PT_REGS_RC in BPF selftests, followed by the actual fix.
>> 
> Still looks good to me, thanks!
> 
> Reviewed-by: Stanislav Fomichev <sdf@google.com>
> 
> Again, should probably go via bpf to fix the existing tests, not bpf-next
> (but I see bpf tree is not synced with net tree yet).

Sorry, I missed your comment the last time. You are right - that’s the
reason I’ve been sending this to bpf-next so far — loop*.c don’t even
exist in the bpf tree.

^ permalink raw reply

* [PATCH] [net-next, netfilter] mlx5: avoid unused variable warning
From: Arnd Bergmann @ 2019-07-12  8:57 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky, David S. Miller
  Cc: Arnd Bergmann, Pablo Neira Ayuso, Jakub Kicinski, Tariq Toukan,
	Or Gerlitz, Maxim Mikityanskiy, Eran Ben Elisha, Aya Levin,
	netdev, linux-rdma, linux-kernel

Without CONFIG_MLX5_ESWITCH we get a harmless warning:

drivers/net/ethernet/mellanox/mlx5/core/en_main.c:3467:21: error: unused variable 'priv' [-Werror,-Wunused-variable]
        struct mlx5e_priv *priv = netdev_priv(dev);

Hide the declaration in the same #ifdef as its usage.

Fixes: 4e95bc268b91 ("net: flow_offload: add flow_block_cb_setup_simple()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6d0ae87c8ded..b562ba904ea1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3464,7 +3464,9 @@ static LIST_HEAD(mlx5e_block_cb_list);
 static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			  void *type_data)
 {
+#ifdef CONFIG_MLX5_ESWITCH
 	struct mlx5e_priv *priv = netdev_priv(dev);
+#endif
 
 	switch (type) {
 #ifdef CONFIG_MLX5_ESWITCH
-- 
2.20.0


^ permalink raw reply related

* Re: [PATCH v4 bpf-next 1/4] selftests/bpf: compile progs with -D__TARGET_ARCH_$(SRCARCH)
From: Ilya Leoshkevich @ 2019-07-12  8:59 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Y Song, Daniel Borkmann, Stanislav Fomichev,
	David S. Miller, Alexei Starovoitov
In-Reply-To: <CAEf4BzYwwqn9ATwPyVcJ8nBQM+rvaFp7KBFjqbYY4GKda3G8jA@mail.gmail.com>

> Am 12.07.2019 um 02:53 schrieb Andrii Nakryiko <andrii.nakryiko@gmail.com>:
> 
> On Thu, Jul 11, 2019 at 7:32 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>> 
>> This opens up the possibility of accessing registers in an
>> arch-independent way.
>> 
>> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
>> ---
>> tools/testing/selftests/bpf/Makefile | 4 +++-
>> 1 file changed, 3 insertions(+), 1 deletion(-)
>> 
>> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
>> index 2620406a53ec..ad84450e4ab8 100644
>> --- a/tools/testing/selftests/bpf/Makefile
>> +++ b/tools/testing/selftests/bpf/Makefile
>> @@ -1,4 +1,5 @@
>> # SPDX-License-Identifier: GPL-2.0
>> +include ../../../scripts/Makefile.arch
>> 
>> LIBDIR := ../../../lib
>> BPFDIR := $(LIBDIR)/bpf
>> @@ -138,7 +139,8 @@ CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
>> 
>> CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
>>              $(CLANG_SYS_INCLUDES) \
>> -             -Wno-compare-distinct-pointer-types
>> +             -Wno-compare-distinct-pointer-types \
>> +             -D__TARGET_ARCH_$(SRCARCH)
> 
> samples/bpf/Makefile uses $(ARCH), why does it work for samples?
> Should we update samples/bpf/Makefile as well?

I believe that in common cases both are okay, but judging by
linux:Makefile and linux:tools/scripts/Makefile.arch, one could use e.g.
ARCH=i686, and that would be converted to SRCARCH=x86. So IMHO SRCARCH
is safer, and we should change samples/bpf/Makefile. I could send a
patch separately.

^ permalink raw reply

* [PATCH] [net-next] cxgb4: reduce kernel stack usage in cudbg_collect_mem_region()
From: Arnd Bergmann @ 2019-07-12  9:06 UTC (permalink / raw)
  To: Vishal Kulkarni, David S. Miller
  Cc: Arnd Bergmann, Rahul Lakkireddy, Ganesh Goudar, Alexios Zavras,
	Arjun Vynipadath, Surendra Mobiya, netdev, linux-kernel,
	clang-built-linux

The cudbg_collect_mem_region() and cudbg_read_fw_mem() both use several
hundred bytes of kernel stack space. One gets inlined into the other,
which causes the stack usage to be combined beyond the warning limit
when building with clang:

drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c:1057:12: error: stack frame size of 1244 bytes in function 'cudbg_collect_mem_region' [-Werror,-Wframe-larger-than=]

Restructuring cudbg_collect_mem_region() lets clang do the same
optimization that gcc does and reuse the stack slots as it can
see that the large variables are never used together.

A better fix might be to avoid using cudbg_meminfo on the stack
altogether, but that requires a larger rewrite.

Fixes: a1c69520f785 ("cxgb4: collect MC memory dump")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 .../net/ethernet/chelsio/cxgb4/cudbg_lib.c    | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index a76529a7662d..c2e92786608b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -1054,14 +1054,12 @@ static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init,
 	}
 }
 
-static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
-				    struct cudbg_buffer *dbg_buff,
-				    struct cudbg_error *cudbg_err,
-				    u8 mem_type)
+static unsigned long cudbg_mem_region_size(struct cudbg_init *pdbg_init,
+					   struct cudbg_error *cudbg_err,
+					   u8 mem_type)
 {
 	struct adapter *padap = pdbg_init->adap;
 	struct cudbg_meminfo mem_info;
-	unsigned long size;
 	u8 mc_idx;
 	int rc;
 
@@ -1075,7 +1073,16 @@ static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
 	if (rc)
 		return rc;
 
-	size = mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base;
+	return mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base;
+}
+
+static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
+				    struct cudbg_buffer *dbg_buff,
+				    struct cudbg_error *cudbg_err,
+				    u8 mem_type)
+{
+	unsigned long size = cudbg_mem_region_size(pdbg_init, cudbg_err, mem_type);
+
 	return cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type, size,
 				 cudbg_err);
 }
-- 
2.20.0


^ permalink raw reply related

* Re: [PATCH net-next 00/11] Add drop monitor for offloaded data paths
From: Toke Høiland-Jørgensen @ 2019-07-12  9:27 UTC (permalink / raw)
  To: Neil Horman, Ido Schimmel
  Cc: David Miller, netdev, jiri, mlxsw, dsahern, roopa, nikolay, andy,
	pablo, jakub.kicinski, pieter.jansenvanvuuren, andrew, f.fainelli,
	vivien.didelot, idosch
In-Reply-To: <20190711235354.GA30396@hmswarspite.think-freely.org>

Neil Horman <nhorman@tuxdriver.com> writes:

> On Thu, Jul 11, 2019 at 03:39:09PM +0300, Ido Schimmel wrote:
>> On Sun, Jul 07, 2019 at 12:45:41PM -0700, David Miller wrote:
>> > From: Ido Schimmel <idosch@idosch.org>
>> > Date: Sun,  7 Jul 2019 10:58:17 +0300
>> > 
>> > > Users have several ways to debug the kernel and understand why a packet
>> > > was dropped. For example, using "drop monitor" and "perf". Both
>> > > utilities trace kfree_skb(), which is the function called when a packet
>> > > is freed as part of a failure. The information provided by these tools
>> > > is invaluable when trying to understand the cause of a packet loss.
>> > > 
>> > > In recent years, large portions of the kernel data path were offloaded
>> > > to capable devices. Today, it is possible to perform L2 and L3
>> > > forwarding in hardware, as well as tunneling (IP-in-IP and VXLAN).
>> > > Different TC classifiers and actions are also offloaded to capable
>> > > devices, at both ingress and egress.
>> > > 
>> > > However, when the data path is offloaded it is not possible to achieve
>> > > the same level of introspection as tools such as "perf" and "drop monitor"
>> > > become irrelevant.
>> > > 
>> > > This patchset aims to solve this by allowing users to monitor packets
>> > > that the underlying device decided to drop along with relevant metadata
>> > > such as the drop reason and ingress port.
>> > 
>> > We are now going to have 5 or so ways to capture packets passing through
>> > the system, this is nonsense.
>> > 
>> > AF_PACKET, kfree_skb drop monitor, perf, XDP perf events, and now this
>> > devlink thing.
>> > 
>> > This is insanity, too many ways to do the same thing and therefore the
>> > worst possible user experience.
>> > 
>> > Pick _ONE_ method to trap packets and forward normal kfree_skb events,
>> > XDP perf events, and these taps there too.
>> > 
>> > I mean really, think about it from the average user's perspective.  To
>> > see all drops/pkts I have to attach a kfree_skb tracepoint, and not just
>> > listen on devlink but configure a special tap thing beforehand and then
>> > if someone is using XDP I gotta setup another perf event buffer capture
>> > thing too.
>> 
>> Dave,
>> 
>> Before I start working on v2, I would like to get your feedback on the
>> high level plan. Also adding Neil who is the maintainer of drop_monitor
>> (and counterpart DropWatch tool [1]).
>> 
>> IIUC, the problem you point out is that users need to use different
>> tools to monitor packet drops based on where these drops occur
>> (SW/HW/XDP).
>> 
>> Therefore, my plan is to extend the existing drop_monitor netlink
>> channel to also cover HW drops. I will add a new message type and a new
>> multicast group for HW drops and encode in the message what is currently
>> encoded in the devlink events.
>> 
> A few things here:
> IIRC we don't announce individual hardware drops, drivers record them in
> internal structures, and they are retrieved on demand via ethtool calls, so you
> will either need to include some polling (probably not a very performant idea),
> or some sort of flagging mechanism to indicate that on the next message sent to
> user space you should go retrieve hw stats from a given interface.  I certainly
> wouldn't mind seeing this happen, but it's more work than just adding a new
> netlink message.
>
> Also, regarding XDP drops, we won't see them if the xdp program is offloaded to
> hardware (you'll need your hw drop gathering mechanism for that), but for xdp
> programs run on the cpu, dropwatch should already catch those.  I.e. if the xdp
> program returns a DROP result for a packet being processed, the OS will call
> kfree_skb on its behalf, and dropwatch will catch that.

There is no skb by the time an XDP program runs, so this is not true. As
I mentioned upthread, there's a tracepoint that will get called if an
error occurs (or the program returns XDP_ABORTED), but in most cases,
XDP_DROP just means that the packet silently disappears...

-Toke

^ permalink raw reply

* Re: [PATCH] User mode linux bump maximum MTU tuntap interface [RESAND]
From: Anton Ivanov @ 2019-07-12  9:40 UTC (permalink / raw)
  To: Richard Weinberger,
	Алексей
  Cc: netdev, linux-um
In-Reply-To: <1fb36224-81c0-0c4c-72c4-5a60dfe207ef@cambridgegreys.com>

On 12/07/2019 10:22, Anton Ivanov wrote:
> On 02/07/2019 15:40, Richard Weinberger wrote:
>> CC'ing um folks.
>>
>> On Tue, Jul 2, 2019 at 3:01 PM Алексей <ne-vlezay80@yandex.ru> wrote:
>>>
>>> Hello, the parameter  ETH_MAX_PACKET limited to 1500 bytes is the not
>>> support jumbo frames.
>>>
>>> This patch change ETH_MAX_PACKET the 65535 bytes to jumbo frame support
>>> with user mode linux tuntap driver.
>>>
>>>
>>> PATCH:
>>>
>>> -------------------
>>>
>>>
>>> diff -ruNP ../linux_orig/linux-5.1/arch/um/include/shared/net_user.h
>>> ./arch/um/include/shared/net_user.h
>>> --- a/arch/um/include/shared/net_user.h    2019-05-06 00:42:58.000000000
>>> +0000
>>> +++ b/arch/um/include/shared/net_user.h    2019-07-02 07:14:13.593333356
>>> +0000
>>> @@ -9,7 +9,7 @@
>>>   #define ETH_ADDR_LEN (6)
>>>   #define ETH_HEADER_ETHERTAP (16)
>>>   #define ETH_HEADER_OTHER (26) /* 14 for ethernet + VLAN + MPLS for
>>> crazy people */
>>> -#define ETH_MAX_PACKET (1500)
>>> +#define ETH_MAX_PACKET (65535)
>>>
>>>   #define UML_NET_VERSION (4)
>>>
>>> -------------------
>>>
>>>
>>
>>
> 
> This does not quite work because in some of the drivers you get extra 
> added on top of this constant.
> 
> I am going to see what can be done to fix the old net* drivers, imho we 
> should start phasing them out in favor of the vector ones.
> 

In fact it does not work even for lower values because the old net_ 
family assumes a contiguous skb buffer and uses read/write functions 
which read the whole packet into it at once. That is the buffer it gets 
if it asks for anything less than SKB_WITH_OVERHEAD(PAGE_SIZE).

If it asks for more there is no guarantee that the resulting buffer will 
be contiguous - it may get a segmented skb and will need to use 
appropriate functions to read/write into segments.

If you just up the MTU without fixing the underlying transport you get 
memory corruption.

If we are to support this I have to rewrite the whole driver set and it 
will frankly be easier to just make them use vector drivers underneath 
and set the names to be ethX instead.

In fact, I will probably do a patch that does that the moment I finish 
adding all existing socket transports to vector_user.c.

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

^ permalink raw reply

* Re: iwl_mvm_add_new_dqa_stream_wk BUG in lib/list_debug.c:56
From: Luciano Coelho @ 2019-07-12  9:57 UTC (permalink / raw)
  To: Marc Haber, Yussuf Khalil
  Cc: linux-wireless, linux-kernel, netdev, Johannes Berg,
	Emmanuel Grumbach, Intel Linux Wireless
In-Reply-To: <20190607204421.GK31088@torres.zugschlus.de>

On Fri, 2019-06-07 at 22:44 +0200, Marc Haber wrote:
> On Fri, Jun 07, 2019 at 10:20:56PM +0200, Yussuf Khalil wrote:
> > CC'ing iwlwifi maintainers to get some attention for this issue.
> > 
> > I am experiencing the very same bug on a ThinkPad T480s running 5.1.6 with
> > Fedora 30. A friend is seeing it on his X1 Carbon 6th Gen, too. Both have an
> > "Intel Corporation Wireless 8265 / 8275" card according to lspci.
> 
> I have an older 04:00.0 Network controller [0280]: Intel Corporation
> Wireless 8260 [8086:24f3] (rev 3a) on a Thinkpad X260.
> 
> > Notably, in all cases I've observed it occurred right after roaming from one
> > AP to another (though I can't guarantee this isn't a coincidence).
> 
> I also have multiple Access Points broadcasting the same SSID in my
> house, and yes, I experience those issues often when I move from one
> part of the house to another. I have, however, also experienced it in a
> hotel when I was using the mobile hotspot offered by my mobile, so that
> was clearly not a roaming situation.

Hi,

Sorry this got under the radar for a while.  Yesterday someone created
a bugzilla entry with the same error:

https://bugzilla.kernel.org/show_bug.cgi?id=204141

I'm going to file an internal bug report and then have someone look
further into it.

Any additional comments/reproductions/etc. please use that bugzilla
entry.

Thanks for reporting!

--
Cheers,
Luca.


^ permalink raw reply

* Re: [PATCH] User mode linux bump maximum MTU tuntap interface [RESAND]
From: Anton Ivanov @ 2019-07-12  9:22 UTC (permalink / raw)
  To: Richard Weinberger,
	Алексей
  Cc: netdev, linux-um
In-Reply-To: <CAFLxGvytDC1TFdT0m9vvijz_93B8TziWURcR-3mskWB-7TzFag@mail.gmail.com>

On 02/07/2019 15:40, Richard Weinberger wrote:
> CC'ing um folks.
> 
> On Tue, Jul 2, 2019 at 3:01 PM Алексей <ne-vlezay80@yandex.ru> wrote:
>>
>> Hello, the parameter  ETH_MAX_PACKET limited to 1500 bytes is the not
>> support jumbo frames.
>>
>> This patch change ETH_MAX_PACKET the 65535 bytes to jumbo frame support
>> with user mode linux tuntap driver.
>>
>>
>> PATCH:
>>
>> -------------------
>>
>>
>> diff -ruNP ../linux_orig/linux-5.1/arch/um/include/shared/net_user.h
>> ./arch/um/include/shared/net_user.h
>> --- a/arch/um/include/shared/net_user.h    2019-05-06 00:42:58.000000000
>> +0000
>> +++ b/arch/um/include/shared/net_user.h    2019-07-02 07:14:13.593333356
>> +0000
>> @@ -9,7 +9,7 @@
>>   #define ETH_ADDR_LEN (6)
>>   #define ETH_HEADER_ETHERTAP (16)
>>   #define ETH_HEADER_OTHER (26) /* 14 for ethernet + VLAN + MPLS for
>> crazy people */
>> -#define ETH_MAX_PACKET (1500)
>> +#define ETH_MAX_PACKET (65535)
>>
>>   #define UML_NET_VERSION (4)
>>
>> -------------------
>>
>>
> 
> 

This does not quite work because in some of the drivers you get extra 
added on top of this constant.

I am going to see what can be done to fix the old net* drivers, imho we 
should start phasing them out in favor of the vector ones.

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

^ permalink raw reply

* Re: [RFC] virtio-net: share receive_*() and add_recvbuf_*() with virtio-vsock
From: Stefano Garzarella @ 2019-07-12 10:00 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Jason Wang, Stefan Hajnoczi, virtualization, netdev
In-Reply-To: <20190711152855-mutt-send-email-mst@kernel.org>

On Thu, Jul 11, 2019 at 03:52:21PM -0400, Michael S. Tsirkin wrote:
> On Thu, Jul 11, 2019 at 01:41:34PM +0200, Stefano Garzarella wrote:
> > On Thu, Jul 11, 2019 at 03:37:00PM +0800, Jason Wang wrote:
> > > 
> > > On 2019/7/10 下午11:37, Stefano Garzarella wrote:
> > > > Hi,
> > > > as Jason suggested some months ago, I looked better at the virtio-net driver to
> > > > understand if we can reuse some parts also in the virtio-vsock driver, since we
> > > > have similar challenges (mergeable buffers, page allocation, small
> > > > packets, etc.).
> > > > 
> > > > Initially, I would add the skbuff in the virtio-vsock in order to re-use
> > > > receive_*() functions.
> > > 
> > > 
> > > Yes, that will be a good step.
> > > 
> > 
> > Okay, I'll go on this way.
> > 
> > > 
> > > > Then I would move receive_[small, big, mergeable]() and
> > > > add_recvbuf_[small, big, mergeable]() outside of virtio-net driver, in order to
> > > > call them also from virtio-vsock. I need to do some refactoring (e.g. leave the
> > > > XDP part on the virtio-net driver), but I think it is feasible.
> > > > 
> > > > The idea is to create a virtio-skb.[h,c] where put these functions and a new
> > > > object where stores some attributes needed (e.g. hdr_len ) and status (e.g.
> > > > some fields of struct receive_queue).
> > > 
> > > 
> > > My understanding is we could be more ambitious here. Do you see any blocker
> > > for reusing virtio-net directly? It's better to reuse not only the functions
> > > but also the logic like NAPI to avoid re-inventing something buggy and
> > > duplicated.
> > > 
> > 
> > These are my concerns:
> > - virtio-vsock is not a "net_device", so a lot of code related to
> >   ethtool, net devices (MAC address, MTU, speed, VLAN, XDP, offloading) will be
> >   not used by virtio-vsock.
> > 
> > - virtio-vsock has a different header. We can consider it as part of
> >   virtio_net payload, but it precludes the compatibility with old hosts. This
> >   was one of the major doubts that made me think about using only the
> >   send/recv skbuff functions, that it shouldn't break the compatibility.
> > 
> > > 
> > > > This is an idea of virtio-skb.h that
> > > > I have in mind:
> > > >      struct virtskb;
> > > 
> > > 
> > > What fields do you want to store in virtskb? It looks like the existing
> > > sk_buff is flexible enough for us?
> > 
> > My idea is to store queues information, like struct receive_queue or
> > struct send_queue, and some device attributes (e.g. hdr_len ).
> > 
> > > 
> > > 
> > > > 
> > > >      struct sk_buff *virtskb_receive_small(struct virtskb *vs, ...);
> > > >      struct sk_buff *virtskb_receive_big(struct virtskb *vs, ...);
> > > >      struct sk_buff *virtskb_receive_mergeable(struct virtskb *vs, ...);
> > > > 
> > > >      int virtskb_add_recvbuf_small(struct virtskb*vs, ...);
> > > >      int virtskb_add_recvbuf_big(struct virtskb *vs, ...);
> > > >      int virtskb_add_recvbuf_mergeable(struct virtskb *vs, ...);
> > > > 
> > > > For the Guest->Host path it should be easier, so maybe I can add a
> > > > "virtskb_send(struct virtskb *vs, struct sk_buff *skb)" with a part of the code
> > > > of xmit_skb().
> > > 
> > > 
> > > I may miss something, but I don't see any thing that prevents us from using
> > > xmit_skb() directly.
> > > 
> > 
> > Yes, but my initial idea was to make it more parametric and not related to the
> > virtio_net_hdr, so the 'hdr_len' could be a parameter and the
> > 'num_buffers' should be handled by the caller.
> > 
> > > 
> > > > 
> > > > Let me know if you have in mind better names or if I should put these function
> > > > in another place.
> > > > 
> > > > I would like to leave the control part completely separate, so, for example,
> > > > the two drivers will negotiate the features independently and they will call
> > > > the right virtskb_receive_*() function based on the negotiation.
> > > 
> > > 
> > > If it's one the issue of negotiation, we can simply change the
> > > virtnet_probe() to deal with different devices.
> > > 
> > > 
> > > > 
> > > > I already started to work on it, but before to do more steps and send an RFC
> > > > patch, I would like to hear your opinion.
> > > > Do you think that makes sense?
> > > > Do you see any issue or a better solution?
> > > 
> > > 
> > > I still think we need to seek a way of adding some codes on virtio-net.c
> > > directly if there's no huge different in the processing of TX/RX. That would
> > > save us a lot time.
> > 
> > After the reading of the buffers from the virtqueue I think the process
> > is slightly different, because virtio-net will interface with the network
> > stack, while virtio-vsock will interface with the vsock-core (socket).
> > So the virtio-vsock implements the following:
> > - control flow mechanism to avoid losing packets, informing the peer
> >   about the amount of memory available in the receive queue using some
> >   fields in the virtio_vsock_hdr
> > - de-multiplexing parsing the virtio_vsock_hdr and choosing the right
> >   socket depending on the port
> > - socket state handling
> > 
> > We can use the virtio-net as transport, but we should add a lot of
> > code to skip "net device" stuff when it is used by the virtio-vsock.
> > This could break something in virtio-net, for this reason, I thought to reuse
> > only the send/recv functions starting from the idea to split the virtio-net
> > driver in two parts:
> > a. one with all stuff related to the network stack
> > b. one with the stuff needed to communicate with the host
> > 
> > And use skbuff to communicate between parts. In this way, virtio-vsock
> > can use only the b part.
> > 
> > Maybe we can do this split in a better way, but I'm not sure it is
> > simple.
> > 
> > Thanks,
> > Stefano
> 
> Frankly, skb is a huge structure which adds a lot of
> overhead. I am not sure that using it is such a great idea
> if building a device that does not have to interface
> with the networking stack.

Thanks for the advice!

> 
> So I agree with Jason in theory. To clarify, he is basically saying
> current implementation is all wrong, it should be a protocol and we
> should teach networking stack that there are reliable net devices that
> handle just this protocol. We could add a flag in virtio net that
> will say it's such a device.
> 
> Whether it's doable, I don't know, and it's definitely not simple - in
> particular you will have to also re-implement existing devices in these
> terms, and not just virtio - vmware vsock too.
> 
> If you want to do a POC you can add a new address family,
> that's easier.

Very interesting!
I agree with you. In this way we can completely split the protocol
logic, from the device.

As you said, it will not be simple to do, but can be an opportunity to learn
better the Linux networking stack!
I'll try to do a PoC with AF_VSOCK2 that will use the virtio-net.

> 
> Just reusing random functions won't help, net stack
> is very heavy, if it manages to outperform vsock it's
> because vsock was not written with performance in mind.
> But the smarts are in the core not virtio driver.
> What makes vsock slow is design decisions like
> using a workqueue to process packets,
> not batching memory management etc etc.
> All things that net core does for virtio net.

Got it :)

Michael, Jason, thank you very much! Your suggestions are very useful!

Stefano

^ permalink raw reply

* Re: [RFC] virtio-net: share receive_*() and add_recvbuf_*() with virtio-vsock
From: Jason Wang @ 2019-07-12 10:14 UTC (permalink / raw)
  To: Stefano Garzarella, Michael S. Tsirkin
  Cc: Stefan Hajnoczi, virtualization, netdev
In-Reply-To: <20190712100033.xs3xesz2plfwj3ag@steredhat>


On 2019/7/12 下午6:00, Stefano Garzarella wrote:
> On Thu, Jul 11, 2019 at 03:52:21PM -0400, Michael S. Tsirkin wrote:
>> On Thu, Jul 11, 2019 at 01:41:34PM +0200, Stefano Garzarella wrote:
>>> On Thu, Jul 11, 2019 at 03:37:00PM +0800, Jason Wang wrote:
>>>> On 2019/7/10 下午11:37, Stefano Garzarella wrote:
>>>>> Hi,
>>>>> as Jason suggested some months ago, I looked better at the virtio-net driver to
>>>>> understand if we can reuse some parts also in the virtio-vsock driver, since we
>>>>> have similar challenges (mergeable buffers, page allocation, small
>>>>> packets, etc.).
>>>>>
>>>>> Initially, I would add the skbuff in the virtio-vsock in order to re-use
>>>>> receive_*() functions.
>>>>
>>>> Yes, that will be a good step.
>>>>
>>> Okay, I'll go on this way.
>>>
>>>>> Then I would move receive_[small, big, mergeable]() and
>>>>> add_recvbuf_[small, big, mergeable]() outside of virtio-net driver, in order to
>>>>> call them also from virtio-vsock. I need to do some refactoring (e.g. leave the
>>>>> XDP part on the virtio-net driver), but I think it is feasible.
>>>>>
>>>>> The idea is to create a virtio-skb.[h,c] where put these functions and a new
>>>>> object where stores some attributes needed (e.g. hdr_len ) and status (e.g.
>>>>> some fields of struct receive_queue).
>>>>
>>>> My understanding is we could be more ambitious here. Do you see any blocker
>>>> for reusing virtio-net directly? It's better to reuse not only the functions
>>>> but also the logic like NAPI to avoid re-inventing something buggy and
>>>> duplicated.
>>>>
>>> These are my concerns:
>>> - virtio-vsock is not a "net_device", so a lot of code related to
>>>    ethtool, net devices (MAC address, MTU, speed, VLAN, XDP, offloading) will be
>>>    not used by virtio-vsock.


Linux supports devices other than Ethernet, so it should not be a problem.


>>>
>>> - virtio-vsock has a different header. We can consider it as part of
>>>    virtio_net payload, but it precludes the compatibility with old hosts. This
>>>    was one of the major doubts that made me think about using only the
>>>    send/recv skbuff functions, that it shouldn't break the compatibility.


We can extend the current vnet header helper for it to work for vsock.


>>>
>>>>> This is an idea of virtio-skb.h that
>>>>> I have in mind:
>>>>>       struct virtskb;
>>>>
>>>> What fields do you want to store in virtskb? It looks like the existing
>>>> sk_buff is flexible enough for us?
>>> My idea is to store queues information, like struct receive_queue or
>>> struct send_queue, and some device attributes (e.g. hdr_len ).


If you reuse skb or virtnet_info, this is not necessary.


>>>
>>>>
>>>>>       struct sk_buff *virtskb_receive_small(struct virtskb *vs, ...);
>>>>>       struct sk_buff *virtskb_receive_big(struct virtskb *vs, ...);
>>>>>       struct sk_buff *virtskb_receive_mergeable(struct virtskb *vs, ...);
>>>>>
>>>>>       int virtskb_add_recvbuf_small(struct virtskb*vs, ...);
>>>>>       int virtskb_add_recvbuf_big(struct virtskb *vs, ...);
>>>>>       int virtskb_add_recvbuf_mergeable(struct virtskb *vs, ...);
>>>>>
>>>>> For the Guest->Host path it should be easier, so maybe I can add a
>>>>> "virtskb_send(struct virtskb *vs, struct sk_buff *skb)" with a part of the code
>>>>> of xmit_skb().
>>>>
>>>> I may miss something, but I don't see any thing that prevents us from using
>>>> xmit_skb() directly.
>>>>
>>> Yes, but my initial idea was to make it more parametric and not related to the
>>> virtio_net_hdr, so the 'hdr_len' could be a parameter and the
>>> 'num_buffers' should be handled by the caller.
>>>
>>>>> Let me know if you have in mind better names or if I should put these function
>>>>> in another place.
>>>>>
>>>>> I would like to leave the control part completely separate, so, for example,
>>>>> the two drivers will negotiate the features independently and they will call
>>>>> the right virtskb_receive_*() function based on the negotiation.
>>>>
>>>> If it's one the issue of negotiation, we can simply change the
>>>> virtnet_probe() to deal with different devices.
>>>>
>>>>
>>>>> I already started to work on it, but before to do more steps and send an RFC
>>>>> patch, I would like to hear your opinion.
>>>>> Do you think that makes sense?
>>>>> Do you see any issue or a better solution?
>>>>
>>>> I still think we need to seek a way of adding some codes on virtio-net.c
>>>> directly if there's no huge different in the processing of TX/RX. That would
>>>> save us a lot time.
>>> After the reading of the buffers from the virtqueue I think the process
>>> is slightly different, because virtio-net will interface with the network
>>> stack, while virtio-vsock will interface with the vsock-core (socket).
>>> So the virtio-vsock implements the following:
>>> - control flow mechanism to avoid losing packets, informing the peer
>>>    about the amount of memory available in the receive queue using some
>>>    fields in the virtio_vsock_hdr
>>> - de-multiplexing parsing the virtio_vsock_hdr and choosing the right
>>>    socket depending on the port
>>> - socket state handling


I think it's just a branch, for ethernet, go for networking stack. 
otherwise go for vsock core?


>>>
>>> We can use the virtio-net as transport, but we should add a lot of
>>> code to skip "net device" stuff when it is used by the virtio-vsock.


This could be another choice, but considering it is not transparent to the 
admin and requires new features, we may seek a transparent solution here.


>>> This could break something in virtio-net, for this reason, I thought to reuse
>>> only the send/recv functions starting from the idea to split the virtio-net
>>> driver in two parts:
>>> a. one with all stuff related to the network stack
>>> b. one with the stuff needed to communicate with the host
>>>
>>> And use skbuff to communicate between parts. In this way, virtio-vsock
>>> can use only the b part.
>>>
>>> Maybe we can do this split in a better way, but I'm not sure it is
>>> simple.
>>>
>>> Thanks,
>>> Stefano
>> Frankly, skb is a huge structure which adds a lot of
>> overhead. I am not sure that using it is such a great idea
>> if building a device that does not have to interface
>> with the networking stack.


I believe vsock is mainly used for stream performance not for PPS. So 
the impact should be minimal. We can use other metadata, just need 
branch in recv_xxx().


> Thanks for the advice!
>
>> So I agree with Jason in theory. To clarify, he is basically saying
>> current implementation is all wrong, it should be a protocol and we
>> should teach networking stack that there are reliable net devices that
>> handle just this protocol. We could add a flag in virtio net that
>> will say it's such a device.
>>
>> Whether it's doable, I don't know, and it's definitely not simple - in
>> particular you will have to also re-implement existing devices in these
>> terms, and not just virtio - vmware vsock too.


Merging the vsock protocol into the existing networking stack could be a long-term 
goal; I believe for the first phase, we can seek to use virtio-net first.


>>
>> If you want to do a POC you can add a new address family,
>> that's easier.
> Very interesting!
> I agree with you. In this way we can completely split the protocol
> logic, from the device.
>
> As you said, it will not be simple to do, but can be an opportunity to learn
> better the Linux networking stack!
> I'll try to do a PoC with AF_VSOCK2 that will use the virtio-net.


I suggest to do this step by step:

1) use virtio-net but keep some protocol logic

2) separate protocol logic and merge it into the existing Linux networking stack

Thanks


>> Just reusing random functions won't help, net stack
>> is very heavy, if it manages to outperform vsock it's
>> because vsock was not written with performance in mind.
>> But the smarts are in the core not virtio driver.
>> What makes vsock slow is design decisions like
>> using a workqueue to process packets,
>> not batching memory management etc etc.
>> All things that net core does for virtio net.
> Got it :)
>
> Michael, Jason, thank you very much! Your suggestions are very useful!
>
> Stefano

^ permalink raw reply

* Re: [PATCH v1 1/6] rcu: Add support for consolidated-RCU reader checking
From: Peter Zijlstra @ 2019-07-12 11:01 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-kernel, Alexey Kuznetsov, Bjorn Helgaas, Borislav Petkov,
	c0d1n61at3, David S. Miller, edumazet, Greg Kroah-Hartman,
	Hideaki YOSHIFUJI, H. Peter Anvin, Ingo Molnar, Josh Triplett,
	keescook, kernel-hardening, Lai Jiangshan, Len Brown, linux-acpi,
	linux-pci, linux-pm, Mathieu Desnoyers, neilb, netdev, oleg,
	Paul E. McKenney, Pavel Machek, Rafael J. Wysocki,
	Rasmus Villemoes, rcu, Steven Rostedt, Tejun Heo, Thomas Gleixner,
	will, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190711234401.220336-2-joel@joelfernandes.org>

On Thu, Jul 11, 2019 at 07:43:56PM -0400, Joel Fernandes (Google) wrote:
> This patch adds support for checking RCU reader sections in list
> traversal macros. Optionally, if the list macro is called under SRCU or
> other lock/mutex protection, then appropriate lockdep expressions can be
> passed to make the checks pass.
> 
> Existing list_for_each_entry_rcu() invocations don't need to pass the
> optional fourth argument (cond) unless they are under some non-RCU
> protection and needs to make lockdep check pass.
> 
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>  include/linux/rculist.h  | 29 ++++++++++++++++++++++++-----
>  include/linux/rcupdate.h |  7 +++++++
>  kernel/rcu/Kconfig.debug | 11 +++++++++++
>  kernel/rcu/update.c      | 26 ++++++++++++++++++++++++++
>  4 files changed, 68 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/rculist.h b/include/linux/rculist.h
> index e91ec9ddcd30..78c15ec6b2c9 100644
> --- a/include/linux/rculist.h
> +++ b/include/linux/rculist.h
> @@ -40,6 +40,23 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
>   */
>  #define list_next_rcu(list)	(*((struct list_head __rcu **)(&(list)->next)))
>  
> +/*
> + * Check during list traversal that we are within an RCU reader
> + */
> +
> +#define SIXTH_ARG(a1, a2, a3, a4, a5, a6, ...) a6
> +#define COUNT_VARGS(...) SIXTH_ARG(dummy, ## __VA_ARGS__, 4, 3, 2, 1, 0)

You don't seem to actually use it in this patch; also linux/kernel.h has
COUNT_ARGS().

^ permalink raw reply

* Re: [PATCH v1 1/6] rcu: Add support for consolidated-RCU reader checking
From: Peter Zijlstra @ 2019-07-12 11:11 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-kernel, Alexey Kuznetsov, Bjorn Helgaas, Borislav Petkov,
	c0d1n61at3, David S. Miller, edumazet, Greg Kroah-Hartman,
	Hideaki YOSHIFUJI, H. Peter Anvin, Ingo Molnar, Josh Triplett,
	keescook, kernel-hardening, Lai Jiangshan, Len Brown, linux-acpi,
	linux-pci, linux-pm, Mathieu Desnoyers, neilb, netdev, oleg,
	Paul E. McKenney, Pavel Machek, Rafael J. Wysocki,
	Rasmus Villemoes, rcu, Steven Rostedt, Tejun Heo, Thomas Gleixner,
	will, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190711234401.220336-2-joel@joelfernandes.org>

On Thu, Jul 11, 2019 at 07:43:56PM -0400, Joel Fernandes (Google) wrote:
> +int rcu_read_lock_any_held(void)
> +{
> +	int lockdep_opinion = 0;
> +
> +	if (!debug_lockdep_rcu_enabled())
> +		return 1;
> +	if (!rcu_is_watching())
> +		return 0;
> +	if (!rcu_lockdep_current_cpu_online())
> +		return 0;
> +
> +	/* Preemptible RCU flavor */
> +	if (lock_is_held(&rcu_lock_map))

you forgot debug_locks here.

> +		return 1;
> +
> +	/* BH flavor */
> +	if (in_softirq() || irqs_disabled())

I'm not sure I'd put irqs_disabled() under BH, also this entire
condition is superfluous, see below.

> +		return 1;
> +
> +	/* Sched flavor */
> +	if (debug_locks)
> +		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
> +	return lockdep_opinion || !preemptible();

that !preemptible() turns into:

  !(preempt_count()==0 && !irqs_disabled())

which is:

  preempt_count() != 0 || irqs_disabled()

and already includes irqs_disabled() and in_softirq().

> +}

So maybe something like:

	if (debug_locks && (lock_is_held(&rcu_lock_map) ||
			    lock_is_held(&rcu_sched_lock_map)))
		return true;

	return !preemptible();



^ permalink raw reply

* Re: [PATCH net-next 00/11] Add drop monitor for offloaded data paths
From: Neil Horman @ 2019-07-12 12:05 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: Ido Schimmel, David Miller, netdev, jiri, mlxsw, dsahern, roopa,
	nikolay, andy, pablo, jakub.kicinski, pieter.jansenvanvuuren,
	andrew, vivien.didelot, idosch
In-Reply-To: <69d0917f-895f-6239-4044-76944432e8ca@gmail.com>

On Thu, Jul 11, 2019 at 08:40:34PM -0700, Florian Fainelli wrote:
> 
> 
> On 7/11/2019 4:53 PM, Neil Horman wrote:
> >> I would like to emphasize that the configuration of whether these
> >> dropped packets are even sent to the CPU from the device still needs to
> >> reside in devlink given this is the go-to tool for device-specific
> >> configuration. In addition, these drop traps are a small subset of the
> >> entire packet traps devices support and all have similar needs such as
> >> HW policer configuration and statistics.
> >>
> >> In the future we might also want to report events that indicate the
> >> formation of possible problems. For example, in case packets are queued
> >> above a certain threshold or for long periods of time. I hope we could
> >> re-use drop_monitor for this as well, thereby making it the go-to
> >> channel for diagnosing current and to-be problems in the data path.
> >>
> > Thats an interesting idea, but dropwatch certainly isn't currently setup for
> > that kind of messaging.  It may be worth creating a v2 of the netlink protocol
> > and really thinking out what you want to communicate.
> 
> Is not what you describe more or less what Ido has been doing here with
> this patch series?
Possibly. I was only CCed on this thread halfway through the conversation, and
only on the cover letter; I've not had a chance to look at the entire series.

Neil

> -- 
> Florian
> 

^ permalink raw reply

* Re: [PATCH v1 1/6] rcu: Add support for consolidated-RCU reader checking
From: Oleg Nesterov @ 2019-07-12 12:12 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-kernel, Alexey Kuznetsov, Bjorn Helgaas, Borislav Petkov,
	c0d1n61at3, David S. Miller, edumazet, Greg Kroah-Hartman,
	Hideaki YOSHIFUJI, H. Peter Anvin, Ingo Molnar, Josh Triplett,
	keescook, kernel-hardening, Lai Jiangshan, Len Brown, linux-acpi,
	linux-pci, linux-pm, Mathieu Desnoyers, neilb, netdev,
	Paul E. McKenney, Pavel Machek, peterz, Rafael J. Wysocki,
	Rasmus Villemoes, rcu, Steven Rostedt, Tejun Heo, Thomas Gleixner,
	will, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190711234401.220336-2-joel@joelfernandes.org>

On 07/11, Joel Fernandes (Google) wrote:
>
> +int rcu_read_lock_any_held(void)

rcu_sync_is_idle() wants it. You have my ack in advance ;)

Oleg.


^ permalink raw reply

* Re: [PATCH net-next 00/11] Add drop monitor for offloaded data paths
From: Neil Horman @ 2019-07-12 12:18 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Ido Schimmel, David Miller, netdev, jiri, mlxsw, dsahern, roopa,
	nikolay, andy, pablo, jakub.kicinski, pieter.jansenvanvuuren,
	andrew, f.fainelli, vivien.didelot, idosch
In-Reply-To: <87r26vvbz8.fsf@toke.dk>

On Fri, Jul 12, 2019 at 11:27:55AM +0200, Toke Høiland-Jørgensen wrote:
> Neil Horman <nhorman@tuxdriver.com> writes:
> 
> > On Thu, Jul 11, 2019 at 03:39:09PM +0300, Ido Schimmel wrote:
> >> On Sun, Jul 07, 2019 at 12:45:41PM -0700, David Miller wrote:
> >> > From: Ido Schimmel <idosch@idosch.org>
> >> > Date: Sun,  7 Jul 2019 10:58:17 +0300
> >> > 
> >> > > Users have several ways to debug the kernel and understand why a packet
> >> > > was dropped. For example, using "drop monitor" and "perf". Both
> >> > > utilities trace kfree_skb(), which is the function called when a packet
> >> > > is freed as part of a failure. The information provided by these tools
> >> > > is invaluable when trying to understand the cause of a packet loss.
> >> > > 
> >> > > In recent years, large portions of the kernel data path were offloaded
> >> > > to capable devices. Today, it is possible to perform L2 and L3
> >> > > forwarding in hardware, as well as tunneling (IP-in-IP and VXLAN).
> >> > > Different TC classifiers and actions are also offloaded to capable
> >> > > devices, at both ingress and egress.
> >> > > 
> >> > > However, when the data path is offloaded it is not possible to achieve
> >> > > the same level of introspection as tools such "perf" and "drop monitor"
> >> > > become irrelevant.
> >> > > 
> >> > > This patchset aims to solve this by allowing users to monitor packets
> >> > > that the underlying device decided to drop along with relevant metadata
> >> > > such as the drop reason and ingress port.
> >> > 
> >> > We are now going to have 5 or so ways to capture packets passing through
> >> > the system, this is nonsense.
> >> > 
> >> > AF_PACKET, kfree_skb drop monitor, perf, XDP perf events, and now this
> >> > devlink thing.
> >> > 
> >> > This is insanity, too many ways to do the same thing and therefore the
> >> > worst possible user experience.
> >> > 
> >> > Pick _ONE_ method to trap packets and forward normal kfree_skb events,
> >> > XDP perf events, and these taps there too.
> >> > 
> >> > I mean really, think about it from the average user's perspective.  To
> >> > see all drops/pkts I have to attach a kfree_skb tracepoint, and not just
> >> > listen on devlink but configure a special tap thing beforehand and then
> >> > if someone is using XDP I gotta setup another perf event buffer capture
> >> > thing too.
> >> 
> >> Dave,
> >> 
> >> Before I start working on v2, I would like to get your feedback on the
> >> high level plan. Also adding Neil who is the maintainer of drop_monitor
> >> (and counterpart DropWatch tool [1]).
> >> 
> >> IIUC, the problem you point out is that users need to use different
> >> tools to monitor packet drops based on where these drops occur
> >> (SW/HW/XDP).
> >> 
> >> Therefore, my plan is to extend the existing drop_monitor netlink
> >> channel to also cover HW drops. I will add a new message type and a new
> >> multicast group for HW drops and encode in the message what is currently
> >> encoded in the devlink events.
> >> 
> > A few things here:
> > IIRC we don't announce individual hardware drops, drivers record them in
> > internal structures, and they are retrieved on demand via ethtool calls, so you
> > will either need to include some polling (probably not a very performant idea),
> > or some sort of flagging mechanism to indicate that on the next message sent to
> > user space you should go retrieve hw stats from a given interface.  I certainly
> > wouldn't mind seeing this happen, but its more work than just adding a new
> > netlink message.
> >
> > Also, regarding XDP drops, we wont see them if the xdp program is offloaded to
> > hardware (you'll need your hw drop gathering mechanism for that), but for xdp
> > programs run on the cpu, dropwatch should alrady catch those.  I.e. if the xdp
> > program returns a DROP result for a packet being processed, the OS will call
> > kfree_skb on its behalf, and dropwatch wil call that.
> 
> There is no skb by the time an XDP program runs, so this is not true. As
> I mentioned upthread, there's a tracepoint that will get called if an
> error occurs (or the program returns XDP_ABORTED), but in most cases,
> XDP_DROP just means that the packet silently disappears...
> 
As I noted, that's only true for xdp programs that are offloaded to hardware; I
was only speaking for XDP programs that run on the cpu.  For the former case, we
obviously need some other mechanism to detect drops, but for cpu executed xdp
programs, the OS is responsible for freeing skbs associated with programs that
return XDP_DROP.

Neil

> -Toke
> 

^ permalink raw reply

* Re: [PATCH net-next 00/11] Add drop monitor for offloaded data paths
From: Toke Høiland-Jørgensen @ 2019-07-12 12:33 UTC (permalink / raw)
  To: Neil Horman
  Cc: Ido Schimmel, David Miller, netdev, jiri, mlxsw, dsahern, roopa,
	nikolay, andy, pablo, jakub.kicinski, pieter.jansenvanvuuren,
	andrew, f.fainelli, vivien.didelot, idosch
In-Reply-To: <20190712121859.GB13696@hmswarspite.think-freely.org>

Neil Horman <nhorman@tuxdriver.com> writes:

> On Fri, Jul 12, 2019 at 11:27:55AM +0200, Toke Høiland-Jørgensen wrote:
>> Neil Horman <nhorman@tuxdriver.com> writes:
>> 
>> > On Thu, Jul 11, 2019 at 03:39:09PM +0300, Ido Schimmel wrote:
>> >> On Sun, Jul 07, 2019 at 12:45:41PM -0700, David Miller wrote:
>> >> > From: Ido Schimmel <idosch@idosch.org>
>> >> > Date: Sun,  7 Jul 2019 10:58:17 +0300
>> >> > 
>> >> > > Users have several ways to debug the kernel and understand why a packet
>> >> > > was dropped. For example, using "drop monitor" and "perf". Both
>> >> > > utilities trace kfree_skb(), which is the function called when a packet
>> >> > > is freed as part of a failure. The information provided by these tools
>> >> > > is invaluable when trying to understand the cause of a packet loss.
>> >> > > 
>> >> > > In recent years, large portions of the kernel data path were offloaded
>> >> > > to capable devices. Today, it is possible to perform L2 and L3
>> >> > > forwarding in hardware, as well as tunneling (IP-in-IP and VXLAN).
>> >> > > Different TC classifiers and actions are also offloaded to capable
>> >> > > devices, at both ingress and egress.
>> >> > > 
>> >> > > However, when the data path is offloaded it is not possible to achieve
>> >> > > the same level of introspection as tools such "perf" and "drop monitor"
>> >> > > become irrelevant.
>> >> > > 
>> >> > > This patchset aims to solve this by allowing users to monitor packets
>> >> > > that the underlying device decided to drop along with relevant metadata
>> >> > > such as the drop reason and ingress port.
>> >> > 
>> >> > We are now going to have 5 or so ways to capture packets passing through
>> >> > the system, this is nonsense.
>> >> > 
>> >> > AF_PACKET, kfree_skb drop monitor, perf, XDP perf events, and now this
>> >> > devlink thing.
>> >> > 
>> >> > This is insanity, too many ways to do the same thing and therefore the
>> >> > worst possible user experience.
>> >> > 
>> >> > Pick _ONE_ method to trap packets and forward normal kfree_skb events,
>> >> > XDP perf events, and these taps there too.
>> >> > 
>> >> > I mean really, think about it from the average user's perspective.  To
>> >> > see all drops/pkts I have to attach a kfree_skb tracepoint, and not just
>> >> > listen on devlink but configure a special tap thing beforehand and then
>> >> > if someone is using XDP I gotta setup another perf event buffer capture
>> >> > thing too.
>> >> 
>> >> Dave,
>> >> 
>> >> Before I start working on v2, I would like to get your feedback on the
>> >> high level plan. Also adding Neil who is the maintainer of drop_monitor
>> >> (and counterpart DropWatch tool [1]).
>> >> 
>> >> IIUC, the problem you point out is that users need to use different
>> >> tools to monitor packet drops based on where these drops occur
>> >> (SW/HW/XDP).
>> >> 
>> >> Therefore, my plan is to extend the existing drop_monitor netlink
>> >> channel to also cover HW drops. I will add a new message type and a new
>> >> multicast group for HW drops and encode in the message what is currently
>> >> encoded in the devlink events.
>> >> 
>> > A few things here:
>> > IIRC we don't announce individual hardware drops, drivers record them in
>> > internal structures, and they are retrieved on demand via ethtool calls, so you
>> > will either need to include some polling (probably not a very performant idea),
>> > or some sort of flagging mechanism to indicate that on the next message sent to
>> > user space you should go retrieve hw stats from a given interface.  I certainly
>> > wouldn't mind seeing this happen, but its more work than just adding a new
>> > netlink message.
>> >
>> > Also, regarding XDP drops, we wont see them if the xdp program is offloaded to
>> > hardware (you'll need your hw drop gathering mechanism for that), but for xdp
>> > programs run on the cpu, dropwatch should alrady catch those.  I.e. if the xdp
>> > program returns a DROP result for a packet being processed, the OS will call
>> > kfree_skb on its behalf, and dropwatch wil call that.
>> 
>> There is no skb by the time an XDP program runs, so this is not true. As
>> I mentioned upthread, there's a tracepoint that will get called if an
>> error occurs (or the program returns XDP_ABORTED), but in most cases,
>> XDP_DROP just means that the packet silently disappears...
>> 
> As I noted, thats only true for xdp programs that are offloaded to hardware, I
> was only speaking for XDP programs that run on the cpu.  For the former case, we
> obviously need some other mechanism to detect drops, but for cpu executed xdp
> programs, the OS is responsible for freeing skbs associated with programs the
> return XDP_DROP.

Ah, I think maybe you're thinking of generic XDP (also referred to as
skb mode)? That is a separate mode; an XDP program loaded in "native
mode" (or "driver mode") runs on the CPU, but before the skb is created;
this is the common case for XDP, and there is no skb and thus no drop
notification in this mode.

There is *also* an offload mode for XDP programs, but that is only
supported by netronome cards thus far, so not as commonly used...

-Toke

^ permalink raw reply

* Re: [PATCH] MAINTAINERS: update BPF JIT S390 maintainers
From: Daniel Borkmann @ 2019-07-12 12:40 UTC (permalink / raw)
  To: David Miller, gor
  Cc: ast, heiko.carstens, borntraeger, iii, netdev, bpf, linux-s390
In-Reply-To: <20190711.113343.906691840255971211.davem@davemloft.net>

On 07/11/2019 08:33 PM, David Miller wrote:
> From: Vasily Gorbik <gor@linux.ibm.com>
> Date: Wed, 10 Jul 2019 13:34:54 +0200
> 
>> Dave, Alexei, Daniel,
>> would you take it via one of your trees? Or should I take it via s390?
> 
> I think it can go via the bpf tree.

Yep, just applied to bpf, thanks!

^ permalink raw reply

* Re: [PATCH v6 bpf-next 0/3] bpf: add bpf_descendant_of helper
From: Daniel Borkmann @ 2019-07-12 12:41 UTC (permalink / raw)
  To: Javier Honduvilla Coto, netdev; +Cc: yhs, kernel-team, jonhaslam
In-Reply-To: <20190710180025.94726-1-javierhonduco@fb.com>

On 07/10/2019 08:00 PM, Javier Honduvilla Coto wrote:
> Hi all,
> 
> This patch adds the bpf_descendant_of helper which accepts a PID and
> returns 1 if the PID of the process currently being executed is a
> descendant of it or if it's itself. Returns 0 otherwise. The passed
> PID should be the one as seen from the "global" pid namespace as the
> processes' PIDs in the hierarchy are resolved using the context of said
> initial namespace.
> 
> This is very useful in tracing programs when we want to filter by a
> given PID and all the children it might spawn. The current workarounds
> most people implement for this purpose have issues:
> 
> - Attaching to process spawning syscalls and dynamically add those PIDs
> to some bpf map that would be used to filter is cumbersome and
> potentially racy.
> - Unrolling some loop to perform what this helper is doing consumes lots
> of instructions. That and the impossibility to jump backwards makes it
> really hard to be correct in really large process chains.
> 
> 
> Let me know what do you think!
> 
> Thanks,
> 
> ---
> Changes in V6:
>         - Small style fix
>         - Clarify in the docs that we are resolving PIDs using the global,
> initial PID namespace, and the provided *pid* argument should be global, too
>         - Changed the way we assert on the helper return value
> 
> Changes in V5:
>         - Addressed code review feedback
>         - Renamed from progenyof => descendant_of as suggested by Jon Haslam
> and Brendan Gregg
> 
> Changes in V4:
>         - Rebased on latest bpf-next after merge window
> 
> Changes in V3:
>         - Removed RCU read (un)locking as BPF programs alredy run in RCU locked
>                 context
>         - progenyof(0) now returns 1, which, semantically makes more sense
>         - Added new test case for PID 0 and changed sentinel value for errors
>         - Rebase on latest bpf-next/master
>         - Used my work email as somehow I accidentally used my personal one in v2
> 
> Changes in V2:
>         - Adding missing docs in include/uapi/linux/bpf.h
> 

bpf-next is currently closed due to merge window, please resubmit once it reopens.

Thanks,
Daniel

^ permalink raw reply

* Re: [PATCH bpf-next v9 0/2] bpf: Allow bpf_skb_event_output for more prog types
From: Daniel Borkmann @ 2019-07-12 12:42 UTC (permalink / raw)
  To: Allan Zhang, Andrii Nakryiko
  Cc: Networking, bpf, Song Liu, Alexei Starovoitov
In-Reply-To: <CAMHgqJ71XDWSZDFOvcvu-sjDzrVp8+G8VdMx1fNUs5+xefhUSQ@mail.gmail.com>

On 07/10/2019 09:10 PM, Allan Zhang wrote:
> Sure, thanks. will do as suggested.

Yep, bpf-next is currently closed due to merge window, please resubmit
once it reopens.

Thanks,
Daniel

^ permalink raw reply

* Re: bonded active-backup ethernet-wifi drops packets
From: Brian J. Murrell @ 2019-07-12 12:51 UTC (permalink / raw)
  To: netdev
In-Reply-To: <6134.1562373984@famine>

[-- Attachment #1: Type: text/plain, Size: 1255 bytes --]

On Fri, 2019-07-05 at 17:46 -0700, Jay Vosburgh wrote:
> 
> 	I did set this up and test it, but haven't had time to analyze
> in depth.
> 
> 	What I saw was that ping (IPv4) flood worked fine, bonded or
> not, over a span of several hours.

Interesting.  In contrast to my experience.

> However, ping6 showed small numbers
> of drops on a ping6 flood when bonded, on the order of 200 drops out
> of
> 48,000,000 requests sent.

I wonder if that's indicative of what I'm seeing.  Strange that you
only see it on IPv6 though.  I'm seeing it on IPv4.

> Zero losses when no bond in the stack.

That's what I see for IPv4.

> Both
> tests to the same peer connected to the same switch.

Ditto.

> All of the above
> with the bond using the Ethernet slave.

Also ditto.  Wifi introduces latencies (at least) which mask the
underlying issue.

> I haven't tracked down where
> those losses are occurring, so I don't know if it's on the transmit
> or
> receive sides (or both).

Personally, I suspect it's on the receive.  I suspect the host I am
testing from sends the ICMP echo requests just fine.  It's just not
getting the ICMP echo responses back.

Any ideas on further avenues to debugging this?

Cheers,
b.


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox