Netdev List
 help / color / mirror / Atom feed
* [PATCH 4/5] dt-bindings: ptp: add ptp-qoriq.txt
From: Yangbo Lu @ 2018-05-25  4:40 UTC (permalink / raw)
  To: netdev, devicetree, linux-kernel, Richard Cochran, claudiu.manoil,
	Rob Herring
  Cc: Yangbo Lu
In-Reply-To: <20180525044038.37756-1-yangbo.lu@nxp.com>

This patch is to add a documentation for ptp_qoriq dt-bindings.
The description for ptp_qoriq dt-bindings was actually moved
from Documentation/devicetree/bindings/net/fsl-tsec-phy.txt,
since gianfar_ptp driver was moved to ptp_qoriq driver.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
---
 .../devicetree/bindings/net/fsl-tsec-phy.txt       |   68 +-------------------
 .../devicetree/bindings/ptp/ptp-qoriq.txt          |   69 ++++++++++++++++++++
 2 files changed, 70 insertions(+), 67 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/ptp/ptp-qoriq.txt

diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
index 79bf352..047bdf7 100644
--- a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
+++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
@@ -86,70 +86,4 @@ Example:
 
 * Gianfar PTP clock nodes
 
-General Properties:
-
-  - compatible   Should be "fsl,etsec-ptp"
-  - reg          Offset and length of the register set for the device
-  - interrupts   There should be at least two interrupts. Some devices
-                 have as many as four PTP related interrupts.
-
-Clock Properties:
-
-  - fsl,cksel        Timer reference clock source.
-  - fsl,tclk-period  Timer reference clock period in nanoseconds.
-  - fsl,tmr-prsc     Prescaler, divides the output clock.
-  - fsl,tmr-add      Frequency compensation value.
-  - fsl,tmr-fiper1   Fixed interval period pulse generator.
-  - fsl,tmr-fiper2   Fixed interval period pulse generator.
-  - fsl,max-adj      Maximum frequency adjustment in parts per billion.
-
-  These properties set the operational parameters for the PTP
-  clock. You must choose these carefully for the clock to work right.
-  Here is how to figure good values:
-
-  TimerOsc     = selected reference clock   MHz
-  tclk_period  = desired clock period       nanoseconds
-  NominalFreq  = 1000 / tclk_period         MHz
-  FreqDivRatio = TimerOsc / NominalFreq     (must be greater that 1.0)
-  tmr_add      = ceil(2^32 / FreqDivRatio)
-  OutputClock  = NominalFreq / tmr_prsc     MHz
-  PulseWidth   = 1 / OutputClock            microseconds
-  FiperFreq1   = desired frequency in Hz
-  FiperDiv1    = 1000000 * OutputClock / FiperFreq1
-  tmr_fiper1   = tmr_prsc * tclk_period * FiperDiv1 - tclk_period
-  max_adj      = 1000000000 * (FreqDivRatio - 1.0) - 1
-
-  The calculation for tmr_fiper2 is the same as for tmr_fiper1. The
-  driver expects that tmr_fiper1 will be correctly set to produce a 1
-  Pulse Per Second (PPS) signal, since this will be offered to the PPS
-  subsystem to synchronize the Linux clock.
-
-  Reference clock source is determined by the value, which is holded
-  in CKSEL bits in TMR_CTRL register. "fsl,cksel" property keeps the
-  value, which will be directly written in those bits, that is why,
-  according to reference manual, the next clock sources can be used:
-
-  <0> - external high precision timer reference clock (TSEC_TMR_CLK
-        input is used for this purpose);
-  <1> - eTSEC system clock;
-  <2> - eTSEC1 transmit clock;
-  <3> - RTC clock input.
-
-  When this attribute is not used, eTSEC system clock will serve as
-  IEEE 1588 timer reference clock.
-
-Example:
-
-	ptp_clock@24e00 {
-		compatible = "fsl,etsec-ptp";
-		reg = <0x24E00 0xB0>;
-		interrupts = <12 0x8 13 0x8>;
-		interrupt-parent = < &ipic >;
-		fsl,cksel       = <1>;
-		fsl,tclk-period = <10>;
-		fsl,tmr-prsc    = <100>;
-		fsl,tmr-add     = <0x999999A4>;
-		fsl,tmr-fiper1  = <0x3B9AC9F6>;
-		fsl,tmr-fiper2  = <0x00018696>;
-		fsl,max-adj     = <659999998>;
-	};
+Refer to Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
diff --git a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
new file mode 100644
index 0000000..0f569d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
@@ -0,0 +1,69 @@
+* Freescale QorIQ 1588 timer based PTP clock
+
+General Properties:
+
+  - compatible   Should be "fsl,etsec-ptp"
+  - reg          Offset and length of the register set for the device
+  - interrupts   There should be at least two interrupts. Some devices
+                 have as many as four PTP related interrupts.
+
+Clock Properties:
+
+  - fsl,cksel        Timer reference clock source.
+  - fsl,tclk-period  Timer reference clock period in nanoseconds.
+  - fsl,tmr-prsc     Prescaler, divides the output clock.
+  - fsl,tmr-add      Frequency compensation value.
+  - fsl,tmr-fiper1   Fixed interval period pulse generator.
+  - fsl,tmr-fiper2   Fixed interval period pulse generator.
+  - fsl,max-adj      Maximum frequency adjustment in parts per billion.
+
+  These properties set the operational parameters for the PTP
+  clock. You must choose these carefully for the clock to work right.
+  Here is how to figure good values:
+
+  TimerOsc     = selected reference clock   MHz
+  tclk_period  = desired clock period       nanoseconds
+  NominalFreq  = 1000 / tclk_period         MHz
+  FreqDivRatio = TimerOsc / NominalFreq     (must be greater that 1.0)
+  tmr_add      = ceil(2^32 / FreqDivRatio)
+  OutputClock  = NominalFreq / tmr_prsc     MHz
+  PulseWidth   = 1 / OutputClock            microseconds
+  FiperFreq1   = desired frequency in Hz
+  FiperDiv1    = 1000000 * OutputClock / FiperFreq1
+  tmr_fiper1   = tmr_prsc * tclk_period * FiperDiv1 - tclk_period
+  max_adj      = 1000000000 * (FreqDivRatio - 1.0) - 1
+
+  The calculation for tmr_fiper2 is the same as for tmr_fiper1. The
+  driver expects that tmr_fiper1 will be correctly set to produce a 1
+  Pulse Per Second (PPS) signal, since this will be offered to the PPS
+  subsystem to synchronize the Linux clock.
+
+  Reference clock source is determined by the value, which is holded
+  in CKSEL bits in TMR_CTRL register. "fsl,cksel" property keeps the
+  value, which will be directly written in those bits, that is why,
+  according to reference manual, the next clock sources can be used:
+
+  <0> - external high precision timer reference clock (TSEC_TMR_CLK
+        input is used for this purpose);
+  <1> - eTSEC system clock;
+  <2> - eTSEC1 transmit clock;
+  <3> - RTC clock input.
+
+  When this attribute is not used, eTSEC system clock will serve as
+  IEEE 1588 timer reference clock.
+
+Example:
+
+	ptp_clock@24e00 {
+		compatible = "fsl,etsec-ptp";
+		reg = <0x24E00 0xB0>;
+		interrupts = <12 0x8 13 0x8>;
+		interrupt-parent = < &ipic >;
+		fsl,cksel       = <1>;
+		fsl,tclk-period = <10>;
+		fsl,tmr-prsc    = <100>;
+		fsl,tmr-add     = <0x999999A4>;
+		fsl,tmr-fiper1  = <0x3B9AC9F6>;
+		fsl,tmr-fiper2  = <0x00018696>;
+		fsl,max-adj     = <659999998>;
+	};
-- 
1.7.1

^ permalink raw reply related

* [PATCH 3/5] net: ethernet: gianfar_ethtool: get phc index through drvdata
From: Yangbo Lu @ 2018-05-25  4:40 UTC (permalink / raw)
  To: netdev, devicetree, linux-kernel, Richard Cochran, claudiu.manoil,
	Rob Herring
  Cc: Yangbo Lu
In-Reply-To: <20180525044038.37756-1-yangbo.lu@nxp.com>

Global variable gfar_phc_index was used to get and store
phc index through gianfar_ptp driver. However gianfar_ptp
had been renamed as ptp_qoriq for QorIQ common PTP driver.
This gfar_phc_index doesn't work any more, and the phc index
is stored in drvdata now. This patch is to support getting
phc index through ptp_qoriq drvdata.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
---
 drivers/net/ethernet/freescale/gianfar.h         |    3 --
 drivers/net/ethernet/freescale/gianfar_ethtool.c |   23 +++++++++++++++++----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 5aa8147..8e42c02 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -1372,7 +1372,4 @@ struct filer_table {
 	struct gfar_filer_entry fe[MAX_FILER_CACHE_IDX + 20];
 };
 
-/* The gianfar_ptp module will set this variable */
-extern int gfar_phc_index;
-
 #endif /* __GIANFAR_H */
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index a93e019..8cb98ca 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -41,6 +41,8 @@
 #include <linux/phy.h>
 #include <linux/sort.h>
 #include <linux/if_vlan.h>
+#include <linux/of_platform.h>
+#include <linux/fsl/ptp_qoriq.h>
 
 #include "gianfar.h"
 
@@ -1509,24 +1511,35 @@ static int gfar_get_nfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 	return ret;
 }
 
-int gfar_phc_index = -1;
-EXPORT_SYMBOL(gfar_phc_index);
-
 static int gfar_get_ts_info(struct net_device *dev,
 			    struct ethtool_ts_info *info)
 {
 	struct gfar_private *priv = netdev_priv(dev);
+	struct platform_device *ptp_dev;
+	struct device_node *ptp_node;
+	struct qoriq_ptp *ptp = NULL;
+
+	info->phc_index = -1;
 
 	if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)) {
 		info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
 					SOF_TIMESTAMPING_SOFTWARE;
-		info->phc_index = -1;
 		return 0;
 	}
+
+	ptp_node = of_find_compatible_node(NULL, NULL, "fsl,etsec-ptp");
+	if (ptp_node) {
+		ptp_dev = of_find_device_by_node(ptp_node);
+		if (ptp_dev)
+			ptp = platform_get_drvdata(ptp_dev);
+	}
+
+	if (ptp)
+		info->phc_index = ptp->phc_index;
+
 	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
 				SOF_TIMESTAMPING_RX_HARDWARE |
 				SOF_TIMESTAMPING_RAW_HARDWARE;
-	info->phc_index = gfar_phc_index;
 	info->tx_types = (1 << HWTSTAMP_TX_OFF) |
 			 (1 << HWTSTAMP_TX_ON);
 	info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
-- 
1.7.1

^ permalink raw reply related

* [PATCH 2/5] ptp_qoriq: move some definitions to header file
From: Yangbo Lu @ 2018-05-25  4:40 UTC (permalink / raw)
  To: netdev, devicetree, linux-kernel, Richard Cochran, claudiu.manoil,
	Rob Herring
  Cc: Yangbo Lu
In-Reply-To: <20180525044038.37756-1-yangbo.lu@nxp.com>

This patch is to move some definitions in ptp_qoriq.c
to the header file.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
---
 drivers/ptp/ptp_qoriq.c       |  132 +--------------------------------------
 include/linux/fsl/ptp_qoriq.h |  141 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+), 131 deletions(-)
 create mode 100644 include/linux/fsl/ptp_qoriq.h

diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index 5110cce..1468a16 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -28,139 +28,9 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/timex.h>
-#include <linux/io.h>
 #include <linux/slab.h>
 
-#include <linux/ptp_clock_kernel.h>
-
-/*
- * qoriq ptp registers
- * Generated by regen.tcl on Thu May 13 01:38:57 PM CEST 2010
- */
-struct qoriq_ptp_registers {
-	u32 tmr_ctrl;     /* Timer control register */
-	u32 tmr_tevent;   /* Timestamp event register */
-	u32 tmr_temask;   /* Timer event mask register */
-	u32 tmr_pevent;   /* Timestamp event register */
-	u32 tmr_pemask;   /* Timer event mask register */
-	u32 tmr_stat;     /* Timestamp status register */
-	u32 tmr_cnt_h;    /* Timer counter high register */
-	u32 tmr_cnt_l;    /* Timer counter low register */
-	u32 tmr_add;      /* Timer drift compensation addend register */
-	u32 tmr_acc;      /* Timer accumulator register */
-	u32 tmr_prsc;     /* Timer prescale */
-	u8  res1[4];
-	u32 tmroff_h;     /* Timer offset high */
-	u32 tmroff_l;     /* Timer offset low */
-	u8  res2[8];
-	u32 tmr_alarm1_h; /* Timer alarm 1 high register */
-	u32 tmr_alarm1_l; /* Timer alarm 1 high register */
-	u32 tmr_alarm2_h; /* Timer alarm 2 high register */
-	u32 tmr_alarm2_l; /* Timer alarm 2 high register */
-	u8  res3[48];
-	u32 tmr_fiper1;   /* Timer fixed period interval */
-	u32 tmr_fiper2;   /* Timer fixed period interval */
-	u32 tmr_fiper3;   /* Timer fixed period interval */
-	u8  res4[20];
-	u32 tmr_etts1_h;  /* Timestamp of general purpose external trigger */
-	u32 tmr_etts1_l;  /* Timestamp of general purpose external trigger */
-	u32 tmr_etts2_h;  /* Timestamp of general purpose external trigger */
-	u32 tmr_etts2_l;  /* Timestamp of general purpose external trigger */
-};
-
-/* Bit definitions for the TMR_CTRL register */
-#define ALM1P                 (1<<31) /* Alarm1 output polarity */
-#define ALM2P                 (1<<30) /* Alarm2 output polarity */
-#define FIPERST               (1<<28) /* FIPER start indication */
-#define PP1L                  (1<<27) /* Fiper1 pulse loopback mode enabled. */
-#define PP2L                  (1<<26) /* Fiper2 pulse loopback mode enabled. */
-#define TCLK_PERIOD_SHIFT     (16) /* 1588 timer reference clock period. */
-#define TCLK_PERIOD_MASK      (0x3ff)
-#define RTPE                  (1<<15) /* Record Tx Timestamp to PAL Enable. */
-#define FRD                   (1<<14) /* FIPER Realignment Disable */
-#define ESFDP                 (1<<11) /* External Tx/Rx SFD Polarity. */
-#define ESFDE                 (1<<10) /* External Tx/Rx SFD Enable. */
-#define ETEP2                 (1<<9) /* External trigger 2 edge polarity */
-#define ETEP1                 (1<<8) /* External trigger 1 edge polarity */
-#define COPH                  (1<<7) /* Generated clock output phase. */
-#define CIPH                  (1<<6) /* External oscillator input clock phase */
-#define TMSR                  (1<<5) /* Timer soft reset. */
-#define BYP                   (1<<3) /* Bypass drift compensated clock */
-#define TE                    (1<<2) /* 1588 timer enable. */
-#define CKSEL_SHIFT           (0)    /* 1588 Timer reference clock source */
-#define CKSEL_MASK            (0x3)
-
-/* Bit definitions for the TMR_TEVENT register */
-#define ETS2                  (1<<25) /* External trigger 2 timestamp sampled */
-#define ETS1                  (1<<24) /* External trigger 1 timestamp sampled */
-#define ALM2                  (1<<17) /* Current time = alarm time register 2 */
-#define ALM1                  (1<<16) /* Current time = alarm time register 1 */
-#define PP1                   (1<<7)  /* periodic pulse generated on FIPER1 */
-#define PP2                   (1<<6)  /* periodic pulse generated on FIPER2 */
-#define PP3                   (1<<5)  /* periodic pulse generated on FIPER3 */
-
-/* Bit definitions for the TMR_TEMASK register */
-#define ETS2EN                (1<<25) /* External trigger 2 timestamp enable */
-#define ETS1EN                (1<<24) /* External trigger 1 timestamp enable */
-#define ALM2EN                (1<<17) /* Timer ALM2 event enable */
-#define ALM1EN                (1<<16) /* Timer ALM1 event enable */
-#define PP1EN                 (1<<7) /* Periodic pulse event 1 enable */
-#define PP2EN                 (1<<6) /* Periodic pulse event 2 enable */
-
-/* Bit definitions for the TMR_PEVENT register */
-#define TXP2                  (1<<9) /* PTP transmitted timestamp im TXTS2 */
-#define TXP1                  (1<<8) /* PTP transmitted timestamp in TXTS1 */
-#define RXP                   (1<<0) /* PTP frame has been received */
-
-/* Bit definitions for the TMR_PEMASK register */
-#define TXP2EN                (1<<9) /* Transmit PTP packet event 2 enable */
-#define TXP1EN                (1<<8) /* Transmit PTP packet event 1 enable */
-#define RXPEN                 (1<<0) /* Receive PTP packet event enable */
-
-/* Bit definitions for the TMR_STAT register */
-#define STAT_VEC_SHIFT        (0) /* Timer general purpose status vector */
-#define STAT_VEC_MASK         (0x3f)
-
-/* Bit definitions for the TMR_PRSC register */
-#define PRSC_OCK_SHIFT        (0) /* Output clock division/prescale factor. */
-#define PRSC_OCK_MASK         (0xffff)
-
-
-#define DRIVER		"ptp_qoriq"
-#define DEFAULT_CKSEL	1
-#define N_EXT_TS	2
-#define REG_SIZE	sizeof(struct qoriq_ptp_registers)
-
-struct qoriq_ptp {
-	struct qoriq_ptp_registers __iomem *regs;
-	spinlock_t lock; /* protects regs */
-	struct ptp_clock *clock;
-	struct ptp_clock_info caps;
-	struct resource *rsrc;
-	int irq;
-	int phc_index;
-	u64 alarm_interval; /* for periodic alarm */
-	u64 alarm_value;
-	u32 tclk_period;  /* nanoseconds */
-	u32 tmr_prsc;
-	u32 tmr_add;
-	u32 cksel;
-	u32 tmr_fiper1;
-	u32 tmr_fiper2;
-};
-
-static inline u32 qoriq_read(unsigned __iomem *addr)
-{
-	u32 val;
-
-	val = ioread32be(addr);
-	return val;
-}
-
-static inline void qoriq_write(unsigned __iomem *addr, u32 val)
-{
-	iowrite32be(val, addr);
-}
+#include <linux/fsl/ptp_qoriq.h>
 
 /*
  * Register access functions
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
new file mode 100644
index 0000000..b462d9e
--- /dev/null
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ * Copyright 2018 NXP
+ */
+#ifndef __PTP_QORIQ_H__
+#define __PTP_QORIQ_H__
+
+#include <linux/io.h>
+#include <linux/ptp_clock_kernel.h>
+
+/*
+ * qoriq ptp registers
+ * Generated by regen.tcl on Thu May 13 01:38:57 PM CEST 2010
+ */
+struct qoriq_ptp_registers {
+	u32 tmr_ctrl;     /* Timer control register */
+	u32 tmr_tevent;   /* Timestamp event register */
+	u32 tmr_temask;   /* Timer event mask register */
+	u32 tmr_pevent;   /* Timestamp event register */
+	u32 tmr_pemask;   /* Timer event mask register */
+	u32 tmr_stat;     /* Timestamp status register */
+	u32 tmr_cnt_h;    /* Timer counter high register */
+	u32 tmr_cnt_l;    /* Timer counter low register */
+	u32 tmr_add;      /* Timer drift compensation addend register */
+	u32 tmr_acc;      /* Timer accumulator register */
+	u32 tmr_prsc;     /* Timer prescale */
+	u8  res1[4];
+	u32 tmroff_h;     /* Timer offset high */
+	u32 tmroff_l;     /* Timer offset low */
+	u8  res2[8];
+	u32 tmr_alarm1_h; /* Timer alarm 1 high register */
+	u32 tmr_alarm1_l; /* Timer alarm 1 high register */
+	u32 tmr_alarm2_h; /* Timer alarm 2 high register */
+	u32 tmr_alarm2_l; /* Timer alarm 2 high register */
+	u8  res3[48];
+	u32 tmr_fiper1;   /* Timer fixed period interval */
+	u32 tmr_fiper2;   /* Timer fixed period interval */
+	u32 tmr_fiper3;   /* Timer fixed period interval */
+	u8  res4[20];
+	u32 tmr_etts1_h;  /* Timestamp of general purpose external trigger */
+	u32 tmr_etts1_l;  /* Timestamp of general purpose external trigger */
+	u32 tmr_etts2_h;  /* Timestamp of general purpose external trigger */
+	u32 tmr_etts2_l;  /* Timestamp of general purpose external trigger */
+};
+
+/* Bit definitions for the TMR_CTRL register */
+#define ALM1P                 (1<<31) /* Alarm1 output polarity */
+#define ALM2P                 (1<<30) /* Alarm2 output polarity */
+#define FIPERST               (1<<28) /* FIPER start indication */
+#define PP1L                  (1<<27) /* Fiper1 pulse loopback mode enabled. */
+#define PP2L                  (1<<26) /* Fiper2 pulse loopback mode enabled. */
+#define TCLK_PERIOD_SHIFT     (16) /* 1588 timer reference clock period. */
+#define TCLK_PERIOD_MASK      (0x3ff)
+#define RTPE                  (1<<15) /* Record Tx Timestamp to PAL Enable. */
+#define FRD                   (1<<14) /* FIPER Realignment Disable */
+#define ESFDP                 (1<<11) /* External Tx/Rx SFD Polarity. */
+#define ESFDE                 (1<<10) /* External Tx/Rx SFD Enable. */
+#define ETEP2                 (1<<9) /* External trigger 2 edge polarity */
+#define ETEP1                 (1<<8) /* External trigger 1 edge polarity */
+#define COPH                  (1<<7) /* Generated clock output phase. */
+#define CIPH                  (1<<6) /* External oscillator input clock phase */
+#define TMSR                  (1<<5) /* Timer soft reset. */
+#define BYP                   (1<<3) /* Bypass drift compensated clock */
+#define TE                    (1<<2) /* 1588 timer enable. */
+#define CKSEL_SHIFT           (0)    /* 1588 Timer reference clock source */
+#define CKSEL_MASK            (0x3)
+
+/* Bit definitions for the TMR_TEVENT register */
+#define ETS2                  (1<<25) /* External trigger 2 timestamp sampled */
+#define ETS1                  (1<<24) /* External trigger 1 timestamp sampled */
+#define ALM2                  (1<<17) /* Current time = alarm time register 2 */
+#define ALM1                  (1<<16) /* Current time = alarm time register 1 */
+#define PP1                   (1<<7)  /* periodic pulse generated on FIPER1 */
+#define PP2                   (1<<6)  /* periodic pulse generated on FIPER2 */
+#define PP3                   (1<<5)  /* periodic pulse generated on FIPER3 */
+
+/* Bit definitions for the TMR_TEMASK register */
+#define ETS2EN                (1<<25) /* External trigger 2 timestamp enable */
+#define ETS1EN                (1<<24) /* External trigger 1 timestamp enable */
+#define ALM2EN                (1<<17) /* Timer ALM2 event enable */
+#define ALM1EN                (1<<16) /* Timer ALM1 event enable */
+#define PP1EN                 (1<<7) /* Periodic pulse event 1 enable */
+#define PP2EN                 (1<<6) /* Periodic pulse event 2 enable */
+
+/* Bit definitions for the TMR_PEVENT register */
+#define TXP2                  (1<<9) /* PTP transmitted timestamp im TXTS2 */
+#define TXP1                  (1<<8) /* PTP transmitted timestamp in TXTS1 */
+#define RXP                   (1<<0) /* PTP frame has been received */
+
+/* Bit definitions for the TMR_PEMASK register */
+#define TXP2EN                (1<<9) /* Transmit PTP packet event 2 enable */
+#define TXP1EN                (1<<8) /* Transmit PTP packet event 1 enable */
+#define RXPEN                 (1<<0) /* Receive PTP packet event enable */
+
+/* Bit definitions for the TMR_STAT register */
+#define STAT_VEC_SHIFT        (0) /* Timer general purpose status vector */
+#define STAT_VEC_MASK         (0x3f)
+
+/* Bit definitions for the TMR_PRSC register */
+#define PRSC_OCK_SHIFT        (0) /* Output clock division/prescale factor. */
+#define PRSC_OCK_MASK         (0xffff)
+
+
+#define DRIVER		"ptp_qoriq"
+#define DEFAULT_CKSEL	1
+#define N_EXT_TS	2
+#define REG_SIZE	sizeof(struct qoriq_ptp_registers)
+
+struct qoriq_ptp {
+	struct qoriq_ptp_registers __iomem *regs;
+	spinlock_t lock; /* protects regs */
+	struct ptp_clock *clock;
+	struct ptp_clock_info caps;
+	struct resource *rsrc;
+	int irq;
+	int phc_index;
+	u64 alarm_interval; /* for periodic alarm */
+	u64 alarm_value;
+	u32 tclk_period;  /* nanoseconds */
+	u32 tmr_prsc;
+	u32 tmr_add;
+	u32 cksel;
+	u32 tmr_fiper1;
+	u32 tmr_fiper2;
+};
+
+static inline u32 qoriq_read(unsigned __iomem *addr)
+{
+	u32 val;
+
+	val = ioread32be(addr);
+	return val;
+}
+
+static inline void qoriq_write(unsigned __iomem *addr, u32 val)
+{
+	iowrite32be(val, addr);
+}
+
+#endif
-- 
1.7.1

^ permalink raw reply related

* [PATCH 1/5] ptp: rework gianfar_ptp as QorIQ common PTP driver
From: Yangbo Lu @ 2018-05-25  4:40 UTC (permalink / raw)
  To: netdev, devicetree, linux-kernel, Richard Cochran, claudiu.manoil,
	Rob Herring
  Cc: Yangbo Lu

gianfar_ptp was the PTP clock driver for 1588 timer
module of Freescale QorIQ eTSEC (Enhanced Three-Speed
Ethernet Controllers) platforms. Actually QorIQ DPAA
(Data Path Acceleration Architecture) platforms is
also using the same 1588 timer module in hardware.

This patch is to rework gianfar_ptp as QorIQ common
PTP driver to support both DPAA and eTSEC. Moved
gianfar_ptp.c to drivers/ptp/, renamed it as
ptp_qoriq.c, and renamed many variables. There were
not any function changes.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
---
 drivers/net/ethernet/freescale/Makefile            |    1 -
 drivers/ptp/Kconfig                                |   14 +-
 drivers/ptp/Makefile                               |    1 +
 .../freescale/gianfar_ptp.c => ptp/ptp_qoriq.c}    |  320 ++++++++++----------
 4 files changed, 174 insertions(+), 162 deletions(-)
 rename drivers/{net/ethernet/freescale/gianfar_ptp.c => ptp/ptp_qoriq.c} (58%)

diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile
index ed8ad0f..0914a3e 100644
--- a/drivers/net/ethernet/freescale/Makefile
+++ b/drivers/net/ethernet/freescale/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_FS_ENET) += fs_enet/
 obj-$(CONFIG_FSL_PQ_MDIO) += fsl_pq_mdio.o
 obj-$(CONFIG_FSL_XGMAC_MDIO) += xgmac_mdio.o
 obj-$(CONFIG_GIANFAR) += gianfar_driver.o
-obj-$(CONFIG_PTP_1588_CLOCK_GIANFAR) += gianfar_ptp.o
 gianfar_driver-objs := gianfar.o \
 		gianfar_ethtool.o
 obj-$(CONFIG_UCC_GETH) += ucc_geth_driver.o
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index a21ad10..474c988 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -41,19 +41,19 @@ config PTP_1588_CLOCK_DTE
 	  To compile this driver as a module, choose M here: the module
 	  will be called ptp_dte.
 
-config PTP_1588_CLOCK_GIANFAR
-	tristate "Freescale eTSEC as PTP clock"
+config PTP_1588_CLOCK_QORIQ
+	tristate "Freescale QorIQ 1588 timer as PTP clock"
 	depends on GIANFAR
 	depends on PTP_1588_CLOCK
 	default y
 	help
-	  This driver adds support for using the eTSEC as a PTP
-	  clock. This clock is only useful if your PTP programs are
-	  getting hardware time stamps on the PTP Ethernet packets
-	  using the SO_TIMESTAMPING API.
+	  This driver adds support for using the Freescale QorIQ 1588
+	  timer as a PTP clock. This clock is only useful if your PTP
+	  programs are getting hardware time stamps on the PTP Ethernet
+	  packets using the SO_TIMESTAMPING API.
 
 	  To compile this driver as a module, choose M here: the module
-	  will be called gianfar_ptp.
+	  will be called ptp_qoriq.
 
 config PTP_1588_CLOCK_IXP46X
 	tristate "Intel IXP46x as PTP clock"
diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile
index fd28207..19efa9c 100644
--- a/drivers/ptp/Makefile
+++ b/drivers/ptp/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_PTP_1588_CLOCK_DTE)	+= ptp_dte.o
 obj-$(CONFIG_PTP_1588_CLOCK_IXP46X)	+= ptp_ixp46x.o
 obj-$(CONFIG_PTP_1588_CLOCK_PCH)	+= ptp_pch.o
 obj-$(CONFIG_PTP_1588_CLOCK_KVM)	+= ptp_kvm.o
+obj-$(CONFIG_PTP_1588_CLOCK_QORIQ)	+= ptp_qoriq.o
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/ptp/ptp_qoriq.c
similarity index 58%
rename from drivers/net/ethernet/freescale/gianfar_ptp.c
rename to drivers/ptp/ptp_qoriq.c
index 9f8d4f8..5110cce 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -1,5 +1,5 @@
 /*
- * PTP 1588 clock using the eTSEC
+ * PTP 1588 clock for Freescale QorIQ 1588 timer
  *
  * Copyright (C) 2010 OMICRON electronics GmbH
  *
@@ -29,16 +29,15 @@
 #include <linux/of_platform.h>
 #include <linux/timex.h>
 #include <linux/io.h>
+#include <linux/slab.h>
 
 #include <linux/ptp_clock_kernel.h>
 
-#include "gianfar.h"
-
 /*
- * gianfar ptp registers
+ * qoriq ptp registers
  * Generated by regen.tcl on Thu May 13 01:38:57 PM CEST 2010
  */
-struct gianfar_ptp_registers {
+struct qoriq_ptp_registers {
 	u32 tmr_ctrl;     /* Timer control register */
 	u32 tmr_tevent;   /* Timestamp event register */
 	u32 tmr_temask;   /* Timer event mask register */
@@ -127,18 +126,19 @@ struct gianfar_ptp_registers {
 #define PRSC_OCK_MASK         (0xffff)
 
 
-#define DRIVER		"gianfar_ptp"
+#define DRIVER		"ptp_qoriq"
 #define DEFAULT_CKSEL	1
 #define N_EXT_TS	2
-#define REG_SIZE	sizeof(struct gianfar_ptp_registers)
+#define REG_SIZE	sizeof(struct qoriq_ptp_registers)
 
-struct etsects {
-	struct gianfar_ptp_registers __iomem *regs;
+struct qoriq_ptp {
+	struct qoriq_ptp_registers __iomem *regs;
 	spinlock_t lock; /* protects regs */
 	struct ptp_clock *clock;
 	struct ptp_clock_info caps;
 	struct resource *rsrc;
 	int irq;
+	int phc_index;
 	u64 alarm_interval; /* for periodic alarm */
 	u64 alarm_value;
 	u32 tclk_period;  /* nanoseconds */
@@ -149,54 +149,67 @@ struct etsects {
 	u32 tmr_fiper2;
 };
 
+static inline u32 qoriq_read(unsigned __iomem *addr)
+{
+	u32 val;
+
+	val = ioread32be(addr);
+	return val;
+}
+
+static inline void qoriq_write(unsigned __iomem *addr, u32 val)
+{
+	iowrite32be(val, addr);
+}
+
 /*
  * Register access functions
  */
 
-/* Caller must hold etsects->lock. */
-static u64 tmr_cnt_read(struct etsects *etsects)
+/* Caller must hold qoriq_ptp->lock. */
+static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
 {
 	u64 ns;
 	u32 lo, hi;
 
-	lo = gfar_read(&etsects->regs->tmr_cnt_l);
-	hi = gfar_read(&etsects->regs->tmr_cnt_h);
+	lo = qoriq_read(&qoriq_ptp->regs->tmr_cnt_l);
+	hi = qoriq_read(&qoriq_ptp->regs->tmr_cnt_h);
 	ns = ((u64) hi) << 32;
 	ns |= lo;
 	return ns;
 }
 
-/* Caller must hold etsects->lock. */
-static void tmr_cnt_write(struct etsects *etsects, u64 ns)
+/* Caller must hold qoriq_ptp->lock. */
+static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns)
 {
 	u32 hi = ns >> 32;
 	u32 lo = ns & 0xffffffff;
 
-	gfar_write(&etsects->regs->tmr_cnt_l, lo);
-	gfar_write(&etsects->regs->tmr_cnt_h, hi);
+	qoriq_write(&qoriq_ptp->regs->tmr_cnt_l, lo);
+	qoriq_write(&qoriq_ptp->regs->tmr_cnt_h, hi);
 }
 
-/* Caller must hold etsects->lock. */
-static void set_alarm(struct etsects *etsects)
+/* Caller must hold qoriq_ptp->lock. */
+static void set_alarm(struct qoriq_ptp *qoriq_ptp)
 {
 	u64 ns;
 	u32 lo, hi;
 
-	ns = tmr_cnt_read(etsects) + 1500000000ULL;
+	ns = tmr_cnt_read(qoriq_ptp) + 1500000000ULL;
 	ns = div_u64(ns, 1000000000UL) * 1000000000ULL;
-	ns -= etsects->tclk_period;
+	ns -= qoriq_ptp->tclk_period;
 	hi = ns >> 32;
 	lo = ns & 0xffffffff;
-	gfar_write(&etsects->regs->tmr_alarm1_l, lo);
-	gfar_write(&etsects->regs->tmr_alarm1_h, hi);
+	qoriq_write(&qoriq_ptp->regs->tmr_alarm1_l, lo);
+	qoriq_write(&qoriq_ptp->regs->tmr_alarm1_h, hi);
 }
 
-/* Caller must hold etsects->lock. */
-static void set_fipers(struct etsects *etsects)
+/* Caller must hold qoriq_ptp->lock. */
+static void set_fipers(struct qoriq_ptp *qoriq_ptp)
 {
-	set_alarm(etsects);
-	gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
-	gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
+	set_alarm(qoriq_ptp);
+	qoriq_write(&qoriq_ptp->regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
+	qoriq_write(&qoriq_ptp->regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
 }
 
 /*
@@ -205,72 +218,72 @@ static void set_fipers(struct etsects *etsects)
 
 static irqreturn_t isr(int irq, void *priv)
 {
-	struct etsects *etsects = priv;
+	struct qoriq_ptp *qoriq_ptp = priv;
 	struct ptp_clock_event event;
 	u64 ns;
 	u32 ack = 0, lo, hi, mask, val;
 
-	val = gfar_read(&etsects->regs->tmr_tevent);
+	val = qoriq_read(&qoriq_ptp->regs->tmr_tevent);
 
 	if (val & ETS1) {
 		ack |= ETS1;
-		hi = gfar_read(&etsects->regs->tmr_etts1_h);
-		lo = gfar_read(&etsects->regs->tmr_etts1_l);
+		hi = qoriq_read(&qoriq_ptp->regs->tmr_etts1_h);
+		lo = qoriq_read(&qoriq_ptp->regs->tmr_etts1_l);
 		event.type = PTP_CLOCK_EXTTS;
 		event.index = 0;
 		event.timestamp = ((u64) hi) << 32;
 		event.timestamp |= lo;
-		ptp_clock_event(etsects->clock, &event);
+		ptp_clock_event(qoriq_ptp->clock, &event);
 	}
 
 	if (val & ETS2) {
 		ack |= ETS2;
-		hi = gfar_read(&etsects->regs->tmr_etts2_h);
-		lo = gfar_read(&etsects->regs->tmr_etts2_l);
+		hi = qoriq_read(&qoriq_ptp->regs->tmr_etts2_h);
+		lo = qoriq_read(&qoriq_ptp->regs->tmr_etts2_l);
 		event.type = PTP_CLOCK_EXTTS;
 		event.index = 1;
 		event.timestamp = ((u64) hi) << 32;
 		event.timestamp |= lo;
-		ptp_clock_event(etsects->clock, &event);
+		ptp_clock_event(qoriq_ptp->clock, &event);
 	}
 
 	if (val & ALM2) {
 		ack |= ALM2;
-		if (etsects->alarm_value) {
+		if (qoriq_ptp->alarm_value) {
 			event.type = PTP_CLOCK_ALARM;
 			event.index = 0;
-			event.timestamp = etsects->alarm_value;
-			ptp_clock_event(etsects->clock, &event);
+			event.timestamp = qoriq_ptp->alarm_value;
+			ptp_clock_event(qoriq_ptp->clock, &event);
 		}
-		if (etsects->alarm_interval) {
-			ns = etsects->alarm_value + etsects->alarm_interval;
+		if (qoriq_ptp->alarm_interval) {
+			ns = qoriq_ptp->alarm_value + qoriq_ptp->alarm_interval;
 			hi = ns >> 32;
 			lo = ns & 0xffffffff;
-			spin_lock(&etsects->lock);
-			gfar_write(&etsects->regs->tmr_alarm2_l, lo);
-			gfar_write(&etsects->regs->tmr_alarm2_h, hi);
-			spin_unlock(&etsects->lock);
-			etsects->alarm_value = ns;
+			spin_lock(&qoriq_ptp->lock);
+			qoriq_write(&qoriq_ptp->regs->tmr_alarm2_l, lo);
+			qoriq_write(&qoriq_ptp->regs->tmr_alarm2_h, hi);
+			spin_unlock(&qoriq_ptp->lock);
+			qoriq_ptp->alarm_value = ns;
 		} else {
-			gfar_write(&etsects->regs->tmr_tevent, ALM2);
-			spin_lock(&etsects->lock);
-			mask = gfar_read(&etsects->regs->tmr_temask);
+			qoriq_write(&qoriq_ptp->regs->tmr_tevent, ALM2);
+			spin_lock(&qoriq_ptp->lock);
+			mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
 			mask &= ~ALM2EN;
-			gfar_write(&etsects->regs->tmr_temask, mask);
-			spin_unlock(&etsects->lock);
-			etsects->alarm_value = 0;
-			etsects->alarm_interval = 0;
+			qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+			spin_unlock(&qoriq_ptp->lock);
+			qoriq_ptp->alarm_value = 0;
+			qoriq_ptp->alarm_interval = 0;
 		}
 	}
 
 	if (val & PP1) {
 		ack |= PP1;
 		event.type = PTP_CLOCK_PPS;
-		ptp_clock_event(etsects->clock, &event);
+		ptp_clock_event(qoriq_ptp->clock, &event);
 	}
 
 	if (ack) {
-		gfar_write(&etsects->regs->tmr_tevent, ack);
+		qoriq_write(&qoriq_ptp->regs->tmr_tevent, ack);
 		return IRQ_HANDLED;
 	} else
 		return IRQ_NONE;
@@ -280,18 +293,18 @@ static irqreturn_t isr(int irq, void *priv)
  * PTP clock operations
  */
 
-static int ptp_gianfar_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 {
 	u64 adj, diff;
 	u32 tmr_add;
 	int neg_adj = 0;
-	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
 
 	if (scaled_ppm < 0) {
 		neg_adj = 1;
 		scaled_ppm = -scaled_ppm;
 	}
-	tmr_add = etsects->tmr_add;
+	tmr_add = qoriq_ptp->tmr_add;
 	adj = tmr_add;
 
 	/* calculate diff as adj*(scaled_ppm/65536)/1000000
@@ -303,70 +316,70 @@ static int ptp_gianfar_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 
 	tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
 
-	gfar_write(&etsects->regs->tmr_add, tmr_add);
+	qoriq_write(&qoriq_ptp->regs->tmr_add, tmr_add);
 
 	return 0;
 }
 
-static int ptp_gianfar_adjtime(struct ptp_clock_info *ptp, s64 delta)
+static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
 	s64 now;
 	unsigned long flags;
-	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
 
-	spin_lock_irqsave(&etsects->lock, flags);
+	spin_lock_irqsave(&qoriq_ptp->lock, flags);
 
-	now = tmr_cnt_read(etsects);
+	now = tmr_cnt_read(qoriq_ptp);
 	now += delta;
-	tmr_cnt_write(etsects, now);
-	set_fipers(etsects);
+	tmr_cnt_write(qoriq_ptp, now);
+	set_fipers(qoriq_ptp);
 
-	spin_unlock_irqrestore(&etsects->lock, flags);
+	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 
 	return 0;
 }
 
-static int ptp_gianfar_gettime(struct ptp_clock_info *ptp,
+static int ptp_qoriq_gettime(struct ptp_clock_info *ptp,
 			       struct timespec64 *ts)
 {
 	u64 ns;
 	unsigned long flags;
-	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
 
-	spin_lock_irqsave(&etsects->lock, flags);
+	spin_lock_irqsave(&qoriq_ptp->lock, flags);
 
-	ns = tmr_cnt_read(etsects);
+	ns = tmr_cnt_read(qoriq_ptp);
 
-	spin_unlock_irqrestore(&etsects->lock, flags);
+	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 
 	*ts = ns_to_timespec64(ns);
 
 	return 0;
 }
 
-static int ptp_gianfar_settime(struct ptp_clock_info *ptp,
+static int ptp_qoriq_settime(struct ptp_clock_info *ptp,
 			       const struct timespec64 *ts)
 {
 	u64 ns;
 	unsigned long flags;
-	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
 
 	ns = timespec64_to_ns(ts);
 
-	spin_lock_irqsave(&etsects->lock, flags);
+	spin_lock_irqsave(&qoriq_ptp->lock, flags);
 
-	tmr_cnt_write(etsects, ns);
-	set_fipers(etsects);
+	tmr_cnt_write(qoriq_ptp, ns);
+	set_fipers(qoriq_ptp);
 
-	spin_unlock_irqrestore(&etsects->lock, flags);
+	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 
 	return 0;
 }
 
-static int ptp_gianfar_enable(struct ptp_clock_info *ptp,
+static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 			      struct ptp_clock_request *rq, int on)
 {
-	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
 	unsigned long flags;
 	u32 bit, mask;
 
@@ -382,25 +395,25 @@ static int ptp_gianfar_enable(struct ptp_clock_info *ptp,
 		default:
 			return -EINVAL;
 		}
-		spin_lock_irqsave(&etsects->lock, flags);
-		mask = gfar_read(&etsects->regs->tmr_temask);
+		spin_lock_irqsave(&qoriq_ptp->lock, flags);
+		mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
 		if (on)
 			mask |= bit;
 		else
 			mask &= ~bit;
-		gfar_write(&etsects->regs->tmr_temask, mask);
-		spin_unlock_irqrestore(&etsects->lock, flags);
+		qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+		spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 		return 0;
 
 	case PTP_CLK_REQ_PPS:
-		spin_lock_irqsave(&etsects->lock, flags);
-		mask = gfar_read(&etsects->regs->tmr_temask);
+		spin_lock_irqsave(&qoriq_ptp->lock, flags);
+		mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
 		if (on)
 			mask |= PP1EN;
 		else
 			mask &= ~PP1EN;
-		gfar_write(&etsects->regs->tmr_temask, mask);
-		spin_unlock_irqrestore(&etsects->lock, flags);
+		qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+		spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 		return 0;
 
 	default:
@@ -410,142 +423,141 @@ static int ptp_gianfar_enable(struct ptp_clock_info *ptp,
 	return -EOPNOTSUPP;
 }
 
-static const struct ptp_clock_info ptp_gianfar_caps = {
+static const struct ptp_clock_info ptp_qoriq_caps = {
 	.owner		= THIS_MODULE,
-	.name		= "gianfar clock",
+	.name		= "qoriq ptp clock",
 	.max_adj	= 512000,
 	.n_alarm	= 0,
 	.n_ext_ts	= N_EXT_TS,
 	.n_per_out	= 0,
 	.n_pins		= 0,
 	.pps		= 1,
-	.adjfine	= ptp_gianfar_adjfine,
-	.adjtime	= ptp_gianfar_adjtime,
-	.gettime64	= ptp_gianfar_gettime,
-	.settime64	= ptp_gianfar_settime,
-	.enable		= ptp_gianfar_enable,
+	.adjfine	= ptp_qoriq_adjfine,
+	.adjtime	= ptp_qoriq_adjtime,
+	.gettime64	= ptp_qoriq_gettime,
+	.settime64	= ptp_qoriq_settime,
+	.enable		= ptp_qoriq_enable,
 };
 
-static int gianfar_ptp_probe(struct platform_device *dev)
+static int qoriq_ptp_probe(struct platform_device *dev)
 {
 	struct device_node *node = dev->dev.of_node;
-	struct etsects *etsects;
+	struct qoriq_ptp *qoriq_ptp;
 	struct timespec64 now;
 	int err = -ENOMEM;
 	u32 tmr_ctrl;
 	unsigned long flags;
 
-	etsects = kzalloc(sizeof(*etsects), GFP_KERNEL);
-	if (!etsects)
+	qoriq_ptp = kzalloc(sizeof(*qoriq_ptp), GFP_KERNEL);
+	if (!qoriq_ptp)
 		goto no_memory;
 
 	err = -ENODEV;
 
-	etsects->caps = ptp_gianfar_caps;
+	qoriq_ptp->caps = ptp_qoriq_caps;
 
-	if (of_property_read_u32(node, "fsl,cksel", &etsects->cksel))
-		etsects->cksel = DEFAULT_CKSEL;
+	if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel))
+		qoriq_ptp->cksel = DEFAULT_CKSEL;
 
 	if (of_property_read_u32(node,
-				 "fsl,tclk-period", &etsects->tclk_period) ||
+				 "fsl,tclk-period", &qoriq_ptp->tclk_period) ||
 	    of_property_read_u32(node,
-				 "fsl,tmr-prsc", &etsects->tmr_prsc) ||
+				 "fsl,tmr-prsc", &qoriq_ptp->tmr_prsc) ||
 	    of_property_read_u32(node,
-				 "fsl,tmr-add", &etsects->tmr_add) ||
+				 "fsl,tmr-add", &qoriq_ptp->tmr_add) ||
 	    of_property_read_u32(node,
-				 "fsl,tmr-fiper1", &etsects->tmr_fiper1) ||
+				 "fsl,tmr-fiper1", &qoriq_ptp->tmr_fiper1) ||
 	    of_property_read_u32(node,
-				 "fsl,tmr-fiper2", &etsects->tmr_fiper2) ||
+				 "fsl,tmr-fiper2", &qoriq_ptp->tmr_fiper2) ||
 	    of_property_read_u32(node,
-				 "fsl,max-adj", &etsects->caps.max_adj)) {
+				 "fsl,max-adj", &qoriq_ptp->caps.max_adj)) {
 		pr_err("device tree node missing required elements\n");
 		goto no_node;
 	}
 
-	etsects->irq = platform_get_irq(dev, 0);
+	qoriq_ptp->irq = platform_get_irq(dev, 0);
 
-	if (etsects->irq < 0) {
+	if (qoriq_ptp->irq < 0) {
 		pr_err("irq not in device tree\n");
 		goto no_node;
 	}
-	if (request_irq(etsects->irq, isr, 0, DRIVER, etsects)) {
+	if (request_irq(qoriq_ptp->irq, isr, 0, DRIVER, qoriq_ptp)) {
 		pr_err("request_irq failed\n");
 		goto no_node;
 	}
 
-	etsects->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	if (!etsects->rsrc) {
+	qoriq_ptp->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!qoriq_ptp->rsrc) {
 		pr_err("no resource\n");
 		goto no_resource;
 	}
-	if (request_resource(&iomem_resource, etsects->rsrc)) {
+	if (request_resource(&iomem_resource, qoriq_ptp->rsrc)) {
 		pr_err("resource busy\n");
 		goto no_resource;
 	}
 
-	spin_lock_init(&etsects->lock);
+	spin_lock_init(&qoriq_ptp->lock);
 
-	etsects->regs = ioremap(etsects->rsrc->start,
-				resource_size(etsects->rsrc));
-	if (!etsects->regs) {
+	qoriq_ptp->regs = ioremap(qoriq_ptp->rsrc->start,
+				resource_size(qoriq_ptp->rsrc));
+	if (!qoriq_ptp->regs) {
 		pr_err("ioremap ptp registers failed\n");
 		goto no_ioremap;
 	}
 	getnstimeofday64(&now);
-	ptp_gianfar_settime(&etsects->caps, &now);
+	ptp_qoriq_settime(&qoriq_ptp->caps, &now);
 
 	tmr_ctrl =
-	  (etsects->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT |
-	  (etsects->cksel & CKSEL_MASK) << CKSEL_SHIFT;
+	  (qoriq_ptp->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT |
+	  (qoriq_ptp->cksel & CKSEL_MASK) << CKSEL_SHIFT;
 
-	spin_lock_irqsave(&etsects->lock, flags);
+	spin_lock_irqsave(&qoriq_ptp->lock, flags);
 
-	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl);
-	gfar_write(&etsects->regs->tmr_add,    etsects->tmr_add);
-	gfar_write(&etsects->regs->tmr_prsc,   etsects->tmr_prsc);
-	gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
-	gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
-	set_alarm(etsects);
-	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
+	qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   tmr_ctrl);
+	qoriq_write(&qoriq_ptp->regs->tmr_add,    qoriq_ptp->tmr_add);
+	qoriq_write(&qoriq_ptp->regs->tmr_prsc,   qoriq_ptp->tmr_prsc);
+	qoriq_write(&qoriq_ptp->regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
+	qoriq_write(&qoriq_ptp->regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
+	set_alarm(qoriq_ptp);
+	qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
 
-	spin_unlock_irqrestore(&etsects->lock, flags);
+	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 
-	etsects->clock = ptp_clock_register(&etsects->caps, &dev->dev);
-	if (IS_ERR(etsects->clock)) {
-		err = PTR_ERR(etsects->clock);
+	qoriq_ptp->clock = ptp_clock_register(&qoriq_ptp->caps, &dev->dev);
+	if (IS_ERR(qoriq_ptp->clock)) {
+		err = PTR_ERR(qoriq_ptp->clock);
 		goto no_clock;
 	}
-	gfar_phc_index = ptp_clock_index(etsects->clock);
+	qoriq_ptp->phc_index = ptp_clock_index(qoriq_ptp->clock);
 
-	platform_set_drvdata(dev, etsects);
+	platform_set_drvdata(dev, qoriq_ptp);
 
 	return 0;
 
 no_clock:
-	iounmap(etsects->regs);
+	iounmap(qoriq_ptp->regs);
 no_ioremap:
-	release_resource(etsects->rsrc);
+	release_resource(qoriq_ptp->rsrc);
 no_resource:
-	free_irq(etsects->irq, etsects);
+	free_irq(qoriq_ptp->irq, qoriq_ptp);
 no_node:
-	kfree(etsects);
+	kfree(qoriq_ptp);
 no_memory:
 	return err;
 }
 
-static int gianfar_ptp_remove(struct platform_device *dev)
+static int qoriq_ptp_remove(struct platform_device *dev)
 {
-	struct etsects *etsects = platform_get_drvdata(dev);
+	struct qoriq_ptp *qoriq_ptp = platform_get_drvdata(dev);
 
-	gfar_write(&etsects->regs->tmr_temask, 0);
-	gfar_write(&etsects->regs->tmr_ctrl,   0);
+	qoriq_write(&qoriq_ptp->regs->tmr_temask, 0);
+	qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   0);
 
-	gfar_phc_index = -1;
-	ptp_clock_unregister(etsects->clock);
-	iounmap(etsects->regs);
-	release_resource(etsects->rsrc);
-	free_irq(etsects->irq, etsects);
-	kfree(etsects);
+	ptp_clock_unregister(qoriq_ptp->clock);
+	iounmap(qoriq_ptp->regs);
+	release_resource(qoriq_ptp->rsrc);
+	free_irq(qoriq_ptp->irq, qoriq_ptp);
+	kfree(qoriq_ptp);
 
 	return 0;
 }
@@ -556,17 +568,17 @@ static int gianfar_ptp_remove(struct platform_device *dev)
 };
 MODULE_DEVICE_TABLE(of, match_table);
 
-static struct platform_driver gianfar_ptp_driver = {
+static struct platform_driver qoriq_ptp_driver = {
 	.driver = {
-		.name		= "gianfar_ptp",
+		.name		= "ptp_qoriq",
 		.of_match_table	= match_table,
 	},
-	.probe       = gianfar_ptp_probe,
-	.remove      = gianfar_ptp_remove,
+	.probe       = qoriq_ptp_probe,
+	.remove      = qoriq_ptp_remove,
 };
 
-module_platform_driver(gianfar_ptp_driver);
+module_platform_driver(qoriq_ptp_driver);
 
 MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
-MODULE_DESCRIPTION("PTP clock using the eTSEC");
+MODULE_DESCRIPTION("PTP clock for Freescale QorIQ 1588 timer");
 MODULE_LICENSE("GPL");
-- 
1.7.1

^ permalink raw reply related

* [PATCH net] tun: Fix NULL pointer dereference in XDP redirect
From: Toshiaki Makita @ 2018-05-25  4:32 UTC (permalink / raw)
  To: David S. Miller; +Cc: Toshiaki Makita, netdev, Jason Wang

Calling XDP redirection requires preempt/bh disabled. Especially softirq
can call another XDP function and redirection functions, then percpu
value ri->map can be overwritten to NULL.

This is a generic XDP case called from tun.

[ 3535.736058] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
[ 3535.743974] PGD 0 P4D 0
[ 3535.746530] Oops: 0000 [#1] SMP PTI
[ 3535.750049] Modules linked in: vhost_net vhost tap tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sunrpc vfat fat ext4 mbcache jbd2 intel_rapl skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm ipmi_ssif irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc ses aesni_intel crypto_simd cryptd enclosure hpwdt hpilo glue_helper ipmi_si pcspkr wmi mei_me ioatdma mei ipmi_devintf shpchp dca ipmi_msghandler lpc_ich acpi_power_meter sch_fq_codel ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm smartpqi i40e crc32c_intel scsi_transport_sas tg3 i2c_core ptp pps_core
[ 3535.813456] CPU: 5 PID: 1630 Comm: vhost-1614 Not tainted 4.17.0-rc4 #2
[ 3535.820127] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 11/14/2017
[ 3535.828732] RIP: 0010:__xdp_map_lookup_elem+0x5/0x30
[ 3535.833740] RSP: 0018:ffffb4bc47bf7c58 EFLAGS: 00010246
[ 3535.839009] RAX: ffff9fdfcfea1c40 RBX: 0000000000000000 RCX: ffff9fdf27fe3100
[ 3535.846205] RDX: ffff9fdfca769200 RSI: 0000000000000000 RDI: 0000000000000000
[ 3535.853402] RBP: ffffb4bc491d9000 R08: 00000000000045ad R09: 0000000000000ec0
[ 3535.860597] R10: 0000000000000001 R11: ffff9fdf26c3ce4e R12: ffff9fdf9e72c000
[ 3535.867794] R13: 0000000000000000 R14: fffffffffffffff2 R15: ffff9fdfc82cdd00
[ 3535.874990] FS:  0000000000000000(0000) GS:ffff9fdfcfe80000(0000) knlGS:0000000000000000
[ 3535.883152] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3535.888948] CR2: 0000000000000018 CR3: 0000000bde724004 CR4: 00000000007626e0
[ 3535.896145] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 3535.903342] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 3535.910538] PKRU: 55555554
[ 3535.913267] Call Trace:
[ 3535.915736]  xdp_do_generic_redirect+0x7a/0x310
[ 3535.920310]  do_xdp_generic.part.117+0x285/0x370
[ 3535.924970]  tun_get_user+0x5b9/0x1260 [tun]
[ 3535.929279]  tun_sendmsg+0x52/0x70 [tun]
[ 3535.933237]  handle_tx+0x2ad/0x5f0 [vhost_net]
[ 3535.937721]  vhost_worker+0xa5/0x100 [vhost]
[ 3535.942030]  kthread+0xf5/0x130
[ 3535.945198]  ? vhost_dev_ioctl+0x3b0/0x3b0 [vhost]
[ 3535.950031]  ? kthread_bind+0x10/0x10
[ 3535.953727]  ret_from_fork+0x35/0x40
[ 3535.957334] Code: 0e 74 15 83 f8 10 75 05 e9 49 aa b3 ff f3 c3 0f 1f 80 00 00 00 00 f3 c3 e9 29 9d b3 ff 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <8b> 47 18 83 f8 0e 74 0d 83 f8 10 75 05 e9 49 a9 b3 ff 31 c0 c3
[ 3535.976387] RIP: __xdp_map_lookup_elem+0x5/0x30 RSP: ffffb4bc47bf7c58
[ 3535.982883] CR2: 0000000000000018
[ 3535.987096] ---[ end trace 383b299dd1430240 ]---
[ 3536.131325] Kernel panic - not syncing: Fatal exception
[ 3536.137484] Kernel Offset: 0x26a00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 3536.281406] ---[ end Kernel panic - not syncing: Fatal exception ]---

And a kernel with generic case fixed still panics in tun driver XDP
redirect, because it did not disable bh.

[ 2055.128746] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
[ 2055.136662] PGD 0 P4D 0
[ 2055.139219] Oops: 0000 [#1] SMP PTI
[ 2055.142736] Modules linked in: vhost_net vhost tap tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sunrpc vfat fat ext4 mbcache jbd2 intel_rapl skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc ses aesni_intel ipmi_ssif crypto_simd enclosure cryptd hpwdt glue_helper ioatdma hpilo wmi dca pcspkr ipmi_si acpi_power_meter ipmi_devintf shpchp mei_me ipmi_msghandler mei lpc_ich sch_fq_codel ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm i40e smartpqi tg3 scsi_transport_sas crc32c_intel i2c_core ptp pps_core
[ 2055.206142] CPU: 6 PID: 1693 Comm: vhost-1683 Tainted: G        W         4.17.0-rc5-fix-tun+ #1
[ 2055.215011] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 11/14/2017
[ 2055.223617] RIP: 0010:__xdp_map_lookup_elem+0x5/0x30
[ 2055.228624] RSP: 0018:ffff998b07607cc0 EFLAGS: 00010246
[ 2055.233892] RAX: ffff8dbd8e235700 RBX: ffff8dbd8ff21c40 RCX: 0000000000000004
[ 2055.241089] RDX: ffff998b097a9000 RSI: 0000000000000000 RDI: 0000000000000000
[ 2055.248286] RBP: 0000000000000000 R08: 00000000000065a8 R09: 0000000000005d80
[ 2055.255483] R10: 0000000000000040 R11: ffff8dbcf0100000 R12: ffff998b097a9000
[ 2055.262681] R13: ffff8dbd8c98c000 R14: 0000000000000000 R15: ffff998b07607d78
[ 2055.269879] FS:  0000000000000000(0000) GS:ffff8dbd8ff00000(0000) knlGS:0000000000000000
[ 2055.278039] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2055.283834] CR2: 0000000000000018 CR3: 0000000c0c8cc005 CR4: 00000000007626e0
[ 2055.291030] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 2055.298227] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 2055.305424] PKRU: 55555554
[ 2055.308153] Call Trace:
[ 2055.310624]  xdp_do_redirect+0x7b/0x380
[ 2055.314499]  tun_get_user+0x10fe/0x12a0 [tun]
[ 2055.318895]  tun_sendmsg+0x52/0x70 [tun]
[ 2055.322852]  handle_tx+0x2ad/0x5f0 [vhost_net]
[ 2055.327337]  vhost_worker+0xa5/0x100 [vhost]
[ 2055.331646]  kthread+0xf5/0x130
[ 2055.334813]  ? vhost_dev_ioctl+0x3b0/0x3b0 [vhost]
[ 2055.339646]  ? kthread_bind+0x10/0x10
[ 2055.343343]  ret_from_fork+0x35/0x40
[ 2055.346950] Code: 0e 74 15 83 f8 10 75 05 e9 e9 aa b3 ff f3 c3 0f 1f 80 00 00 00 00 f3 c3 e9 c9 9d b3 ff 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <8b> 47 18 83 f8 0e 74 0d 83 f8 10 75 05 e9 e9 a9 b3 ff 31 c0 c3
[ 2055.366004] RIP: __xdp_map_lookup_elem+0x5/0x30 RSP: ffff998b07607cc0
[ 2055.372500] CR2: 0000000000000018
[ 2055.375856] ---[ end trace 2a2dcc5e9e174268 ]---
[ 2055.523626] Kernel panic - not syncing: Fatal exception
[ 2055.529796] Kernel Offset: 0x2e000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 2055.677539] ---[ end Kernel panic - not syncing: Fatal exception ]---

Fixes: 761876c857cb ("tap: XDP support")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
---
 drivers/net/tun.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 45d8077..4fc7dbf 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1650,6 +1650,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	else
 		*skb_xdp = 0;
 
+	local_bh_disable();
 	preempt_disable();
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(tun->xdp_prog);
@@ -1676,6 +1677,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 				goto err_redirect;
 			rcu_read_unlock();
 			preempt_enable();
+			local_bh_enable();
 			return NULL;
 		case XDP_TX:
 			get_page(alloc_frag->page);
@@ -1685,6 +1687,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 			tun_xdp_flush(tun->dev);
 			rcu_read_unlock();
 			preempt_enable();
+			local_bh_enable();
 			return NULL;
 		case XDP_PASS:
 			delta = orig_data - xdp.data;
@@ -1704,6 +1707,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	if (!skb) {
 		rcu_read_unlock();
 		preempt_enable();
+		local_bh_enable();
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1714,6 +1718,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 
 	rcu_read_unlock();
 	preempt_enable();
+	local_bh_enable();
 
 	return skb;
 
@@ -1722,6 +1727,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 err_xdp:
 	rcu_read_unlock();
 	preempt_enable();
+	local_bh_enable();
 	this_cpu_inc(tun->pcpu_stats->rx_dropped);
 	return NULL;
 }
@@ -1917,16 +1923,22 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		struct bpf_prog *xdp_prog;
 		int ret;
 
+		local_bh_disable();
+		preempt_disable();
 		rcu_read_lock();
 		xdp_prog = rcu_dereference(tun->xdp_prog);
 		if (xdp_prog) {
 			ret = do_xdp_generic(xdp_prog, skb);
 			if (ret != XDP_PASS) {
 				rcu_read_unlock();
+				preempt_enable();
+				local_bh_enable();
 				return total_len;
 			}
 		}
 		rcu_read_unlock();
+		preempt_enable();
+		local_bh_enable();
 	}
 
 	rcu_read_lock();
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH net] ipv4: remove warning in ip_recv_error
From: Willem de Bruijn @ 2018-05-25  3:59 UTC (permalink / raw)
  To: David Miller; +Cc: Network Development, Willem de Bruijn
In-Reply-To: <20180524.221810.979164721768479533.davem@davemloft.net>

On Thu, May 24, 2018 at 10:18 PM, David Miller <davem@davemloft.net> wrote:
> From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
> Date: Wed, 23 May 2018 14:29:52 -0400
>
>> From: Willem de Bruijn <willemb@google.com>
>>
>> A precondition check in ip_recv_error triggered on an otherwise benign
>> race. Remove the warning.
>>
>> The warning triggers when passing an ipv6 socket to this ipv4 error
>> handling function. RaceFuzzer was able to trigger it due to a race
>> in setsockopt IPV6_ADDRFORM.
>  ...
>> This socket option converts a v6 socket that is connected to a v4 peer
>> to an v4 socket. It updates the socket on the fly, changing fields in
>> sk as well as other structs. This is inherently non-atomic. It races
>> with the lockless udp_recvmsg path.
>>
>> No other code makes an assumption that these fields are updated
>> atomically. It is benign here, too, as ip_recv_error cares only about
>> the protocol of the skbs enqueued on the error queue, for which
>> sk_family is not a precise predictor (thanks to another isue with
>> IPV6_ADDRFORM).
>>
>> Link: http://lkml.kernel.org/r/20180518120826.GA19515@dragonet.kaist.ac.kr
>> Fixes: ("7ce875e5ecb8 ipv4: warn once on passing AF_INET6 socket to ip_recv_error")
>> Reported-by: DaeRyong Jeong <threeearcat@gmail.com>
>> Suggested-by: Eric Dumazet <edumazet@google.com>
>> Signed-off-by: Willem de Bruijn <willemb@google.com>
>
> Applied and queued up for -stable.
>
> The SHA1_ID doesn't go inside the (" ") of the Fixes tag, I fixed
> it up this time.

Thanks David. Sorry about that.

I'll send a checkpatch.pl patch to catch such typos myself
in the future. Something like

+# Check format of Fixes line
+               if ($in_commit_log && $line =~ /^\s*fixes:/i) {
+                       if ($line !~ /^fixes:\s[0-9a-f]{12,40}\s\(".*"\)$/i) {
+                               WARN("BAD_FIXES",
+                                    "Fixes tag is not of form
\"Fixes: <12+ chars of sha1> \(\"title line\"\)");
+                       }
+               }
+

though it seems that Fixes lines are expressly omitted from strict
commit style checking at the moment. I don't immediately see why.

^ permalink raw reply

* [PATCH net-next] net: dsa: dsa_loop: Make dynamic debugging helpful
From: Florian Fainelli @ 2018-05-25  3:52 UTC (permalink / raw)
  To: netdev
  Cc: Florian Fainelli, Andrew Lunn, Vivien Didelot, David S. Miller,
	open list

Remove redundant debug prints from phy_read/write since we can trace those
calls through trace events. Enhance dynamic debug prints to print arguments
which helps figuring how what is going on at the driver level with higher level
configuration interfaces.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/dsa_loop.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index 58f14af04639..816f34d64736 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -67,7 +67,7 @@ static struct phy_device *phydevs[PHY_MAX_ADDR];
 static enum dsa_tag_protocol dsa_loop_get_protocol(struct dsa_switch *ds,
 						   int port)
 {
-	dev_dbg(ds->dev, "%s\n", __func__);
+	dev_dbg(ds->dev, "%s: port: %d\n", __func__, port);
 
 	return DSA_TAG_PROTO_NONE;
 }
@@ -124,8 +124,6 @@ static int dsa_loop_phy_read(struct dsa_switch *ds, int port, int regnum)
 	struct mii_bus *bus = ps->bus;
 	int ret;
 
-	dev_dbg(ds->dev, "%s\n", __func__);
-
 	ret = mdiobus_read_nested(bus, ps->port_base + port, regnum);
 	if (ret < 0)
 		ps->ports[port].mib[DSA_LOOP_PHY_READ_ERR].val++;
@@ -142,8 +140,6 @@ static int dsa_loop_phy_write(struct dsa_switch *ds, int port,
 	struct mii_bus *bus = ps->bus;
 	int ret;
 
-	dev_dbg(ds->dev, "%s\n", __func__);
-
 	ret = mdiobus_write_nested(bus, ps->port_base + port, regnum, value);
 	if (ret < 0)
 		ps->ports[port].mib[DSA_LOOP_PHY_WRITE_ERR].val++;
@@ -156,7 +152,8 @@ static int dsa_loop_phy_write(struct dsa_switch *ds, int port,
 static int dsa_loop_port_bridge_join(struct dsa_switch *ds, int port,
 				     struct net_device *bridge)
 {
-	dev_dbg(ds->dev, "%s\n", __func__);
+	dev_dbg(ds->dev, "%s: port: %d, bridge: %s\n",
+		__func__, port, bridge->name);
 
 	return 0;
 }
@@ -164,19 +161,22 @@ static int dsa_loop_port_bridge_join(struct dsa_switch *ds, int port,
 static void dsa_loop_port_bridge_leave(struct dsa_switch *ds, int port,
 				       struct net_device *bridge)
 {
-	dev_dbg(ds->dev, "%s\n", __func__);
+	dev_dbg(ds->dev, "%s: port: %d, bridge: %s\n",
+		__func__, port, bridge->name);
 }
 
 static void dsa_loop_port_stp_state_set(struct dsa_switch *ds, int port,
 					u8 state)
 {
-	dev_dbg(ds->dev, "%s\n", __func__);
+	dev_dbg(ds->dev, "%s: port: %d, state: %d\n",
+		__func__, port, state);
 }
 
 static int dsa_loop_port_vlan_filtering(struct dsa_switch *ds, int port,
 					bool vlan_filtering)
 {
-	dev_dbg(ds->dev, "%s\n", __func__);
+	dev_dbg(ds->dev, "%s: port: %d, vlan_filtering: %d\n",
+		__func__, port, vlan_filtering);
 
 	return 0;
 }
@@ -188,7 +188,8 @@ dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port,
 	struct dsa_loop_priv *ps = ds->priv;
 	struct mii_bus *bus = ps->bus;
 
-	dev_dbg(ds->dev, "%s\n", __func__);
+	dev_dbg(ds->dev, "%s: port: %d, vlan: %d-%d",
+		__func__, port, vlan->vid_begin, vlan->vid_end);
 
 	/* Just do a sleeping operation to make lockdep checks effective */
 	mdiobus_read(bus, ps->port_base + port, MII_BMSR);
@@ -209,8 +210,6 @@ static void dsa_loop_port_vlan_add(struct dsa_switch *ds, int port,
 	struct dsa_loop_vlan *vl;
 	u16 vid;
 
-	dev_dbg(ds->dev, "%s\n", __func__);
-
 	/* Just do a sleeping operation to make lockdep checks effective */
 	mdiobus_read(bus, ps->port_base + port, MII_BMSR);
 
@@ -222,6 +221,9 @@ static void dsa_loop_port_vlan_add(struct dsa_switch *ds, int port,
 			vl->untagged |= BIT(port);
 		else
 			vl->untagged &= ~BIT(port);
+
+		dev_dbg(ds->dev, "%s: port: %d vlan: %d, %stagged, pvid: %d\n",
+			__func__, port, vid, untagged ? "un" : "", pvid);
 	}
 
 	if (pvid)
@@ -237,8 +239,6 @@ static int dsa_loop_port_vlan_del(struct dsa_switch *ds, int port,
 	struct dsa_loop_vlan *vl;
 	u16 vid, pvid = ps->pvid;
 
-	dev_dbg(ds->dev, "%s\n", __func__);
-
 	/* Just do a sleeping operation to make lockdep checks effective */
 	mdiobus_read(bus, ps->port_base + port, MII_BMSR);
 
@@ -251,6 +251,9 @@ static int dsa_loop_port_vlan_del(struct dsa_switch *ds, int port,
 
 		if (pvid == vid)
 			pvid = 1;
+
+		dev_dbg(ds->dev, "%s: port: %d vlan: %d, %stagged, pvid: %d\n",
+			__func__, port, vid, untagged ? "un" : "", pvid);
 	}
 	ps->pvid = pvid;
 
-- 
2.14.1

^ permalink raw reply related

* Re: [PATCH net-next 0/7] net: bridge: Notify about bridge VLANs
From: Florian Fainelli @ 2018-05-25  3:41 UTC (permalink / raw)
  To: Petr Machata, netdev, devel, bridge
  Cc: jiri, idosch, davem, razvan.stefanescu, gregkh, stephen, andrew,
	vivien.didelot, nikolay
In-Reply-To: <cover.1527173527.git.petrm@mellanox.com>



On 05/24/2018 08:09 AM, Petr Machata wrote:
> In commit 946a11e7408e ("mlxsw: spectrum_span: Allow bridge for gretap
> mirror"), mlxsw got support for offloading mirror-to-gretap such that
> the underlay packet path involves a bridge. In that case, the offload is
> also influenced by PVID setting of said bridge. However, changes to VLAN
> configuration of the bridge itself do not generate switchdev
> notifications, so there's no mechanism to prod mlxsw to update the
> offload when these settings change.
> 
> In this patchset, the problem is resolved by distributing the switchdev
> notification SWITCHDEV_OBJ_ID_PORT_VLAN also for configuration changes
> on bridge VLANs. Since stacked devices distribute the notification to
> lower devices, such event eventually reaches the driver, which can
> determine whether it's a bridge or port VLAN by inspecting orig_dev.
> 
> To keep things consistent, the newly-distributed notifications observe
> the same protocol as the existing ones: dual prepare/commit, with
> -EOPNOTSUPP indicating lack of support, even though there's currently
> nothing to prepare for and nothing to support. Correspondingly, all
> switchdev drivers have been updated to return -EOPNOTSUPP for bridge
> VLAN notifications.

This is great, see the other two emails about why I like it so much:

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>

Thanks!

> 
> In patch #1, the code to send notifications for adding and deleting is
> factored out into two named functions.
> 
> In patches #2-#5, respectively for mlxsw, rocker, DSA and DPAA2 ethsw,
> the new notifications (which are not enabled yet) are ignored to
> maintain the current behavior.
> 
> In patch #6, the notification is actually enabled.
> 
> In patch #7, mlxsw is changed to update offloads of mirror-to-gre also
> for bridge-related notifications.
> 
> Petr Machata (7):
>   net: bridge: Extract boilerplate around switchdev_port_obj_*()
>   mlxsw: spectrum_switchdev: Ignore bridge VLAN events
>   rocker: rocker_main: Ignore bridge VLAN events
>   dsa: port: Ignore bridge VLAN events
>   staging: fsl-dpaa2: ethsw: Ignore bridge VLAN events
>   net: bridge: Notify about bridge VLANs
>   mlxsw: spectrum_switchdev: Schedule respin during trans prepare
> 
>  .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   |  8 ++-
>  drivers/net/ethernet/rocker/rocker_main.c          |  6 +++
>  drivers/staging/fsl-dpaa2/ethsw/ethsw.c            |  6 +++
>  net/bridge/br_vlan.c                               | 58 ++++++++++++++--------
>  net/dsa/port.c                                     |  6 +++
>  5 files changed, 62 insertions(+), 22 deletions(-)
> 

-- 
Florian

^ permalink raw reply

* Re: [PATCH net-next 0/7] net: bridge: Notify about bridge VLANs
From: Florian Fainelli @ 2018-05-25  3:40 UTC (permalink / raw)
  To: Petr Machata, netdev, devel, bridge
  Cc: andrew, nikolay, gregkh, vivien.didelot, idosch, jiri,
	razvan.stefanescu, davem
In-Reply-To: <61962919-cf93-cd7b-f248-9dfee5c9529d@gmail.com>



On 05/24/2018 10:20 AM, Florian Fainelli wrote:
> Hi Petr,
> 
> On 05/24/2018 08:09 AM, Petr Machata wrote:
>> In commit 946a11e7408e ("mlxsw: spectrum_span: Allow bridge for gretap
>> mirror"), mlxsw got support for offloading mirror-to-gretap such that
>> the underlay packet path involves a bridge. In that case, the offload is
>> also influenced by PVID setting of said bridge. However, changes to VLAN
>> configuration of the bridge itself do not generate switchdev
>> notifications, so there's no mechanism to prod mlxsw to update the
>> offload when these settings change.
>>
>> In this patchset, the problem is resolved by distributing the switchdev
>> notification SWITCHDEV_OBJ_ID_PORT_VLAN also for configuration changes
>> on bridge VLANs. Since stacked devices distribute the notification to
>> lower devices, such event eventually reaches the driver, which can
>> determine whether it's a bridge or port VLAN by inspecting orig_dev.
>>
>> To keep things consistent, the newly-distributed notifications observe
>> the same protocol as the existing ones: dual prepare/commit, with
>> -EOPNOTSUPP indicating lack of support, even though there's currently
>> nothing to prepare for and nothing to support. Correspondingly, all
>> switchdev drivers have been updated to return -EOPNOTSUPP for bridge
>> VLAN notifications.
> 
> You seem to have approached the bridge changes a little differently from
> this series:
> 
> https://lists.linux-foundation.org/pipermail/bridge/2016-November/010112.html
> 
> Both have the same intent that by targeting the bridge device itself,
> you can propagate that through switchdev to the switch drivers, and in
> turn create configurations where for instance, you have:
> 
> - CPU/management port present in specific VLANs that is a subset or
> superset of the VLANs configured on front-panel ports
> - CPU/management port tagged/untagged in specific VLANs which can be a
> different setting from the front-panel ports
> 
> One problem we have in DSA at the moment is that we always add the CPU
> port to the VLANs configured to the front-panel port but we do this with
> the same attributes as the front panel ports! For instance, if you add
> Port 0 to VLAN1 untagged, the the CPU port also gets added to that
> VLAN1, also untagged. As long as there is just one VLAN untagged, this
> is not much of a problem. Now do this with another VLAN or another port,
> and the CPU can no longer differentiate the traffic from which VLAN it
> is coming from, no bueno.
> 
> I had specifically changed b53 to always add the CPU port as tagged,
> because that would always allow for differentiating traffic, but I would
> rather have the capability to configure that at the bridge layer, which
> you series seem to allow.
> 
> For the record, here is what the first commit in the series intended to
> let an user do:
> 
> The following happens now (assuming bridge master device is already
> created):
> 
> bridge vlan add vid 2 dev port0 pvid untagged
> 	-> port0 (e.g: switch port 0) gets programmed
> 	-> CPU port gets programmed
> bridge vlan add vid 2 dev br0 self
> 	-> CPU port gets programmed
> bridge vlan add vid 2 dev port0
> 	-> port0 (switch port 0) gets programmed
> 
> Are these use cases possible with your series? It seems to me like it is
> if we drop the netif_is_bridge_master() checks and resolve orig_dev as
> being a hint for the CPU/management port.

So I changed your code a little bit in net/dsa/port.c to verify what
would happen and so far, this looks good except that I am seeing more
programming events than I am expecting. The change I did is the following:

diff --git a/net/dsa/port.c b/net/dsa/port.c
index ed0595459df1..37385e491117 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -253,7 +253,7 @@ int dsa_port_vlan_add(struct dsa_port *dp,
        };

        if (netif_is_bridge_master(vlan->obj.orig_dev))
-               return -EOPNOTSUPP;
+               info.port = dp->cpu_dp->index;

        if (br_vlan_enabled(dp->bridge_dev))
                return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
@@ -271,7 +271,7 @@ int dsa_port_vlan_del(struct dsa_port *dp,
        };

        if (netif_is_bridge_master(vlan->obj.orig_dev))
-               return -EOPNOTSUPP;
+               info.port = dp->cpu_dp->index;

        if (br_vlan_enabled(dp->bridge_dev))
                return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);

And the commands above result in the following:

root@net-vm:~# bridge vlan add vid 2 dev lan1 pvid untagged
[  478.065728] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 0
vlan: 2, untagged
[  478.066440] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 5
vlan: 2, untagged
[  478.067890] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 5
vlan: 2, tagged
[  478.068486] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 5
vlan: 2, tagged
root@net-vm:~# bridge vlan add vid 2 dev br0 self
[  507.931313] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 5
vlan: 2, tagged
[  507.931826] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 5
vlan: 2, tagged
root@net-vm:~# bridge vlan add vid 2 dev lan1
[  518.955814] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 0
vlan: 2, tagged
[  518.956454] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 5
vlan: 2, tagged
root@net-vm:~#

So commands #1 and #2 get too many events targeting the CPU port, if we
remove the following hunk:

diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index b93511726069..60ecc87bf6c0 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -212,7 +212,7 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
        if (ds->index == info->sw_index)
                set_bit(info->port, members);
        for (port = 0; port < ds->num_ports; port++)
-               if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+               if (dsa_is_dsa_port(ds, port))
                        set_bit(port, members);

        if (switchdev_trans_ph_prepare(trans))

Then we get the expected number of events for the ports:

root@net-vm:~# bridge vlan add vid 2 dev lan1 pvid untagged
[  111.906710] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 0
vlan: 2, untagged, pvid: 1
[  111.908988] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 5
vlan: 2, tagged, pvid: 0
root@net-vm:~# bridge vlan add vid 2 dev br0 self
[  121.829272] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 5
vlan: 2, tagged, pvid: 0
root@net-vm:~# bridge vlan add vid 2 dev lan1
[  133.224113] dsa-loop fixed-0:1f: dsa_loop_port_vlan_add: port: 0
vlan: 2, tagged, pvid: 0

Andrew, Vivien, if the following hunks get applied are we possibly
breaking mv88e6xxx? This is the use case that is really missing IMHO at
the moment in DSA: we cannot control the VLAN membership and attributes
of the CPU port(s), so either we make it always tagged in every VLAN
(not great), or we introduce the ability to target the CPU port which is
what Petr's patches + mine do.

Thanks a second time for reading me :)
-- 
Florian

^ permalink raw reply related

* Re: [PATCH net-next 0/8] nfp: offload LAG for tc flower egress
From: David Miller @ 2018-05-25  3:11 UTC (permalink / raw)
  To: jakub.kicinski; +Cc: netdev, oss-drivers, jiri, j.vosburgh, vfalico, andy
In-Reply-To: <20180524022255.18548-1-jakub.kicinski@netronome.com>

From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 23 May 2018 19:22:47 -0700

> This series from John adds bond offload to the nfp driver.  Patch 5
> exposes the hash type for NETDEV_LAG_TX_TYPE_HASH to make sure nfp
> hashing matches that of the software LAG.  This may be unnecessarily
> conservative, let's see what LAG maintainers think :)

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next] hv_netvsc: fix bogus ifalias on network device
From: David Miller @ 2018-05-25  3:10 UTC (permalink / raw)
  To: stephen; +Cc: netdev, sthemmin
In-Reply-To: <20180524010200.11722-1-sthemmin@microsoft.com>

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Wed, 23 May 2018 18:02:00 -0700

> If the guest network adapter is not configured with DeviceNaming
> enabled on the host, then the query for friendly name will return
> success but with a zero length name. Which then leads to a garbage value
> (stack contents) for ifalias.
> 
> Fix is simple, just don't set name if  host doesn't return it.
> 
> Fixes: 0fe554a46a0f ("hv_netvsc: propogate Hyper-V friendly name into interface alias")
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>

Applied.

^ permalink raw reply

* Re: [RFC v5 0/5] virtio: support packed ring
From: Tiwei Bie @ 2018-05-25  3:07 UTC (permalink / raw)
  To: Jason Wang; +Cc: mst, virtualization, linux-kernel, netdev, wexu, jfreimann
In-Reply-To: <b19e0888-61dc-c067-0b05-fde3b5eb0902@redhat.com>

On Fri, May 25, 2018 at 10:31:26AM +0800, Jason Wang wrote:
> On 2018年05月22日 16:16, Tiwei Bie wrote:
> > Hello everyone,
> > 
> > This RFC implements packed ring support in virtio driver.
> > 
> > Some simple functional tests have been done with Jason's
> > packed ring implementation in vhost (RFC v4):
> > 
> > https://lkml.org/lkml/2018/5/16/501
> > 
> > Both of ping and netperf worked as expected w/ EVENT_IDX
> > disabled. Ping worked as expected w/ EVENT_IDX enabled,
> > but netperf didn't (A hack has been added in the driver
> > to make netperf test pass in this case. The hack can be
> > found by `grep -rw XXX` in the code).
> 
> Looks like this is because a bug in vhost which wrongly track signalled_used
> and may miss an interrupt. After fixing that, both side works like a charm.
> 
> I'm testing vIOMMU and zerocopy, and will post a new version shortly. (Hope
> it would be the last RFC version).

Great to know that. Thanks a lot! :)

Best regards,
Tiwei Bie

^ permalink raw reply

* Re: [PATCH net v2] enic: set DMA mask to 47 bit
From: David Miller @ 2018-05-25  3:06 UTC (permalink / raw)
  To: gvaradar; +Cc: netdev, benve
In-Reply-To: <20180523181739.1347-1-gvaradar@cisco.com>

From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Wed, 23 May 2018 11:17:39 -0700

> In commit 624dbf55a359b ("driver/net: enic: Try DMA 64 first, then
> failover to DMA") DMA mask was changed from 40 bits to 64 bits.
> Hardware actually supports only 47 bits.
> 
> Fixes: 624dbf55a359b ("driver/net: enic: Try DMA 64 first, then failover to DMA")
> Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH v2 net-next 0/3] net: Update fib_table_lookup tracepoints
From: David Miller @ 2018-05-25  3:01 UTC (permalink / raw)
  To: dsahern; +Cc: netdev, dsahern
In-Reply-To: <20180524000849.6553-1-dsahern@kernel.org>

From: dsahern@kernel.org
Date: Wed, 23 May 2018 17:08:46 -0700

> From: David Ahern <dsahern@gmail.com>
> 
> Update the FIB lookup tracepoints to include ip proto and port fields
> from the flow struct. In the process make the IPv4 tracepoint inline
> with IPv6 which is much easier to use and follow the lookup and result.
> 
> Remove the tracepoint in fib_validate_source which does not provide
> value above the fib_table_lookup which immediately follows it.
> 
> v2
> - move CREATE_TRACE_POINTS for the v6 tracepoint to route.c to handle
>   its need for an internal function to convert route type to error and
>   handle IPv6 as a module or builtin. Reported by kbuild robot.

Series applied.

^ permalink raw reply

* Re: [Patch net-next] net_sched: switch to rcu_work
From: David Miller @ 2018-05-25  2:56 UTC (permalink / raw)
  To: xiyou.wangcong; +Cc: netdev, tj, paulmck, jhs
In-Reply-To: <20180523222653.19376-1-xiyou.wangcong@gmail.com>

From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 23 May 2018 15:26:53 -0700

> Commit 05f0fe6b74db ("RCU, workqueue: Implement rcu_work") introduces
> new API's for dispatching work in a RCU callback. Now we can just
> switch to the new API's for tc filters. This could get rid of a lot
> of code.
> 
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH v2] ppp: remove the PPPIOCDETACH ioctl
From: David Miller @ 2018-05-25  2:55 UTC (permalink / raw)
  To: ebiggers3
  Cc: linux-ppp, paulus, netdev, linux-fsdevel, linux-kernel, g.nault,
	syzkaller-bugs, ebiggers
In-Reply-To: <20180523213738.146911-1-ebiggers3@gmail.com>

From: Eric Biggers <ebiggers3@gmail.com>
Date: Wed, 23 May 2018 14:37:38 -0700

> From: Eric Biggers <ebiggers@google.com>
> 
> The PPPIOCDETACH ioctl effectively tries to "close" the given ppp file
> before f_count has reached 0, which is fundamentally a bad idea.  It
> does check 'f_count < 2', which excludes concurrent operations on the
> file since they would only be possible with a shared fd table, in which
> case each fdget() would take a file reference.  However, it fails to
> account for the fact that even with 'f_count == 1' the file can still be
> linked into epoll instances.  As reported by syzbot, this can trivially
> be used to cause a use-after-free.
> 
> Yet, the only known user of PPPIOCDETACH is pppd versions older than
> ppp-2.4.2, which was released almost 15 years ago (November 2003).
> Also, PPPIOCDETACH apparently stopped working reliably at around the
> same time, when the f_count check was added to the kernel, e.g. see
> https://lkml.org/lkml/2002/12/31/83.  Also, the current 'f_count < 2'
> check makes PPPIOCDETACH only work in single-threaded applications; it
> always fails if called from a multithreaded application.
> 
> All pppd versions released in the last 15 years just close() the file
> descriptor instead.
> 
> Therefore, instead of hacking around this bug by exporting epoll
> internals to modules, and probably missing other related bugs, just
> remove the PPPIOCDETACH ioctl and see if anyone actually notices.  Leave
> a stub in place that prints a one-time warning and returns EINVAL.
> 
> Reported-by: syzbot+16363c99d4134717c05b@syzkaller.appspotmail.com
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Eric Biggers <ebiggers@google.com>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: Add PPS and Flexible PPS support
From: kbuild test robot @ 2018-05-25  2:42 UTC (permalink / raw)
  To: Jose Abreu
  Cc: kbuild-all, netdev, Jose Abreu, David S. Miller, Joao Pinto,
	Vitor Soares, Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <072478625b1cb3d4af9e3b42f83ece7303fd554e.1526993857.git.joabreu@synopsys.com>

[-- Attachment #1: Type: text/plain, Size: 919 bytes --]

Hi Jose,

I love your patch! Yet something to improve:

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Jose-Abreu/net-stmmac-Add-PPS-and-Flexible-PPS-support/20180525-074128
config: arm-sunxi_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm 

All errors (new ones prefixed by >>):

   drivers/net/ethernet/stmicro/stmmac/dwmac5.o: In function `dwmac5_flex_pps_config':
>> dwmac5.c:(.text+0x974): undefined reference to `__aeabi_uldivmod'

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 22791 bytes --]

^ permalink raw reply

* Re: [RFC v5 0/5] virtio: support packed ring
From: Jason Wang @ 2018-05-25  2:31 UTC (permalink / raw)
  To: Tiwei Bie, mst, virtualization, linux-kernel, netdev; +Cc: wexu, jfreimann
In-Reply-To: <20180522081648.14768-1-tiwei.bie@intel.com>



On 2018年05月22日 16:16, Tiwei Bie wrote:
> Hello everyone,
>
> This RFC implements packed ring support in virtio driver.
>
> Some simple functional tests have been done with Jason's
> packed ring implementation in vhost (RFC v4):
>
> https://lkml.org/lkml/2018/5/16/501
>
> Both of ping and netperf worked as expected w/ EVENT_IDX
> disabled. Ping worked as expected w/ EVENT_IDX enabled,
> but netperf didn't (A hack has been added in the driver
> to make netperf test pass in this case. The hack can be
> found by `grep -rw XXX` in the code).

Looks like this is because a bug in vhost which wrongly track 
signalled_used and may miss an interrupt. After fixing that, both side 
works like a charm.

I'm testing vIOMMU and zerocopy, and will post a new version shortly. 
(Hope it would be the last RFC version).

Thanks

^ permalink raw reply

* Re: [PATCH bpf-next v7 6/6] selftests/bpf: test for seg6local End.BPF action
From: Y Song @ 2018-05-25  2:30 UTC (permalink / raw)
  To: Mathieu Xhonneux; +Cc: netdev, Daniel Borkmann, dlebrun, Alexei Starovoitov
In-Reply-To: <ec46c74ede080ae7371c36efc8aa6792b445b9ac.1526824042.git.m.xhonneux@gmail.com>

When compiling latest bpf-next, I hit the following compilation error:

clang -I. -I./include/uapi -I../../../include/uapi -idirafter
/usr/local/include -idirafter
/data/users/yhs/work/llvm/build/install/lib/clang/7.0.0/include
-idirafter /usr/include -Wno-compare-distinct-pointer-types \
         -O2 -target bpf -emit-llvm -c test_lwt_seg6local.c -o - |      \
llc -march=bpf -mcpu=generic  -filetype=obj -o
/data/users/yhs/work/net-next/tools/testing/selftests/bpf/test_lwt_seg6local.o
test_lwt_seg6local.c:4:10: fatal error: 'linux/seg6_local.h' file not found
#include <linux/seg6_local.h>
         ^~~~~~~~~~~~~~~~~~~~
1 error generated.
make: Leaving directory
`/data/users/yhs/work/net-next/tools/testing/selftests/bpf'

Should the seg6_local.h be copied to tools/ directory?

On Sun, May 20, 2018 at 6:58 AM, Mathieu Xhonneux <m.xhonneux@gmail.com> wrote:
> Add a new test for the seg6local End.BPF action. The following helpers
> are also tested:
>
> - bpf_lwt_push_encap within the LWT BPF IN hook
> - bpf_lwt_seg6_action
> - bpf_lwt_seg6_adjust_srh
> - bpf_lwt_seg6_store_bytes
>
> A chain of End.BPF actions is built. The SRH is injected through a LWT
> BPF IN hook before entering this chain. Each End.BPF action validates
> the previous one, otherwise the packet is dropped. The test succeeds
> if the last node in the chain receives the packet and the UDP datagram
> contained can be retrieved from userspace.
>
> Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
> ---
>  tools/include/uapi/linux/bpf.h                    |  97 ++++-
>  tools/testing/selftests/bpf/Makefile              |   6 +-
>  tools/testing/selftests/bpf/bpf_helpers.h         |  12 +
>  tools/testing/selftests/bpf/test_lwt_seg6local.c  | 437 ++++++++++++++++++++++
>  tools/testing/selftests/bpf/test_lwt_seg6local.sh | 140 +++++++
>  5 files changed, 689 insertions(+), 3 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/test_lwt_seg6local.c
>  create mode 100755 tools/testing/selftests/bpf/test_lwt_seg6local.sh
>
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 97446bbe2ca5..b217a33d80a4 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -141,6 +141,7 @@ enum bpf_prog_type {
>         BPF_PROG_TYPE_SK_MSG,
>         BPF_PROG_TYPE_RAW_TRACEPOINT,
>         BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> +       BPF_PROG_TYPE_LWT_SEG6LOCAL,
>  };
>
>  enum bpf_attach_type {
> @@ -1902,6 +1903,90 @@ union bpf_attr {
>   *             egress otherwise). This is the only flag supported for now.
>   *     Return
>   *             **SK_PASS** on success, or **SK_DROP** on error.
> + *
> + * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
> + *     Description
> + *             Encapsulate the packet associated to *skb* within a Layer 3
> + *             protocol header. This header is provided in the buffer at
> + *             address *hdr*, with *len* its size in bytes. *type* indicates
> + *             the protocol of the header and can be one of:
> + *
> + *             **BPF_LWT_ENCAP_SEG6**
> + *                     IPv6 encapsulation with Segment Routing Header
> + *                     (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
> + *                     the IPv6 header is computed by the kernel.
> + *             **BPF_LWT_ENCAP_SEG6_INLINE**
> + *                     Only works if *skb* contains an IPv6 packet. Insert a
> + *                     Segment Routing Header (**struct ipv6_sr_hdr**) inside
> + *                     the IPv6 header.
> + *
> + *             A call to this helper is susceptible to change the underlaying
> + *             packet buffer. Therefore, at load time, all checks on pointers
> + *             previously done by the verifier are invalidated and must be
> + *             performed again, if the helper is used in combination with
> + *             direct packet access.
> + *     Return
> + *             0 on success, or a negative error in case of failure.
> + *
> + * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
> + *     Description
> + *             Store *len* bytes from address *from* into the packet
> + *             associated to *skb*, at *offset*. Only the flags, tag and TLVs
> + *             inside the outermost IPv6 Segment Routing Header can be
> + *             modified through this helper.
> + *
> + *             A call to this helper is susceptible to change the underlaying
> + *             packet buffer. Therefore, at load time, all checks on pointers
> + *             previously done by the verifier are invalidated and must be
> + *             performed again, if the helper is used in combination with
> + *             direct packet access.
> + *     Return
> + *             0 on success, or a negative error in case of failure.
> + *
> + * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
> + *     Description
> + *             Adjust the size allocated to TLVs in the outermost IPv6
> + *             Segment Routing Header contained in the packet associated to
> + *             *skb*, at position *offset* by *delta* bytes. Only offsets
> + *             after the segments are accepted. *delta* can be as well
> + *             positive (growing) as negative (shrinking).
> + *
> + *             A call to this helper is susceptible to change the underlaying
> + *             packet buffer. Therefore, at load time, all checks on pointers
> + *             previously done by the verifier are invalidated and must be
> + *             performed again, if the helper is used in combination with
> + *             direct packet access.
> + *     Return
> + *             0 on success, or a negative error in case of failure.
> + *
> + * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
> + *     Description
> + *             Apply an IPv6 Segment Routing action of type *action* to the
> + *             packet associated to *skb*. Each action takes a parameter
> + *             contained at address *param*, and of length *param_len* bytes.
> + *             *action* can be one of:
> + *
> + *             **SEG6_LOCAL_ACTION_END_X**
> + *                     End.X action: Endpoint with Layer-3 cross-connect.
> + *                     Type of *param*: **struct in6_addr**.
> + *             **SEG6_LOCAL_ACTION_END_T**
> + *                     End.T action: Endpoint with specific IPv6 table lookup.
> + *                     Type of *param*: **int**.
> + *             **SEG6_LOCAL_ACTION_END_B6**
> + *                     End.B6 action: Endpoint bound to an SRv6 policy.
> + *                     Type of param: **struct ipv6_sr_hdr**.
> + *             **SEG6_LOCAL_ACTION_END_B6_ENCAP**
> + *                     End.B6.Encap action: Endpoint bound to an SRv6
> + *                     encapsulation policy.
> + *                     Type of param: **struct ipv6_sr_hdr**.
> + *
> + *             A call to this helper is susceptible to change the underlaying
> + *             packet buffer. Therefore, at load time, all checks on pointers
> + *             previously done by the verifier are invalidated and must be
> + *             performed again, if the helper is used in combination with
> + *             direct packet access.
> + *     Return
> + *             0 on success, or a negative error in case of failure.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -1976,7 +2061,11 @@ union bpf_attr {
>         FN(fib_lookup),                 \
>         FN(sock_hash_update),           \
>         FN(msg_redirect_hash),          \
> -       FN(sk_redirect_hash),
> +       FN(sk_redirect_hash),           \
> +       FN(lwt_push_encap),             \
> +       FN(lwt_seg6_store_bytes),       \
> +       FN(lwt_seg6_adjust_srh),        \
> +       FN(lwt_seg6_action),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> @@ -2043,6 +2132,12 @@ enum bpf_hdr_start_off {
>         BPF_HDR_START_NET,
>  };
>
> +/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
> +enum bpf_lwt_encap_mode {
> +       BPF_LWT_ENCAP_SEG6,
> +       BPF_LWT_ENCAP_SEG6_INLINE
> +};
> +
>  /* user accessible mirror of in-kernel sk_buff.
>   * new fields can only be added to the end of this structure
>   */
> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> index 1eb0fa2aba92..b6222b3f8fab 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -33,7 +33,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
>         sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
>         sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
>         test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
> -       test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o
> +       test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
> +       test_lwt_seg6local.o
>
>  # Order correspond to 'make run_tests' order
>  TEST_PROGS := test_kmod.sh \
> @@ -42,7 +43,8 @@ TEST_PROGS := test_kmod.sh \
>         test_xdp_meta.sh \
>         test_offload.py \
>         test_sock_addr.sh \
> -       test_tunnel.sh
> +       test_tunnel.sh \
> +       test_lwt_seg6local.sh
>
>  # Compile but not part of 'make run_tests'
>  TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
> diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
> index 8f143dfb3700..334d3e8c5e89 100644
> --- a/tools/testing/selftests/bpf/bpf_helpers.h
> +++ b/tools/testing/selftests/bpf/bpf_helpers.h
> @@ -114,6 +114,18 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
>  static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
>                              int plen, __u32 flags) =
>         (void *) BPF_FUNC_fib_lookup;
> +static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
> +                                unsigned int len) =
> +       (void *) BPF_FUNC_lwt_push_encap;
> +static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
> +                                      void *from, unsigned int len) =
> +       (void *) BPF_FUNC_lwt_seg6_store_bytes;
> +static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
> +                                 unsigned int param_len) =
> +       (void *) BPF_FUNC_lwt_seg6_action;
> +static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
> +                                     unsigned int len) =
> +       (void *) BPF_FUNC_lwt_seg6_adjust_srh;
>
>  /* llvm builtin functions that eBPF C program may use to
>   * emit BPF_LD_ABS and BPF_LD_IND instructions
> diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c
> new file mode 100644
> index 000000000000..0575751bc1bc
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c
> @@ -0,0 +1,437 @@
> +#include <stddef.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <linux/seg6_local.h>
> +#include <linux/bpf.h>
> +#include "bpf_helpers.h"
> +#include "bpf_endian.h"
> +
> +#define bpf_printk(fmt, ...)                           \
> +({                                                     \
> +       char ____fmt[] = fmt;                           \
> +       bpf_trace_printk(____fmt, sizeof(____fmt),      \
> +                       ##__VA_ARGS__);                 \
> +})
> +
> +/* Packet parsing state machine helpers. */
> +#define cursor_advance(_cursor, _len) \
> +       ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
> +
> +#define SR6_FLAG_ALERT (1 << 4)
> +
> +#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
> +                               0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
> +#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
> +                               0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
> +#define BPF_PACKET_HEADER __attribute__((packed))
> +
> +struct ip6_t {
> +       unsigned int ver:4;
> +       unsigned int priority:8;
> +       unsigned int flow_label:20;
> +       unsigned short payload_len;
> +       unsigned char next_header;
> +       unsigned char hop_limit;
> +       unsigned long long src_hi;
> +       unsigned long long src_lo;
> +       unsigned long long dst_hi;
> +       unsigned long long dst_lo;
> +} BPF_PACKET_HEADER;
> +
> +struct ip6_addr_t {
> +       unsigned long long hi;
> +       unsigned long long lo;
> +} BPF_PACKET_HEADER;
> +
> +struct ip6_srh_t {
> +       unsigned char nexthdr;
> +       unsigned char hdrlen;
> +       unsigned char type;
> +       unsigned char segments_left;
> +       unsigned char first_segment;
> +       unsigned char flags;
> +       unsigned short tag;
> +
> +       struct ip6_addr_t segments[0];
> +} BPF_PACKET_HEADER;
> +
> +struct sr6_tlv_t {
> +       unsigned char type;
> +       unsigned char len;
> +       unsigned char value[0];
> +} BPF_PACKET_HEADER;
> +
> +__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
> +{
> +       void *cursor, *data_end;
> +       struct ip6_srh_t *srh;
> +       struct ip6_t *ip;
> +       uint8_t *ipver;
> +
> +       data_end = (void *)(long)skb->data_end;
> +       cursor = (void *)(long)skb->data;
> +       ipver = (uint8_t *)cursor;
> +
> +       if ((void *)ipver + sizeof(*ipver) > data_end)
> +               return NULL;
> +
> +       if ((*ipver >> 4) != 6)
> +               return NULL;
> +
> +       ip = cursor_advance(cursor, sizeof(*ip));
> +       if ((void *)ip + sizeof(*ip) > data_end)
> +               return NULL;
> +
> +       if (ip->next_header != 43)
> +               return NULL;
> +
> +       srh = cursor_advance(cursor, sizeof(*srh));
> +       if ((void *)srh + sizeof(*srh) > data_end)
> +               return NULL;
> +
> +       if (srh->type != 4)
> +               return NULL;
> +
> +       return srh;
> +}
> +
> +__attribute__((always_inline))
> +int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
> +                  uint32_t old_pad, uint32_t pad_off)
> +{
> +       int err;
> +
> +       if (new_pad != old_pad) {
> +               err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
> +                                         (int) new_pad - (int) old_pad);
> +               if (err)
> +                       return err;
> +       }
> +
> +       if (new_pad > 0) {
> +               char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +                                       0, 0, 0};
> +               struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
> +
> +               pad_tlv->type = SR6_TLV_PADDING;
> +               pad_tlv->len = new_pad - 2;
> +
> +               err = bpf_lwt_seg6_store_bytes(skb, pad_off,
> +                                              (void *)pad_tlv_buf, new_pad);
> +               if (err)
> +                       return err;
> +       }
> +
> +       return 0;
> +}
> +
> +__attribute__((always_inline))
> +int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
> +                         uint32_t *tlv_off, uint32_t *pad_size,
> +                         uint32_t *pad_off)
> +{
> +       uint32_t srh_off, cur_off;
> +       int offset_valid = 0;
> +       int err;
> +
> +       srh_off = (char *)srh - (char *)(long)skb->data;
> +       // cur_off = end of segments, start of possible TLVs
> +       cur_off = srh_off + sizeof(*srh) +
> +               sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
> +
> +       *pad_off = 0;
> +
> +       // we can only go as far as ~10 TLVs due to the BPF max stack size
> +       #pragma clang loop unroll(full)
> +       for (int i = 0; i < 10; i++) {
> +               struct sr6_tlv_t tlv;
> +
> +               if (cur_off == *tlv_off)
> +                       offset_valid = 1;
> +
> +               if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
> +                       break;
> +
> +               err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
> +               if (err)
> +                       return err;
> +
> +               if (tlv.type == SR6_TLV_PADDING) {
> +                       *pad_size = tlv.len + sizeof(tlv);
> +                       *pad_off = cur_off;
> +
> +                       if (*tlv_off == srh_off) {
> +                               *tlv_off = cur_off;
> +                               offset_valid = 1;
> +                       }
> +                       break;
> +
> +               } else if (tlv.type == SR6_TLV_HMAC) {
> +                       break;
> +               }
> +
> +               cur_off += sizeof(tlv) + tlv.len;
> +       } // we reached the padding or HMAC TLVs, or the end of the SRH
> +
> +       if (*pad_off == 0)
> +               *pad_off = cur_off;
> +
> +       if (*tlv_off == -1)
> +               *tlv_off = cur_off;
> +       else if (!offset_valid)
> +               return -EINVAL;
> +
> +       return 0;
> +}
> +
> +__attribute__((always_inline))
> +int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
> +           struct sr6_tlv_t *itlv, uint8_t tlv_size)
> +{
> +       uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
> +       uint8_t len_remaining, new_pad;
> +       uint32_t pad_off = 0;
> +       uint32_t pad_size = 0;
> +       uint32_t partial_srh_len;
> +       int err;
> +
> +       if (tlv_off != -1)
> +               tlv_off += srh_off;
> +
> +       if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
> +               return -EINVAL;
> +
> +       err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
> +       if (err)
> +               return err;
> +
> +       err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
> +       if (err)
> +               return err;
> +
> +       err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
> +       if (err)
> +               return err;
> +
> +       // the following can't be moved inside update_tlv_pad because the
> +       // bpf verifier has some issues with it
> +       pad_off += sizeof(*itlv) + itlv->len;
> +       partial_srh_len = pad_off - srh_off;
> +       len_remaining = partial_srh_len % 8;
> +       new_pad = 8 - len_remaining;
> +
> +       if (new_pad == 1) // cannot pad for 1 byte only
> +               new_pad = 9;
> +       else if (new_pad == 8)
> +               new_pad = 0;
> +
> +       return update_tlv_pad(skb, new_pad, pad_size, pad_off);
> +}
> +
> +__attribute__((always_inline))
> +int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
> +              uint32_t tlv_off)
> +{
> +       uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
> +       uint8_t len_remaining, new_pad;
> +       uint32_t partial_srh_len;
> +       uint32_t pad_off = 0;
> +       uint32_t pad_size = 0;
> +       struct sr6_tlv_t tlv;
> +       int err;
> +
> +       tlv_off += srh_off;
> +
> +       err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
> +       if (err)
> +               return err;
> +
> +       err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
> +       if (err)
> +               return err;
> +
> +       err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
> +       if (err)
> +               return err;
> +
> +       pad_off -= sizeof(tlv) + tlv.len;
> +       partial_srh_len = pad_off - srh_off;
> +       len_remaining = partial_srh_len % 8;
> +       new_pad = 8 - len_remaining;
> +       if (new_pad == 1) // cannot pad for 1 byte only
> +               new_pad = 9;
> +       else if (new_pad == 8)
> +               new_pad = 0;
> +
> +       return update_tlv_pad(skb, new_pad, pad_size, pad_off);
> +}
> +
> +__attribute__((always_inline))
> +int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
> +{
> +       int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
> +               ((srh->first_segment + 1) << 4);
> +       struct sr6_tlv_t tlv;
> +
> +       if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
> +               return 0;
> +
> +       if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
> +               struct ip6_addr_t egr_addr;
> +
> +               if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
> +                       return 0;
> +
> +               // check if egress TLV value is correct
> +               if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
> +                               ntohll(egr_addr.lo) == 0x4)
> +                       return 1;
> +       }
> +
> +       return 0;
> +}
> +
> +// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
> +// fd00::4
> +SEC("encap_srh")
> +int __encap_srh(struct __sk_buff *skb)
> +{
> +       unsigned long long hi = 0xfd00000000000000;
> +       struct ip6_addr_t *seg;
> +       struct ip6_srh_t *srh;
> +       char srh_buf[72]; // room for 4 segments
> +       int err;
> +
> +       srh = (struct ip6_srh_t *)srh_buf;
> +       srh->nexthdr = 0;
> +       srh->hdrlen = 8;
> +       srh->type = 4;
> +       srh->segments_left = 3;
> +       srh->first_segment = 3;
> +       srh->flags = 0;
> +       srh->tag = 0;
> +
> +       seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
> +
> +       #pragma clang loop unroll(full)
> +       for (unsigned long long lo = 0; lo < 4; lo++) {
> +               seg->lo = htonll(4 - lo);
> +               seg->hi = htonll(hi);
> +               seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
> +       }
> +
> +       err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
> +       if (err)
> +               return BPF_DROP;
> +
> +       return BPF_REDIRECT;
> +}
> +
> +// Add an Egress TLV fc00::4, add the flag A,
> +// and apply End.X action to fc42::1
> +SEC("add_egr_x")
> +int __add_egr_x(struct __sk_buff *skb)
> +{
> +       unsigned long long hi = 0xfc42000000000000;
> +       unsigned long long lo = 0x1;
> +       struct ip6_srh_t *srh = get_srh(skb);
> +       uint8_t new_flags = SR6_FLAG_ALERT;
> +       struct ip6_addr_t addr;
> +       int err, offset;
> +
> +       if (srh == NULL)
> +               return BPF_DROP;
> +
> +       uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
> +                          0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
> +
> +       err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
> +                     (struct sr6_tlv_t *)&tlv, 20);
> +       if (err)
> +               return BPF_DROP;
> +
> +       offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
> +       err = bpf_lwt_seg6_store_bytes(skb, offset,
> +                                      (void *)&new_flags, sizeof(new_flags));
> +       if (err)
> +               return BPF_DROP;
> +
> +       addr.lo = htonll(lo);
> +       addr.hi = htonll(hi);
> +       err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
> +                                 (void *)&addr, sizeof(addr));
> +       if (err)
> +               return BPF_DROP;
> +       return BPF_REDIRECT;
> +}
> +
> +// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a
> +// simple End action
> +SEC("pop_egr")
> +int __pop_egr(struct __sk_buff *skb)
> +{
> +       struct ip6_srh_t *srh = get_srh(skb);
> +       uint16_t new_tag = bpf_htons(2442);
> +       uint8_t new_flags = 0;
> +       int err, offset;
> +
> +       if (srh == NULL)
> +               return BPF_DROP;
> +
> +       if (srh->flags != SR6_FLAG_ALERT)
> +               return BPF_DROP;
> +
> +       if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
> +               return BPF_DROP;
> +
> +       if (!has_egr_tlv(skb, srh))
> +               return BPF_DROP;
> +
> +       err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
> +       if (err)
> +               return BPF_DROP;
> +
> +       offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
> +       if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
> +                                    sizeof(new_flags)))
> +               return BPF_DROP;
> +
> +       offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
> +       if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
> +                                    sizeof(new_tag)))
> +               return BPF_DROP;
> +
> +       return BPF_OK;
> +}
> +
> +// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
> +// then apply a End.T action to reach the last segment
> +SEC("inspect_t")
> +int __inspect_t(struct __sk_buff *skb)
> +{
> +       struct ip6_srh_t *srh = get_srh(skb);
> +       int table = 117;
> +       int err;
> +
> +       if (srh == NULL)
> +               return BPF_DROP;
> +
> +       if (srh->flags != 0)
> +               return BPF_DROP;
> +
> +       if (srh->tag != bpf_htons(2442))
> +               return BPF_DROP;
> +
> +       if (srh->hdrlen != 8) // 4 segments
> +               return BPF_DROP;
> +
> +       err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
> +                                 (void *)&table, sizeof(table));
> +
> +       if (err)
> +               return BPF_DROP;
> +
> +       return BPF_REDIRECT;
> +}
> +
> +char __license[] SEC("license") = "GPL";
> diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
> new file mode 100755
> index 000000000000..1c77994b5e71
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
> @@ -0,0 +1,140 @@
> +#!/bin/bash
> +# Connects 6 network namespaces through veths.
> +# Each NS may have different IPv6 global scope addresses :
> +#   NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
> +# fb00::1           fd00::1  fd00::2  fd00::3  fb00::6
> +#                   fc42::1           fd00::4
> +#
> +# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
> +# IPv6 header with a Segment Routing Header, with segments :
> +#      fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
> +#
> +# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
> +# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
> +# - fd00::2 : remove the TLV, change the flags, add a tag
> +# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
> +#
> +# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
> +# Each End.BPF action will validate the operations applied on the SRH by the
> +# previous BPF program in the chain, otherwise the packet is dropped.
> +#
> +# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
> +# datagram can be read on NS6 when binding to fb00::6.
> +
> +TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
> +
> +cleanup()
> +{
> +       if [ "$?" = "0" ]; then
> +               echo "selftests: test_lwt_seg6local [PASS]";
> +       else
> +               echo "selftests: test_lwt_seg6local [FAILED]";
> +       fi
> +
> +       set +e
> +       ip netns del ns1 2> /dev/null
> +       ip netns del ns2 2> /dev/null
> +       ip netns del ns3 2> /dev/null
> +       ip netns del ns4 2> /dev/null
> +       ip netns del ns5 2> /dev/null
> +       ip netns del ns6 2> /dev/null
> +       rm -f $TMP_FILE
> +}
> +
> +set -e
> +
> +ip netns add ns1
> +ip netns add ns2
> +ip netns add ns3
> +ip netns add ns4
> +ip netns add ns5
> +ip netns add ns6
> +
> +trap cleanup 0 2 3 6 9
> +
> +ip link add veth1 type veth peer name veth2
> +ip link add veth3 type veth peer name veth4
> +ip link add veth5 type veth peer name veth6
> +ip link add veth7 type veth peer name veth8
> +ip link add veth9 type veth peer name veth10
> +
> +ip link set veth1 netns ns1
> +ip link set veth2 netns ns2
> +ip link set veth3 netns ns2
> +ip link set veth4 netns ns3
> +ip link set veth5 netns ns3
> +ip link set veth6 netns ns4
> +ip link set veth7 netns ns4
> +ip link set veth8 netns ns5
> +ip link set veth9 netns ns5
> +ip link set veth10 netns ns6
> +
> +ip netns exec ns1 ip link set dev veth1 up
> +ip netns exec ns2 ip link set dev veth2 up
> +ip netns exec ns2 ip link set dev veth3 up
> +ip netns exec ns3 ip link set dev veth4 up
> +ip netns exec ns3 ip link set dev veth5 up
> +ip netns exec ns4 ip link set dev veth6 up
> +ip netns exec ns4 ip link set dev veth7 up
> +ip netns exec ns5 ip link set dev veth8 up
> +ip netns exec ns5 ip link set dev veth9 up
> +ip netns exec ns6 ip link set dev veth10 up
> +ip netns exec ns6 ip link set dev lo up
> +
> +# All link scope addresses and routes required between veths
> +ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
> +ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
> +ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
> +ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
> +ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
> +ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
> +ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
> +ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
> +ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
> +ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
> +ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
> +ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
> +ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
> +ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
> +ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
> +ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
> +
> +ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
> +ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
> +
> +ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
> +ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
> +
> +ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
> +ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4
> +
> +ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6
> +ip netns exec ns4 ip -6 addr add fc42::1 dev lo
> +ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
> +
> +ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
> +ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8
> +
> +ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
> +ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
> +
> +ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
> +ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
> +ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
> +ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
> +ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
> +
> +ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
> +ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
> +ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
> +
> +ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
> +ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
> +sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
> +kill -INT $!
> +
> +if [[ $(< $TMP_FILE) != "foobar" ]]; then
> +       exit 1
> +fi
> +
> +exit 0
> --
> 2.16.1
>

^ permalink raw reply

* Re: [RFC V4 PATCH 8/8] vhost: event suppression for packed ring
From: Jason Wang @ 2018-05-25  2:28 UTC (permalink / raw)
  To: mst; +Cc: kvm, virtualization, netdev, linux-kernel, jfreimann, wexu,
	tiwei.bie
In-Reply-To: <1526473941-16199-9-git-send-email-jasowang@redhat.com>



On 2018年05月16日 20:32, Jason Wang wrote:
> +static bool vhost_notify_packed(struct vhost_dev *dev,
> +				struct vhost_virtqueue *vq)
> +{
> +	__virtio16 event_off_wrap, event_flags;
> +	__u16 old, new, off_wrap;
> +	bool v;
> +
> +	/* Flush out used descriptors updates. This is paired
> +	 * with the barrier that the Guest executes when enabling
> +	 * interrupts.
> +	 */
> +	smp_mb();
> +
> +	if (vhost_get_avail(vq, event_flags,
> +			   &vq->driver_event->flags) < 0) {
> +		vq_err(vq, "Failed to get driver desc_event_flags");
> +		return true;
> +	}
> +
> +	if (event_flags == cpu_to_vhost16(vq, RING_EVENT_FLAGS_DISABLE))
> +		return false;
> +	else if (event_flags == cpu_to_vhost16(vq, RING_EVENT_FLAGS_ENABLE))
> +		return true;
> +
> +	/* Read desc event flags before event_off and event_wrap */
> +	smp_rmb();
> +
> +	if (vhost_get_avail(vq, event_off_wrap,
> +			    &vq->driver_event->off_warp) < 0) {
> +		vq_err(vq, "Failed to get driver desc_event_off/wrap");
> +		return true;
> +	}
> +
> +	off_wrap = vhost16_to_cpu(vq, event_off_wrap);
> +
> +	old = vq->signalled_used;
> +	v = vq->signalled_used_valid;
> +	new = vq->signalled_used = vq->last_used_idx;
> +	vq->signalled_used_valid = true;

We should move those idx tracking before checking event_flags. Otherwise 
we may lose interrupts because of a wrong signalled_used value.

Thanks

> +
> +	if (unlikely(!v))
> +		return true;
> +
> +	return vhost_vring_packed_need_event(vq, off_wrap, new, old);
> +}

^ permalink raw reply

* Re: [PATCH net-next 00/10] Mirroring tests involving VLAN
From: David Miller @ 2018-05-25  2:26 UTC (permalink / raw)
  To: petrm; +Cc: netdev, linux-kselftest, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

From: Petr Machata <petrm@mellanox.com>
Date: Thu, 24 May 2018 16:27:04 +0200

> This patchset tests mirror-to-gretap with various underlay
> configurations involving VLAN netdevice in particular. Some of the tests
> involve bridges as well, but tests aimed specifically at testing bridges
> (i.e. FDB, STP) are not part of this patchset.
> 
> In patches #1-#6, the codebase is adapted to support the new tests.
> 
> In patch #7, a test for mirroring to VLAN is introduced.
> 
> Patches #8-#10 add three tests where VLAN is part of underlay path after
> gretap encapsulation.

Series applied, thank you.

^ permalink raw reply

* [RFC net-next 4/4] net: sched: cls_matchall: implement offload tcf_proto_op
From: Jakub Kicinski @ 2018-05-25  2:25 UTC (permalink / raw)
  To: netdev; +Cc: jiri, gerlitz.or, sridhar.samudrala, oss-drivers, john.hurley
In-Reply-To: <20180525022539.6799-1-jakub.kicinski@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Add the offload tcf_proto_op in matchall to generate an offload message
for each filter in the given tcf_proto. Call the specified callback with
this new offload message. The function only returns an error if the
callback rejects adding a 'hardware only' rule.

Ensure matchall flags correctly report if the rule is in hw by keeping a
reference counter for the number of instances of the rule offloaded. Only
update the flag when this counter changes from or to 0.

Signed-off-by: John Hurley <john.hurley@netronome.com>
---
 net/sched/cls_matchall.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 2ba721a590a7..a3497bc4ee24 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -21,6 +21,7 @@ struct cls_mall_head {
 	struct tcf_result res;
 	u32 handle;
 	u32 flags;
+	u32 in_hw_count;
 	union {
 		struct work_struct work;
 		struct rcu_head	rcu;
@@ -106,6 +107,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 		mall_destroy_hw_filter(tp, head, cookie, NULL);
 		return err;
 	} else if (err > 0) {
+		head->in_hw_count = err;
 		tcf_block_offload_inc(block, &head->flags);
 	}
 
@@ -246,6 +248,43 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	arg->count++;
 }
 
+static int mall_offload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+			void *cb_priv, struct netlink_ext_ack *extack)
+{
+	struct cls_mall_head *head = rtnl_dereference(tp->root);
+	struct tc_cls_matchall_offload cls_mall = {};
+	struct tcf_block *block = tp->chain->block;
+	int err;
+
+	if (tc_skip_hw(head->flags))
+		return 0;
+
+	tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
+	cls_mall.command = add ?
+		TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
+	cls_mall.exts = &head->exts;
+	cls_mall.cookie = (unsigned long)head;
+
+	err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv);
+	if (err) {
+		if (add && tc_skip_sw(head->flags))
+			return err;
+		return 0;
+	}
+
+	if (add) {
+		if (!head->in_hw_count)
+			tcf_block_offload_inc(block, &head->flags);
+		head->in_hw_count++;
+	} else {
+		head->in_hw_count--;
+		if (!head->in_hw_count)
+			tcf_block_offload_dec(block, &head->flags);
+	}
+
+	return 0;
+}
+
 static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
@@ -300,6 +339,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
 	.change		= mall_change,
 	.delete		= mall_delete,
 	.walk		= mall_walk,
+	.offload	= mall_offload,
 	.dump		= mall_dump,
 	.bind_class	= mall_bind_class,
 	.owner		= THIS_MODULE,
-- 
2.17.0

^ permalink raw reply related

* [RFC net-next 3/4] net: sched: cls_flower: implement offload tcf_proto_op
From: Jakub Kicinski @ 2018-05-25  2:25 UTC (permalink / raw)
  To: netdev; +Cc: jiri, gerlitz.or, sridhar.samudrala, oss-drivers, john.hurley
In-Reply-To: <20180525022539.6799-1-jakub.kicinski@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Add the offload tcf_proto_op in flower to generate an offload message for
each filter in the given tcf_proto. Call the specified callback with this
new offload message. The function only returns an error if the callback
rejects adding a 'hardware only' rule.

A filter contains a flag to indicate if it is in hardware or not. To
ensure the offload function properly maintains this flag, keep a reference
counter for the number of instances of the filter that are in hardware.
Only update the flag when this counter changes from or to 0.

Signed-off-by: John Hurley <john.hurley@netronome.com>
---
 net/sched/cls_flower.c | 51 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index eacaaf803914..c23d338df6a1 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -90,6 +90,7 @@ struct cls_fl_filter {
 	struct list_head list;
 	u32 handle;
 	u32 flags;
+	u32 in_hw_count;
 	union {
 		struct work_struct work;
 		struct rcu_head	rcu;
@@ -289,6 +290,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 		fl_hw_destroy_filter(tp, f, NULL);
 		return err;
 	} else if (err > 0) {
+		f->in_hw_count = err;
 		tcf_block_offload_inc(block, &f->flags);
 	}
 
@@ -1092,6 +1094,54 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
+static int fl_offload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+		      void *cb_priv, struct netlink_ext_ack *extack)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = tp->chain->block;
+	struct fl_flow_mask *mask;
+	struct cls_fl_filter *f;
+	int err;
+
+	list_for_each_entry(mask, &head->masks, list) {
+		list_for_each_entry(f, &mask->filters, list) {
+			if (tc_skip_hw(f->flags))
+				continue;
+
+			tc_cls_common_offload_init(&cls_flower.common, tp,
+						   f->flags, extack);
+			cls_flower.command = add ?
+				TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
+			cls_flower.cookie = (unsigned long)f;
+			cls_flower.dissector = &mask->dissector;
+			cls_flower.mask = &f->mkey;
+			cls_flower.key = &f->key;
+			cls_flower.exts = &f->exts;
+			cls_flower.classid = f->res.classid;
+
+			err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+			if (err) {
+				if (add && tc_skip_sw(f->flags))
+					return err;
+				continue;
+			}
+
+			if (add) {
+				if (!f->in_hw_count)
+					tcf_block_offload_inc(block, &f->flags);
+				f->in_hw_count++;
+			} else {
+				f->in_hw_count--;
+				if (!f->in_hw_count)
+					tcf_block_offload_dec(block, &f->flags);
+			}
+		}
+	}
+
+	return 0;
+}
+
 static int fl_dump_key_val(struct sk_buff *skb,
 			   void *val, int val_type,
 			   void *mask, int mask_type, int len)
@@ -1443,6 +1493,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.change		= fl_change,
 	.delete		= fl_delete,
 	.walk		= fl_walk,
+	.offload	= fl_offload,
 	.dump		= fl_dump,
 	.bind_class	= fl_bind_class,
 	.owner		= THIS_MODULE,
-- 
2.17.0

^ permalink raw reply related

* [RFC net-next 2/4] net: sched: add tcf_proto_op to offload a rule
From: Jakub Kicinski @ 2018-05-25  2:25 UTC (permalink / raw)
  To: netdev; +Cc: jiri, gerlitz.or, sridhar.samudrala, oss-drivers, john.hurley
In-Reply-To: <20180525022539.6799-1-jakub.kicinski@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Create a new tcf_proto_op called offload that generates a new offload
message for each node in a tcf_proto. Pointers to the tcf_proto and
whether the offload request is to add or delete the node are included.
Also included is a callback function to send the offload message to and
the option of priv data to go with the cb.

The function is called to offload all tcf_proto nodes in all chains of a
block when a callback tries to register to a port that already has
offloaded rules. If all existing rules cannot be offloaded then the
registration is rejected. This replaces the previous policy of rejecting
such callback registration outright.

On unregistration of a callback, the rules are flushed for that given cb.
The implementation of block sharing in the NFP driver, for example,
dublicates shared rules to all devs bound to a block. This meant that
rules could still exist in hw even after a device is unbound from a block
(assuming the block still remains active).

Signed-off-by: John Hurley <john.hurley@netronome.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c    |  4 +-
 include/net/act_api.h                         |  3 --
 include/net/pkt_cls.h                         |  6 ++-
 include/net/sch_generic.h                     |  6 +++
 net/sched/cls_api.c                           | 50 ++++++++++++++++---
 5 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 6887c7faaaba..67ff1934fc38 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1542,7 +1542,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 
 err_block_bind:
 	if (!tcf_block_cb_decref(block_cb)) {
-		__tcf_block_cb_unregister(block_cb);
+		__tcf_block_cb_unregister(block, block_cb);
 err_cb_register:
 		mlxsw_sp_acl_block_destroy(acl_block);
 	}
@@ -1572,7 +1572,7 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
 	err = mlxsw_sp_acl_block_unbind(mlxsw_sp, acl_block,
 					mlxsw_sp_port, ingress);
 	if (!err && !tcf_block_cb_decref(block_cb)) {
-		__tcf_block_cb_unregister(block_cb);
+		__tcf_block_cb_unregister(block, block_cb);
 		mlxsw_sp_acl_block_destroy(acl_block);
 	}
 }
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 9e59ebfded62..5ff11adbe2a6 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -190,9 +190,6 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
 #endif
 }
 
-typedef int tc_setup_cb_t(enum tc_setup_type type,
-			  void *type_data, void *cb_priv);
-
 #ifdef CONFIG_NET_CLS_ACT
 int tc_setup_cb_egdev_register(const struct net_device *dev,
 			       tc_setup_cb_t *cb, void *cb_priv);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index b2b5cbe13086..74dd9784bf88 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -78,7 +78,8 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
 			  void *cb_priv, struct netlink_ext_ack *extack);
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb);
+void __tcf_block_cb_unregister(struct tcf_block *block,
+			       struct tcf_block_cb *block_cb);
 void tcf_block_cb_unregister(struct tcf_block *block,
 			     tc_setup_cb_t *cb, void *cb_ident);
 
@@ -176,7 +177,8 @@ int tcf_block_cb_register(struct tcf_block *block,
 }
 
 static inline
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+void __tcf_block_cb_unregister(struct tcf_block *block,
+			       struct tcf_block_cb *block_cb)
 {
 }
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 98c10a28cd01..7b0d62870e82 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -20,6 +20,9 @@ struct qdisc_walker;
 struct tcf_walker;
 struct module;
 
+typedef int tc_setup_cb_t(enum tc_setup_type type,
+			  void *type_data, void *cb_priv);
+
 struct qdisc_rate_table {
 	struct tc_ratespec rate;
 	u32		data[256];
@@ -256,6 +259,9 @@ struct tcf_proto_ops {
 					  bool *last,
 					  struct netlink_ext_ack *);
 	void			(*walk)(struct tcf_proto*, struct tcf_walker *arg);
+	int			(*offload)(struct tcf_proto *, bool,
+					   tc_setup_cb_t *, void *,
+					   struct netlink_ext_ack *);
 	void			(*bind_class)(void *, u32, unsigned long);
 
 	/* rtnetlink specific */
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 95a83aa798d1..0aba7d7db099 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -677,19 +677,50 @@ unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
 }
 EXPORT_SYMBOL(tcf_block_cb_decref);
 
+static int
+tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+			    void *cb_priv, bool add,
+			    struct netlink_ext_ack *extack)
+{
+	struct tcf_chain *chain;
+	struct tcf_proto *tp;
+	int err;
+
+	list_for_each_entry(chain, &block->chain_list, list) {
+		for (tp = rtnl_dereference(chain->filter_chain); tp;
+		     tp = rtnl_dereference(tp->next)) {
+			if (tp->ops->offload) {
+				err = tp->ops->offload(tp, add, cb, cb_priv,
+						       extack);
+				if (err && add)
+					goto err_playback_remove;
+			} else if (add) {
+				err = -EOPNOTSUPP;
+				NL_SET_ERR_MSG(extack, "Block rule replay failed - no tp offload op");
+				goto err_playback_remove;
+			}
+		}
+	}
+
+	return 0;
+
+err_playback_remove:
+	tcf_block_playback_offloads(block, cb, cb_priv, false, extack);
+	return err;
+}
+
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
 					     void *cb_priv,
 					     struct netlink_ext_ack *extack)
 {
 	struct tcf_block_cb *block_cb;
+	int err;
 
-	/* At this point, playback of previous block cb calls is not supported,
-	 * so forbid to register to block which already has some offloaded
-	 * filters present.
-	 */
-	if (tcf_block_offload_in_use(block))
-		return ERR_PTR(-EOPNOTSUPP);
+	/* Replay any already present rules */
+	err = tcf_block_playback_offloads(block, cb, cb_priv, true, extack);
+	if (err)
+		return ERR_PTR(err);
 
 	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
 	if (!block_cb)
@@ -714,8 +745,11 @@ int tcf_block_cb_register(struct tcf_block *block,
 }
 EXPORT_SYMBOL(tcf_block_cb_register);
 
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+void __tcf_block_cb_unregister(struct tcf_block *block,
+			       struct tcf_block_cb *block_cb)
 {
+	tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
+				    false, NULL);
 	list_del(&block_cb->list);
 	kfree(block_cb);
 }
@@ -729,7 +763,7 @@ void tcf_block_cb_unregister(struct tcf_block *block,
 	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
 	if (!block_cb)
 		return;
-	__tcf_block_cb_unregister(block_cb);
+	__tcf_block_cb_unregister(block, block_cb);
 }
 EXPORT_SYMBOL(tcf_block_cb_unregister);
 
-- 
2.17.0

^ permalink raw reply related

* [RFC net-next 1/4] net: sched: pass extack pointer to block binds and cb registration
From: Jakub Kicinski @ 2018-05-25  2:25 UTC (permalink / raw)
  To: netdev; +Cc: jiri, gerlitz.or, sridhar.samudrala, oss-drivers, john.hurley
In-Reply-To: <20180525022539.6799-1-jakub.kicinski@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Pass the extact struct from a tc qdisc add to the block bind function and,
in turn, to the setup_tc ndo of binding device via the tc_block_offload
struct. Pass this back to any block callback registrations to allow
netlink logging of fails in the bind process.

Signed-off-by: John Hurley <john.hurley@netronome.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c |  2 +-
 .../net/ethernet/chelsio/cxgb4/cxgb4_main.c   |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c   |  2 +-
 .../net/ethernet/intel/i40evf/i40evf_main.c   |  2 +-
 drivers/net/ethernet/intel/igb/igb_main.c     |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  2 +-
 .../net/ethernet/mellanox/mlxsw/spectrum.c    | 10 ++++----
 drivers/net/ethernet/netronome/nfp/bpf/main.c |  2 +-
 .../ethernet/netronome/nfp/flower/offload.c   |  2 +-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |  2 +-
 drivers/net/netdevsim/netdev.c                |  2 +-
 include/net/pkt_cls.h                         |  6 +++--
 net/dsa/slave.c                               |  2 +-
 net/sched/cls_api.c                           | 24 ++++++++++++-------
 17 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index dfa0839f6656..a9187b0d97e3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7984,7 +7984,7 @@ static int bnxt_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb,
-					     bp, bp);
+					     bp, bp, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp);
 		return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index 38f635cf8408..6cd3976f9920 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -173,7 +173,7 @@ static int bnxt_vf_rep_setup_tc_block(struct net_device *dev,
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
 					     bnxt_vf_rep_setup_tc_block_cb,
-					     vf_rep, vf_rep);
+					     vf_rep, vf_rep, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block,
 					bnxt_vf_rep_setup_tc_block_cb, vf_rep);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 513e1d356384..191ac686c5fb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3015,7 +3015,7 @@ static int cxgb_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb,
-					     pi, dev);
+					     pi, dev, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi);
 		return 0;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b5daa5c9c7de..07dc46bbe508 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7554,7 +7554,7 @@ static int i40e_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb,
-					     np, np);
+					     np, np, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np);
 		return 0;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index a7b87f935411..3f8bb0d61f63 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -2926,7 +2926,7 @@ static int i40evf_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
-					     adapter, adapter);
+					     adapter, adapter, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
 					adapter);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 78574c06635b..b1ba426d2b40 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2727,7 +2727,7 @@ static int igb_setup_tc_block(struct igb_adapter *adapter,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, igb_setup_tc_block_cb,
-					     adapter, adapter);
+					     adapter, adapter, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb,
 					adapter);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index a52d92e182ee..81553a602f0f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9376,7 +9376,7 @@ static int ixgbe_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb,
-					     adapter, adapter);
+					     adapter, adapter, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb,
 					adapter);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b5a7580b12fe..2e0029173a64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3178,7 +3178,7 @@ static int mlx5e_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb,
-					     priv, priv);
+					     priv, priv, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb,
 					priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index c3034f58aa33..d9fe432f18f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -803,7 +803,7 @@ static int mlx5e_rep_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
-					     priv, priv);
+					     priv, priv, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
 		return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index bb252b36994d..6887c7faaaba 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1503,7 +1503,8 @@ static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type,
 
 static int
 mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
-				    struct tcf_block *block, bool ingress)
+				    struct tcf_block *block, bool ingress,
+				    struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_acl_block *acl_block;
@@ -1518,7 +1519,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 			return -ENOMEM;
 		block_cb = __tcf_block_cb_register(block,
 						   mlxsw_sp_setup_tc_block_cb_flower,
-						   mlxsw_sp, acl_block);
+						   mlxsw_sp, acl_block, extack);
 		if (IS_ERR(block_cb)) {
 			err = PTR_ERR(block_cb);
 			goto err_cb_register;
@@ -1596,11 +1597,12 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		err = tcf_block_cb_register(f->block, cb, mlxsw_sp_port,
-					    mlxsw_sp_port);
+					    mlxsw_sp_port, f->extack);
 		if (err)
 			return err;
 		err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port,
-							  f->block, ingress);
+							  f->block, ingress,
+							  f->extack);
 		if (err) {
 			tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
 			return err;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index fcdfb8e7fdea..bf46f7bff912 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -206,7 +206,7 @@ static int nfp_bpf_setup_tc_block(struct net_device *netdev,
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
 					     nfp_bpf_setup_tc_block_cb,
-					     nn, nn);
+					     nn, nn, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block,
 					nfp_bpf_setup_tc_block_cb,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index c42e64f32333..7abefed1efe9 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -627,7 +627,7 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
 					     nfp_flower_setup_tc_block_cb,
-					     repr, repr);
+					     repr, repr, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block,
 					nfp_flower_setup_tc_block_cb,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c32de53a00d3..f5bee8da084e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3759,7 +3759,7 @@ static int stmmac_setup_tc_block(struct stmmac_priv *priv,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, stmmac_setup_tc_block_cb,
-				priv, priv);
+				priv, priv, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, stmmac_setup_tc_block_cb, priv);
 		return 0;
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index ec68f38213d9..c9dacc6fcd59 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -260,7 +260,7 @@ nsim_setup_tc_block(struct net_device *dev, struct tc_block_offload *f)
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, nsim_setup_tc_block_cb,
-					     ns, ns);
+					     ns, ns, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, nsim_setup_tc_block_cb, ns);
 		return 0;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0005f0b40fe9..b2b5cbe13086 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -73,10 +73,11 @@ void tcf_block_cb_incref(struct tcf_block_cb *block_cb);
 unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb);
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv);
+					     void *cb_priv,
+					     struct netlink_ext_ack *extack);
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv);
+			  void *cb_priv, struct netlink_ext_ack *extack);
 void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb);
 void tcf_block_cb_unregister(struct tcf_block *block,
 			     tc_setup_cb_t *cb, void *cb_ident);
@@ -596,6 +597,7 @@ struct tc_block_offload {
 	enum tc_block_command command;
 	enum tcf_block_binder_type binder_type;
 	struct tcf_block *block;
+	struct netlink_ext_ack *extack;
 };
 
 struct tc_cls_common_offload {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1e3b6a6d8a40..71536c435132 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -900,7 +900,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 
 	switch (f->command) {
 	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, cb, dev, dev);
+		return tcf_block_cb_register(f->block, cb, dev, dev, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, cb, dev);
 		return 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 963e4bf0aab8..95a83aa798d1 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -276,7 +276,8 @@ static bool tcf_block_offload_in_use(struct tcf_block *block)
 static int tcf_block_offload_cmd(struct tcf_block *block,
 				 struct net_device *dev,
 				 struct tcf_block_ext_info *ei,
-				 enum tc_block_command command)
+				 enum tc_block_command command,
+				 struct netlink_ext_ack *extack)
 {
 	struct tc_block_offload bo = {};
 
@@ -287,7 +288,8 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
 }
 
 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
-				  struct tcf_block_ext_info *ei)
+				  struct tcf_block_ext_info *ei,
+				  struct netlink_ext_ack *extack)
 {
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
@@ -298,10 +300,12 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
 	/* If tc offload feature is disabled and the block we try to bind
 	 * to already has some offloaded filters, forbid to bind.
 	 */
-	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
+	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
+		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
 		return -EOPNOTSUPP;
+	}
 
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
+	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_inc;
 	return err;
@@ -321,7 +325,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
 
 	if (!dev->netdev_ops->ndo_setup_tc)
 		goto no_offload_dev_dec;
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
+	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_dec;
 	return;
@@ -539,7 +543,7 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
 	if (err)
 		goto err_chain_head_change_cb_add;
 
-	err = tcf_block_offload_bind(block, q, ei);
+	err = tcf_block_offload_bind(block, q, ei, extack);
 	if (err)
 		goto err_block_offload_bind;
 
@@ -675,7 +679,8 @@ EXPORT_SYMBOL(tcf_block_cb_decref);
 
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv)
+					     void *cb_priv,
+					     struct netlink_ext_ack *extack)
 {
 	struct tcf_block_cb *block_cb;
 
@@ -699,11 +704,12 @@ EXPORT_SYMBOL(__tcf_block_cb_register);
 
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv)
+			  void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tcf_block_cb *block_cb;
 
-	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
+	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
+					   extack);
 	return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
 }
 EXPORT_SYMBOL(tcf_block_cb_register);
-- 
2.17.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox