Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v3 03/10] net: stmmac: xgmac: Implement tx_queue_prio()
From: Jose Abreu @ 2019-08-07  8:03 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel
In-Reply-To: <cover.1565164729.git.joabreu@synopsys.com>

Implement the TX Queue Priority callback in XGMAC core.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h      |  4 ++++
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 19 ++++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 86a42bc39d21..b77091161765 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -127,6 +127,10 @@
 #define XGMAC_MTL_RXQ_DMA_MAP1		0x00001034
 #define XGMAC_QxMDMACH(x)		GENMASK((x) * 8 + 3, (x) * 8)
 #define XGMAC_QxMDMACH_SHIFT(x)		((x) * 8)
+#define XGMAC_TC_PRTY_MAP0		0x00001040
+#define XGMAC_TC_PRTY_MAP1		0x00001044
+#define XGMAC_PSTC(x)			GENMASK((x) * 8 + 7, (x) * 8)
+#define XGMAC_PSTC_SHIFT(x)		((x) * 8)
 #define XGMAC_MTL_TXQ_OPMODE(x)		(0x00001100 + (0x80 * (x)))
 #define XGMAC_TQS			GENMASK(25, 16)
 #define XGMAC_TQS_SHIFT			16
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index ce6503dfc86d..bfbd5ae11540 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -118,6 +118,23 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio,
 	writel(value, ioaddr + reg);
 }
 
+static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio,
+				   u32 queue)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value, reg;
+
+	reg = (queue < 4) ? XGMAC_TC_PRTY_MAP0 : XGMAC_TC_PRTY_MAP1;
+	if (queue >= 4)
+		queue -= 4;
+
+	value = readl(ioaddr + reg);
+	value &= ~XGMAC_PSTC(queue);
+	value |= (prio << XGMAC_PSTC_SHIFT(queue)) & XGMAC_PSTC(queue);
+
+	writel(value, ioaddr + reg);
+}
+
 static void dwxgmac2_prog_mtl_rx_algorithms(struct mac_device_info *hw,
 					    u32 rx_alg)
 {
@@ -428,7 +445,7 @@ const struct stmmac_ops dwxgmac210_ops = {
 	.rx_ipc = dwxgmac2_rx_ipc,
 	.rx_queue_enable = dwxgmac2_rx_queue_enable,
 	.rx_queue_prio = dwxgmac2_rx_queue_prio,
-	.tx_queue_prio = NULL,
+	.tx_queue_prio = dwxgmac2_tx_queue_prio,
 	.rx_queue_routing = NULL,
 	.prog_mtl_rx_algorithms = dwxgmac2_prog_mtl_rx_algorithms,
 	.prog_mtl_tx_algorithms = dwxgmac2_prog_mtl_tx_algorithms,
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next v3 05/10] net: stmmac: selftests: Add RSS test
From: Jose Abreu @ 2019-08-07  8:03 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel
In-Reply-To: <cover.1565164729.git.joabreu@synopsys.com>

Add a test for RSS in the stmmac selftests.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 .../net/ethernet/stmicro/stmmac/stmmac_selftests.c    | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index a97b1ea76438..83b775a8cedc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -700,6 +700,21 @@ static int stmmac_test_flowctrl(struct stmmac_priv *priv)
 	return ret;
 }
 
+static int stmmac_test_rss(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+
+	if (!priv->dma_cap.rssen || !priv->rss.enable)
+		return -EOPNOTSUPP;
+
+	attr.dst = priv->dev->dev_addr;
+	attr.exp_hash = true;
+	attr.sport = 0x321;
+	attr.dport = 0x123;
+
+	return __stmmac_test_loopback(priv, &attr);
+}
+
 #define STMMAC_LOOPBACK_NONE	0
 #define STMMAC_LOOPBACK_MAC	1
 #define STMMAC_LOOPBACK_PHY	2
@@ -745,6 +760,10 @@ static const struct stmmac_test {
 		.name = "Flow Control         ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_flowctrl,
+	}, {
+		.name = "RSS                  ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_rss,
 	},
 };
 
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next v3 07/10] net: stmmac: selftests: Add test for VLAN and Double VLAN Filtering
From: Jose Abreu @ 2019-08-07  8:03 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel
In-Reply-To: <cover.1565164729.git.joabreu@synopsys.com>

Add a selftest for VLAN and Double VLAN Filtering in stmmac.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 .../net/ethernet/stmicro/stmmac/stmmac_selftests.c | 205 +++++++++++++++++++++
 1 file changed, 205 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index 83b775a8cedc..6b08bb15af15 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -715,6 +715,203 @@ static int stmmac_test_rss(struct stmmac_priv *priv)
 	return __stmmac_test_loopback(priv, &attr);
 }
 
+static int stmmac_test_vlan_validate(struct sk_buff *skb,
+				     struct net_device *ndev,
+				     struct packet_type *pt,
+				     struct net_device *orig_ndev)
+{
+	struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+	struct stmmachdr *shdr;
+	struct ethhdr *ehdr;
+	struct udphdr *uhdr;
+	struct iphdr *ihdr;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
+		goto out;
+
+	if (skb_linearize(skb))
+		goto out;
+	if (skb_headlen(skb) < (STMMAC_TEST_PKT_SIZE - ETH_HLEN))
+		goto out;
+
+	ehdr = (struct ethhdr *)skb_mac_header(skb);
+	if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->dst))
+		goto out;
+
+	ihdr = ip_hdr(skb);
+	if (tpriv->double_vlan)
+		ihdr = (struct iphdr *)(skb_network_header(skb) + 4);
+	if (ihdr->protocol != IPPROTO_UDP)
+		goto out;
+
+	uhdr = (struct udphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+	if (uhdr->dest != htons(tpriv->packet->dport))
+		goto out;
+
+	shdr = (struct stmmachdr *)((u8 *)uhdr + sizeof(*uhdr));
+	if (shdr->magic != cpu_to_be64(STMMAC_TEST_PKT_MAGIC))
+		goto out;
+
+	tpriv->ok = true;
+	complete(&tpriv->comp);
+
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int stmmac_test_vlanfilt(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+	struct stmmac_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	int ret = 0, i;
+
+	if (!priv->dma_cap.vlhash)
+		return -EOPNOTSUPP;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	init_completion(&tpriv->comp);
+
+	tpriv->pt.type = htons(ETH_P_IP);
+	tpriv->pt.func = stmmac_test_vlan_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = &attr;
+
+	/*
+	 * As we use HASH filtering, false positives may appear. This is a
+	 * specially chosen ID so that adjacent IDs (+4) have different
+	 * HASH values.
+	 */
+	tpriv->vlan_id = 0x123;
+	dev_add_pack(&tpriv->pt);
+
+	ret = vlan_vid_add(priv->dev, htons(ETH_P_8021Q), tpriv->vlan_id);
+	if (ret)
+		goto cleanup;
+
+	for (i = 0; i < 4; i++) {
+		attr.vlan = 1;
+		attr.vlan_id_out = tpriv->vlan_id + i;
+		attr.dst = priv->dev->dev_addr;
+		attr.sport = 9;
+		attr.dport = 9;
+
+		skb = stmmac_test_get_udp_skb(priv, &attr);
+		if (!skb) {
+			ret = -ENOMEM;
+			goto vlan_del;
+		}
+
+		skb_set_queue_mapping(skb, 0);
+		ret = dev_queue_xmit(skb);
+		if (ret)
+			goto vlan_del;
+
+		wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+		ret = !tpriv->ok;
+		if (ret && !i) {
+			goto vlan_del;
+		} else if (!ret && i) {
+			ret = -1;
+			goto vlan_del;
+		} else {
+			ret = 0;
+		}
+
+		tpriv->ok = false;
+	}
+
+vlan_del:
+	vlan_vid_del(priv->dev, htons(ETH_P_8021Q), tpriv->vlan_id);
+cleanup:
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+static int stmmac_test_dvlanfilt(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+	struct stmmac_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	int ret = 0, i;
+
+	if (!priv->dma_cap.vlhash)
+		return -EOPNOTSUPP;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	tpriv->double_vlan = true;
+	init_completion(&tpriv->comp);
+
+	tpriv->pt.type = htons(ETH_P_8021Q);
+	tpriv->pt.func = stmmac_test_vlan_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = &attr;
+
+	/*
+	 * As we use HASH filtering, false positives may appear. This is a
+	 * specially chosen ID so that adjacent IDs (+4) have different
+	 * HASH values.
+	 */
+	tpriv->vlan_id = 0x123;
+	dev_add_pack(&tpriv->pt);
+
+	ret = vlan_vid_add(priv->dev, htons(ETH_P_8021AD), tpriv->vlan_id);
+	if (ret)
+		goto cleanup;
+
+	for (i = 0; i < 4; i++) {
+		attr.vlan = 2;
+		attr.vlan_id_out = tpriv->vlan_id + i;
+		attr.dst = priv->dev->dev_addr;
+		attr.sport = 9;
+		attr.dport = 9;
+
+		skb = stmmac_test_get_udp_skb(priv, &attr);
+		if (!skb) {
+			ret = -ENOMEM;
+			goto vlan_del;
+		}
+
+		skb_set_queue_mapping(skb, 0);
+		ret = dev_queue_xmit(skb);
+		if (ret)
+			goto vlan_del;
+
+		wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+		ret = !tpriv->ok;
+		if (ret && !i) {
+			goto vlan_del;
+		} else if (!ret && i) {
+			ret = -1;
+			goto vlan_del;
+		} else {
+			ret = 0;
+		}
+
+		tpriv->ok = false;
+	}
+
+vlan_del:
+	vlan_vid_del(priv->dev, htons(ETH_P_8021AD), tpriv->vlan_id);
+cleanup:
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
 #define STMMAC_LOOPBACK_NONE	0
 #define STMMAC_LOOPBACK_MAC	1
 #define STMMAC_LOOPBACK_PHY	2
@@ -764,6 +961,14 @@ static const struct stmmac_test {
 		.name = "RSS                  ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_rss,
+	}, {
+		.name = "VLAN Filtering       ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_vlanfilt,
+	}, {
+		.name = "Double VLAN Filtering",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_dvlanfilt,
 	},
 };
 
-- 
2.7.4


^ permalink raw reply related

* [net-next:master 102/111] drivers/net/phy/mdio-cavium.h:114:31: note: in expansion of macro 'readq'
From: kbuild test robot @ 2019-08-07  8:05 UTC (permalink / raw)
  To: Nathan Chancellor; +Cc: kbuild-all, netdev

[-- Attachment #1: Type: text/plain, Size: 15909 bytes --]

tree:   https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next.git master
head:   13dfb3fa494361ea9a5950f27c9cd8b06d28c04f
commit: b8fb640643fcdb3bca84137c4cec0c649b25e056 [102/111] net: mdio-octeon: Fix Kconfig warnings and build errors
config: sh-allmodconfig (attached as .config)
compiler: sh4-linux-gcc (GCC) 7.4.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        git checkout b8fb640643fcdb3bca84137c4cec0c649b25e056
        # save the attached .config to linux build tree
        GCC_VERSION=7.4.0 make.cross ARCH=sh 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from arch/sh/include/asm/io.h:28:0,
                    from include/linux/scatterlist.h:9,
                    from include/linux/dma-mapping.h:11,
                    from include/linux/skbuff.h:31,
                    from include/linux/if_ether.h:19,
                    from include/uapi/linux/ethtool.h:19,
                    from include/linux/ethtool.h:18,
                    from include/linux/phy.h:16,
                    from drivers/net/phy/mdio-cavium.c:8:
   drivers/net/phy/mdio-cavium.c: In function 'cavium_mdiobus_set_mode':
   drivers/net/phy/mdio-cavium.h:114:37: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_readq(addr)  readq((void *)addr)
                                        ^
   arch/sh/include/mach-common/mach/mangle-port.h:41:23: note: in definition of macro 'ioswabq'
    # define ioswabq(x)  (x)
                          ^
>> arch/sh/include/asm/io.h:43:47: note: in expansion of macro '__raw_readq'
    #define readq_relaxed(c) ({ u64 __v = ioswabq(__raw_readq(c)); __v; })
                                                  ^~~~~~~~~~~
>> arch/sh/include/asm/io.h:53:31: note: in expansion of macro 'readq_relaxed'
    #define readq(a)  ({ u64 r_ = readq_relaxed(a); rmb(); r_; })
                                  ^~~~~~~~~~~~~
>> drivers/net/phy/mdio-cavium.h:114:31: note: in expansion of macro 'readq'
    #define oct_mdio_readq(addr)  readq((void *)addr)
                                  ^~~~~
>> drivers/net/phy/mdio-cavium.c:21:16: note: in expansion of macro 'oct_mdio_readq'
     smi_clk.u64 = oct_mdio_readq(p->register_base + SMI_CLK);
                   ^~~~~~~~~~~~~~
   In file included from include/linux/scatterlist.h:9:0,
                    from include/linux/dma-mapping.h:11,
                    from include/linux/skbuff.h:31,
                    from include/linux/if_ether.h:19,
                    from include/uapi/linux/ethtool.h:19,
                    from include/linux/ethtool.h:18,
                    from include/linux/phy.h:16,
                    from drivers/net/phy/mdio-cavium.c:8:
   drivers/net/phy/mdio-cavium.h:113:48: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                                   ^
   arch/sh/include/asm/io.h:33:71: note: in definition of macro '__raw_writeq'
    #define __raw_writeq(v,a) (__chk_io_ptr(a), *(volatile u64 __force *)(a) = (v))
                                                                          ^
   arch/sh/include/asm/io.h:58:32: note: in expansion of macro 'writeq_relaxed'
    #define writeq(v,a)  ({ wmb(); writeq_relaxed((v),(a)); })
                                   ^~~~~~~~~~~~~~
   drivers/net/phy/mdio-cavium.h:113:36: note: in expansion of macro 'writeq'
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                       ^~~~~~
>> drivers/net/phy/mdio-cavium.c:24:2: note: in expansion of macro 'oct_mdio_writeq'
     oct_mdio_writeq(smi_clk.u64, p->register_base + SMI_CLK);
     ^~~~~~~~~~~~~~~
   drivers/net/phy/mdio-cavium.c: In function 'cavium_mdiobus_c45_addr':
   drivers/net/phy/mdio-cavium.h:113:48: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                                   ^
   arch/sh/include/asm/io.h:33:71: note: in definition of macro '__raw_writeq'
    #define __raw_writeq(v,a) (__chk_io_ptr(a), *(volatile u64 __force *)(a) = (v))
                                                                          ^
   arch/sh/include/asm/io.h:58:32: note: in expansion of macro 'writeq_relaxed'
    #define writeq(v,a)  ({ wmb(); writeq_relaxed((v),(a)); })
                                   ^~~~~~~~~~~~~~
   drivers/net/phy/mdio-cavium.h:113:36: note: in expansion of macro 'writeq'
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                       ^~~~~~
   drivers/net/phy/mdio-cavium.c:39:2: note: in expansion of macro 'oct_mdio_writeq'
     oct_mdio_writeq(smi_wr.u64, p->register_base + SMI_WR_DAT);
     ^~~~~~~~~~~~~~~
   drivers/net/phy/mdio-cavium.h:113:48: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                                   ^
   arch/sh/include/asm/io.h:33:71: note: in definition of macro '__raw_writeq'
    #define __raw_writeq(v,a) (__chk_io_ptr(a), *(volatile u64 __force *)(a) = (v))
                                                                          ^
   arch/sh/include/asm/io.h:58:32: note: in expansion of macro 'writeq_relaxed'
    #define writeq(v,a)  ({ wmb(); writeq_relaxed((v),(a)); })
                                   ^~~~~~~~~~~~~~
   drivers/net/phy/mdio-cavium.h:113:36: note: in expansion of macro 'writeq'
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                       ^~~~~~
   drivers/net/phy/mdio-cavium.c:47:2: note: in expansion of macro 'oct_mdio_writeq'
     oct_mdio_writeq(smi_cmd.u64, p->register_base + SMI_CMD);
     ^~~~~~~~~~~~~~~
   In file included from arch/sh/include/asm/io.h:28:0,
                    from include/linux/scatterlist.h:9,
                    from include/linux/dma-mapping.h:11,
                    from include/linux/skbuff.h:31,
                    from include/linux/if_ether.h:19,
                    from include/uapi/linux/ethtool.h:19,
                    from include/linux/ethtool.h:18,
                    from include/linux/phy.h:16,
                    from drivers/net/phy/mdio-cavium.c:8:
   drivers/net/phy/mdio-cavium.h:114:37: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_readq(addr)  readq((void *)addr)
                                        ^
   arch/sh/include/mach-common/mach/mangle-port.h:41:23: note: in definition of macro 'ioswabq'
    # define ioswabq(x)  (x)
                          ^
>> arch/sh/include/asm/io.h:43:47: note: in expansion of macro '__raw_readq'
    #define readq_relaxed(c) ({ u64 __v = ioswabq(__raw_readq(c)); __v; })
                                                  ^~~~~~~~~~~
>> arch/sh/include/asm/io.h:53:31: note: in expansion of macro 'readq_relaxed'
    #define readq(a)  ({ u64 r_ = readq_relaxed(a); rmb(); r_; })
                                  ^~~~~~~~~~~~~
>> drivers/net/phy/mdio-cavium.h:114:31: note: in expansion of macro 'readq'
    #define oct_mdio_readq(addr)  readq((void *)addr)
                                  ^~~~~
   drivers/net/phy/mdio-cavium.c:54:16: note: in expansion of macro 'oct_mdio_readq'
      smi_wr.u64 = oct_mdio_readq(p->register_base + SMI_WR_DAT);
                   ^~~~~~~~~~~~~~
   In file included from include/linux/scatterlist.h:9:0,
                    from include/linux/dma-mapping.h:11,
                    from include/linux/skbuff.h:31,
                    from include/linux/if_ether.h:19,
                    from include/uapi/linux/ethtool.h:19,
                    from include/linux/ethtool.h:18,
                    from include/linux/phy.h:16,
                    from drivers/net/phy/mdio-cavium.c:8:
   drivers/net/phy/mdio-cavium.c: In function 'cavium_mdiobus_read':
   drivers/net/phy/mdio-cavium.h:113:48: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                                   ^
   arch/sh/include/asm/io.h:33:71: note: in definition of macro '__raw_writeq'
    #define __raw_writeq(v,a) (__chk_io_ptr(a), *(volatile u64 __force *)(a) = (v))
                                                                          ^
   arch/sh/include/asm/io.h:58:32: note: in expansion of macro 'writeq_relaxed'
    #define writeq(v,a)  ({ wmb(); writeq_relaxed((v),(a)); })
                                   ^~~~~~~~~~~~~~
   drivers/net/phy/mdio-cavium.h:113:36: note: in expansion of macro 'writeq'
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                       ^~~~~~
   drivers/net/phy/mdio-cavium.c:86:2: note: in expansion of macro 'oct_mdio_writeq'
     oct_mdio_writeq(smi_cmd.u64, p->register_base + SMI_CMD);
     ^~~~~~~~~~~~~~~
   In file included from arch/sh/include/asm/io.h:28:0,
                    from include/linux/scatterlist.h:9,
                    from include/linux/dma-mapping.h:11,
                    from include/linux/skbuff.h:31,
                    from include/linux/if_ether.h:19,
                    from include/uapi/linux/ethtool.h:19,
                    from include/linux/ethtool.h:18,
                    from include/linux/phy.h:16,
                    from drivers/net/phy/mdio-cavium.c:8:
   drivers/net/phy/mdio-cavium.h:114:37: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_readq(addr)  readq((void *)addr)
                                        ^
   arch/sh/include/mach-common/mach/mangle-port.h:41:23: note: in definition of macro 'ioswabq'
    # define ioswabq(x)  (x)
                          ^
>> arch/sh/include/asm/io.h:43:47: note: in expansion of macro '__raw_readq'
    #define readq_relaxed(c) ({ u64 __v = ioswabq(__raw_readq(c)); __v; })
                                                  ^~~~~~~~~~~
>> arch/sh/include/asm/io.h:53:31: note: in expansion of macro 'readq_relaxed'
    #define readq(a)  ({ u64 r_ = readq_relaxed(a); rmb(); r_; })
                                  ^~~~~~~~~~~~~
>> drivers/net/phy/mdio-cavium.h:114:31: note: in expansion of macro 'readq'
    #define oct_mdio_readq(addr)  readq((void *)addr)
                                  ^~~~~
   drivers/net/phy/mdio-cavium.c:93:16: note: in expansion of macro 'oct_mdio_readq'
      smi_rd.u64 = oct_mdio_readq(p->register_base + SMI_RD_DAT);
                   ^~~~~~~~~~~~~~
   In file included from include/linux/scatterlist.h:9:0,
                    from include/linux/dma-mapping.h:11,
                    from include/linux/skbuff.h:31,
                    from include/linux/if_ether.h:19,
                    from include/uapi/linux/ethtool.h:19,
                    from include/linux/ethtool.h:18,
                    from include/linux/phy.h:16,
                    from drivers/net/phy/mdio-cavium.c:8:
   drivers/net/phy/mdio-cavium.c: In function 'cavium_mdiobus_write':
   drivers/net/phy/mdio-cavium.h:113:48: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                                   ^
   arch/sh/include/asm/io.h:33:71: note: in definition of macro '__raw_writeq'
    #define __raw_writeq(v,a) (__chk_io_ptr(a), *(volatile u64 __force *)(a) = (v))
                                                                          ^
   arch/sh/include/asm/io.h:58:32: note: in expansion of macro 'writeq_relaxed'
    #define writeq(v,a)  ({ wmb(); writeq_relaxed((v),(a)); })
                                   ^~~~~~~~~~~~~~
   drivers/net/phy/mdio-cavium.h:113:36: note: in expansion of macro 'writeq'
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                       ^~~~~~
   drivers/net/phy/mdio-cavium.c:125:2: note: in expansion of macro 'oct_mdio_writeq'
     oct_mdio_writeq(smi_wr.u64, p->register_base + SMI_WR_DAT);
     ^~~~~~~~~~~~~~~
   drivers/net/phy/mdio-cavium.h:113:48: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                                   ^
   arch/sh/include/asm/io.h:33:71: note: in definition of macro '__raw_writeq'
    #define __raw_writeq(v,a) (__chk_io_ptr(a), *(volatile u64 __force *)(a) = (v))
                                                                          ^
   arch/sh/include/asm/io.h:58:32: note: in expansion of macro 'writeq_relaxed'
    #define writeq(v,a)  ({ wmb(); writeq_relaxed((v),(a)); })
                                   ^~~~~~~~~~~~~~
   drivers/net/phy/mdio-cavium.h:113:36: note: in expansion of macro 'writeq'
    #define oct_mdio_writeq(val, addr) writeq(val, (void *)addr)
                                       ^~~~~~
   drivers/net/phy/mdio-cavium.c:131:2: note: in expansion of macro 'oct_mdio_writeq'
     oct_mdio_writeq(smi_cmd.u64, p->register_base + SMI_CMD);
     ^~~~~~~~~~~~~~~
   In file included from arch/sh/include/asm/io.h:28:0,
                    from include/linux/scatterlist.h:9,
                    from include/linux/dma-mapping.h:11,
                    from include/linux/skbuff.h:31,
                    from include/linux/if_ether.h:19,
                    from include/uapi/linux/ethtool.h:19,
                    from include/linux/ethtool.h:18,
                    from include/linux/phy.h:16,
                    from drivers/net/phy/mdio-cavium.c:8:
   drivers/net/phy/mdio-cavium.h:114:37: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    #define oct_mdio_readq(addr)  readq((void *)addr)
                                        ^
   arch/sh/include/mach-common/mach/mangle-port.h:41:23: note: in definition of macro 'ioswabq'
    # define ioswabq(x)  (x)
                          ^
>> arch/sh/include/asm/io.h:43:47: note: in expansion of macro '__raw_readq'
    #define readq_relaxed(c) ({ u64 __v = ioswabq(__raw_readq(c)); __v; })
                                                  ^~~~~~~~~~~
>> arch/sh/include/asm/io.h:53:31: note: in expansion of macro 'readq_relaxed'
    #define readq(a)  ({ u64 r_ = readq_relaxed(a); rmb(); r_; })
                                  ^~~~~~~~~~~~~
>> drivers/net/phy/mdio-cavium.h:114:31: note: in expansion of macro 'readq'
    #define oct_mdio_readq(addr)  readq((void *)addr)
                                  ^~~~~
   drivers/net/phy/mdio-cavium.c:138:16: note: in expansion of macro 'oct_mdio_readq'
      smi_wr.u64 = oct_mdio_readq(p->register_base + SMI_WR_DAT);
                   ^~~~~~~~~~~~~~

vim +/readq +114 drivers/net/phy/mdio-cavium.h

b8fb640643fcdb Nathan Chancellor 2019-08-02  112  
1eefee901fca02 David Daney       2016-03-11  113  #define oct_mdio_writeq(val, addr)	writeq(val, (void *)addr)
1eefee901fca02 David Daney       2016-03-11 @114  #define oct_mdio_readq(addr)		readq((void *)addr)
1eefee901fca02 David Daney       2016-03-11  115  #endif
1eefee901fca02 David Daney       2016-03-11  116  

:::::: The code at line 114 was first introduced by commit
:::::: 1eefee901fca0208b8a56f20cdc134e2b8638ae7 phy: mdio-octeon: Refactor into two files/modules

:::::: TO: David Daney <david.daney@cavium.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 51812 bytes --]

^ permalink raw reply

* Re: [PATCH v3 11/41] media/v4l2-core/mm: convert put_page() to put_user_page*()
From: Hans Verkuil @ 2019-08-07  8:07 UTC (permalink / raw)
  To: john.hubbard, Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Mauro Carvalho Chehab, Kees Cook,
	Hans Verkuil, Sakari Ailus, Robin Murphy, Souptick Joarder
In-Reply-To: <20190807013340.9706-12-jhubbard@nvidia.com>

On 8/7/19 3:33 AM, john.hubbard@gmail.com wrote:
> From: John Hubbard <jhubbard@nvidia.com>
> 
> For pages that were retained via get_user_pages*(), release those pages
> via the new put_user_page*() routines, instead of via put_page() or
> release_pages().
> 
> This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
> ("mm: introduce put_user_page*(), placeholder versions").
> 
> Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Hans Verkuil <hans.verkuil@cisco.com>
> Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Robin Murphy <robin.murphy@arm.com>
> Cc: Souptick Joarder <jrdr.linux@gmail.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: linux-media@vger.kernel.org
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>

Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>

> ---
>  drivers/media/v4l2-core/videobuf-dma-sg.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
> index 66a6c6c236a7..d6eeb437ec19 100644
> --- a/drivers/media/v4l2-core/videobuf-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
> @@ -349,8 +349,7 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma)
>  	BUG_ON(dma->sglen);
>  
>  	if (dma->pages) {
> -		for (i = 0; i < dma->nr_pages; i++)
> -			put_page(dma->pages[i]);
> +		put_user_pages(dma->pages, dma->nr_pages);
>  		kfree(dma->pages);
>  		dma->pages = NULL;
>  	}
> 


^ permalink raw reply

* [PATCH] net: stmmac: Fix the miscalculation of mapping from rxq to dma channel
From: Shaokun Zhang @ 2019-08-07  8:17 UTC (permalink / raw)
  To: netdev, linux-stm32
  Cc: yuqi jin, Giuseppe Cavallaro, Alexandre Torgue, Jose Abreu,
	David S. Miller, Maxime Coquelin, Shaokun Zhang

From: yuqi jin <jinyuqi@huawei.com>

XGMAC_MTL_RXQ_DMA_MAP1 will be configured if the number of queues is
greater than 3, but local variable chan will shift left more than 32-bits.
Let's fix this issue.

Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Signed-off-by: Yuqi Jin <jinyuqi@huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
---
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 0a32c96a7854..de4b15f31727 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -166,13 +166,14 @@ static void dwxgmac2_map_mtl_to_dma(struct mac_device_info *hw, u32 queue,
 				    u32 chan)
 {
 	void __iomem *ioaddr = hw->pcsr;
-	u32 value, reg;
+	u32 value, reg, index;
 
 	reg = (queue < 4) ? XGMAC_MTL_RXQ_DMA_MAP0 : XGMAC_MTL_RXQ_DMA_MAP1;
+	index = (queue < 4) ? queue : queue - 4;
 
 	value = readl(ioaddr + reg);
-	value &= ~XGMAC_QxMDMACH(queue);
-	value |= (chan << XGMAC_QxMDMACH_SHIFT(queue)) & XGMAC_QxMDMACH(queue);
+	value &= ~XGMAC_QxMDMACH(index);
+	value |= (chan << XGMAC_QxMDMACH_SHIFT(index)) & XGMAC_QxMDMACH(index);
 
 	writel(value, ioaddr + reg);
 }
-- 
2.7.4


^ permalink raw reply related

* RE: [PATCH] net: stmmac: Fix the miscalculation of mapping from rxq to dma channel
From: Jose Abreu @ 2019-08-07  8:24 UTC (permalink / raw)
  To: Shaokun Zhang, netdev@vger.kernel.org,
	linux-stm32@st-md-mailman.stormreply.com
  Cc: yuqi jin, Giuseppe Cavallaro, Alexandre Torgue, David S. Miller,
	Maxime Coquelin
In-Reply-To: <1565165849-16246-1-git-send-email-zhangshaokun@hisilicon.com>

From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Aug/07/2019, 09:17:29 (UTC+00:00)

> From: yuqi jin <jinyuqi@huawei.com>
> 
> XGMAC_MTL_RXQ_DMA_MAP1 will be configured if the number of queues is
> greater than 3, but local variable chan will shift left more than 32-bits.
> Let's fix this issue.

This was already fixed in -net. Please see [1]

[1] https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/co
mmit/drivers/net/ethernet/stmicro/stmmac?id=e8df7e8c233a18d2704e37ecff475
83b494789d3

---
Thanks,
Jose Miguel Abreu

^ permalink raw reply

* Re: [PATCH 00/34] put_user_pages(): miscellaneous call sites
From: Jan Kara @ 2019-08-07  8:37 UTC (permalink / raw)
  To: John Hubbard
  Cc: Jan Kara, Matthew Wilcox, Michal Hocko, john.hubbard,
	Andrew Morton, Christoph Hellwig, Dan Williams, Dave Chinner,
	Dave Hansen, Ira Weiny, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel
In-Reply-To: <076e7826-67a5-4829-aae2-2b90f302cebd@nvidia.com>

On Fri 02-08-19 12:14:09, John Hubbard wrote:
> On 8/2/19 7:52 AM, Jan Kara wrote:
> > On Fri 02-08-19 07:24:43, Matthew Wilcox wrote:
> > > On Fri, Aug 02, 2019 at 02:41:46PM +0200, Jan Kara wrote:
> > > > On Fri 02-08-19 11:12:44, Michal Hocko wrote:
> > > > > On Thu 01-08-19 19:19:31, john.hubbard@gmail.com wrote:
> > > > > [...]
> > > > > > 2) Convert all of the call sites for get_user_pages*(), to
> > > > > > invoke put_user_page*(), instead of put_page(). This involves dozens of
> > > > > > call sites, and will take some time.
> > > > > 
> > > > > How do we make sure this is the case and it will remain the case in the
> > > > > future? There must be some automagic to enforce/check that. It is simply
> > > > > not manageable to do it every now and then because then 3) will simply
> > > > > be never safe.
> > > > > 
> > > > > Have you considered coccinele or some other scripted way to do the
> > > > > transition? I have no idea how to deal with future changes that would
> > > > > break the balance though.
> 
> Hi Michal,
> 
> Yes, I've thought about it, and coccinelle falls a bit short (it's not smart
> enough to know which put_page()'s to convert). However, there is a debug
> option planned: a yet-to-be-posted commit [1] uses struct page extensions
> (obviously protected by CONFIG_DEBUG_GET_USER_PAGES_REFERENCES) to add
> a redundant counter. That allows:
> 
> void __put_page(struct page *page)
> {
> 	...
> 	/* Someone called put_page() instead of put_user_page() */
> 	WARN_ON_ONCE(atomic_read(&page_ext->pin_count) > 0);
> 
> > > > 
> > > > Yeah, that's why I've been suggesting at LSF/MM that we may need to create
> > > > a gup wrapper - say vaddr_pin_pages() - and track which sites dropping
> > > > references got converted by using this wrapper instead of gup. The
> > > > counterpart would then be more logically named as unpin_page() or whatever
> > > > instead of put_user_page().  Sure this is not completely foolproof (you can
> > > > create new callsite using vaddr_pin_pages() and then just drop refs using
> > > > put_page()) but I suppose it would be a high enough barrier for missed
> > > > conversions... Thoughts?
> 
> The debug option above is still a bit simplistic in its implementation
> (and maybe not taking full advantage of the data it has), but I think
> it's preferable, because it monitors the "core" and WARNs.
> 
> Instead of the wrapper, I'm thinking: documentation and the passage of
> time, plus the debug option (perhaps enhanced--probably once I post it
> someone will notice opportunities), yes?

So I think your debug option and my suggested renaming serve a bit
different purposes (and thus both make sense). If you do the renaming, you
can just grep to see unconverted sites. Also when someone merges new GUP
user (unaware of the new rules) while you switch GUP to use pins instead of
ordinary references, you'll get compilation error in case of renaming
instead of hard to debug refcount leak without the renaming. And such
conflict is almost bound to happen given the size of GUP patch set... Also
the renaming serves against the "coding inertia" - i.e., GUP is around for
ages so people just use it without checking any documentation or comments.
After switching how GUP works, what used to be correct isn't anymore so
renaming the function serves as a warning that something has really
changed.

Your refcount debug patches are good to catch bugs in the conversions done
but that requires you to be able to excercise the code path in the first
place which may require particular HW or so, and you also have to enable
the debug option which means you already aim at verifying the GUP
references are treated properly.

								Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply

* Re: [PATCH 00/34] put_user_pages(): miscellaneous call sites
From: Michal Hocko @ 2019-08-07  8:46 UTC (permalink / raw)
  To: Jan Kara
  Cc: John Hubbard, Matthew Wilcox, john.hubbard, Andrew Morton,
	Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jason Gunthorpe, Jérôme Glisse, LKML,
	amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx, kvm,
	linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel
In-Reply-To: <20190807083726.GA14658@quack2.suse.cz>

On Wed 07-08-19 10:37:26, Jan Kara wrote:
> On Fri 02-08-19 12:14:09, John Hubbard wrote:
> > On 8/2/19 7:52 AM, Jan Kara wrote:
> > > On Fri 02-08-19 07:24:43, Matthew Wilcox wrote:
> > > > On Fri, Aug 02, 2019 at 02:41:46PM +0200, Jan Kara wrote:
> > > > > On Fri 02-08-19 11:12:44, Michal Hocko wrote:
> > > > > > On Thu 01-08-19 19:19:31, john.hubbard@gmail.com wrote:
> > > > > > [...]
> > > > > > > 2) Convert all of the call sites for get_user_pages*(), to
> > > > > > > invoke put_user_page*(), instead of put_page(). This involves dozens of
> > > > > > > call sites, and will take some time.
> > > > > > 
> > > > > > How do we make sure this is the case and it will remain the case in the
> > > > > > future? There must be some automagic to enforce/check that. It is simply
> > > > > > not manageable to do it every now and then because then 3) will simply
> > > > > > be never safe.
> > > > > > 
> > > > > > Have you considered coccinele or some other scripted way to do the
> > > > > > transition? I have no idea how to deal with future changes that would
> > > > > > break the balance though.
> > 
> > Hi Michal,
> > 
> > Yes, I've thought about it, and coccinelle falls a bit short (it's not smart
> > enough to know which put_page()'s to convert). However, there is a debug
> > option planned: a yet-to-be-posted commit [1] uses struct page extensions
> > (obviously protected by CONFIG_DEBUG_GET_USER_PAGES_REFERENCES) to add
> > a redundant counter. That allows:
> > 
> > void __put_page(struct page *page)
> > {
> > 	...
> > 	/* Someone called put_page() instead of put_user_page() */
> > 	WARN_ON_ONCE(atomic_read(&page_ext->pin_count) > 0);
> > 
> > > > > 
> > > > > Yeah, that's why I've been suggesting at LSF/MM that we may need to create
> > > > > a gup wrapper - say vaddr_pin_pages() - and track which sites dropping
> > > > > references got converted by using this wrapper instead of gup. The
> > > > > counterpart would then be more logically named as unpin_page() or whatever
> > > > > instead of put_user_page().  Sure this is not completely foolproof (you can
> > > > > create new callsite using vaddr_pin_pages() and then just drop refs using
> > > > > put_page()) but I suppose it would be a high enough barrier for missed
> > > > > conversions... Thoughts?
> > 
> > The debug option above is still a bit simplistic in its implementation
> > (and maybe not taking full advantage of the data it has), but I think
> > it's preferable, because it monitors the "core" and WARNs.
> > 
> > Instead of the wrapper, I'm thinking: documentation and the passage of
> > time, plus the debug option (perhaps enhanced--probably once I post it
> > someone will notice opportunities), yes?
> 
> So I think your debug option and my suggested renaming serve a bit
> different purposes (and thus both make sense). If you do the renaming, you
> can just grep to see unconverted sites. Also when someone merges new GUP
> user (unaware of the new rules) while you switch GUP to use pins instead of
> ordinary references, you'll get compilation error in case of renaming
> instead of hard to debug refcount leak without the renaming. And such
> conflict is almost bound to happen given the size of GUP patch set... Also
> the renaming serves against the "coding inertia" - i.e., GUP is around for
> ages so people just use it without checking any documentation or comments.
> After switching how GUP works, what used to be correct isn't anymore so
> renaming the function serves as a warning that something has really
> changed.

Fully agreed!

> Your refcount debug patches are good to catch bugs in the conversions done
> but that requires you to be able to excercise the code path in the first
> place which may require particular HW or so, and you also have to enable
> the debug option which means you already aim at verifying the GUP
> references are treated properly.
> 
> 								Honza
> 
> -- 
> Jan Kara <jack@suse.com>
> SUSE Labs, CR

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply

* Re: [PATCH v2 1/1] ixgbe: sync the first fragment unconditionally
From: Firo Yang @ 2019-08-07  8:38 UTC (permalink / raw)
  To: Jacob Wen
  Cc: davem@davemloft.net, jeffrey.t.kirsher@intel.com,
	alexander.h.duyck@linux.intel.com,
	intel-wired-lan@lists.osuosl.org, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <85aaefdf-d454-1823-5840-d9e2f71ffb19@oracle.com>

The 08/07/2019 15:56, Jacob Wen wrote:
> I think the description is not correct. Consider using something like below.
Thank you for comments. 

> 
> In Xen environment, due to memory fragmentation ixgbe may allocate a 'DMA'
> buffer with pages that are not physically contiguous.
Actually, I didn't look into the reason why ixgbe got a DMA buffer which
was mapped to Xen-swiotlb area.

But I don't think this issue relates to phsical memory contiguity because, in
our case, one ixgbe_rx_buffer only associates at most one page.

If you take a look at the related code, you will find there are several reasons
for mapping a DMA buffer to Xen-swiotlb area:
static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
         */
        if (dma_capable(dev, dev_addr, size) &&
            !range_straddles_page_boundary(phys, size) &&
                !xen_arch_need_swiotlb(dev, phys, dev_addr) &&
                swiotlb_force != SWIOTLB_FORCE)
                goto done;

// Firo
> 
> A NIC doesn't support directly write such buffer. So xen-swiotlb would use
> the pages, which are physically contiguous, from the swiotlb buffer for the
> NIC.
> 
> The unmap operation is used to copy the swiotlb buffer to the pages that are
> allocated by ixgbe.
> 
> On 8/7/19 10:49 AM, Firo Yang wrote:
> > In Xen environment, if Xen-swiotlb is enabled, ixgbe driver
> > could possibly allocate a page, DMA memory buffer, for the first
> > fragment which is not suitable for Xen-swiotlb to do DMA operations.
> > Xen-swiotlb have to internally allocate another page for doing DMA
> > operations. It requires syncing between those two pages. However,
> > since commit f3213d932173 ("ixgbe: Update driver to make use of DMA
> > attributes in Rx path"), the unmap operation is performed with
> > DMA_ATTR_SKIP_CPU_SYNC. As a result, the sync is not performed.
> > 
> > To fix this problem, always sync before possibly performing a page
> > unmap operation.
> > 
> > Fixes: f3213d932173 ("ixgbe: Update driver to make use of DMA
> > attributes in Rx path")
> > Reviewed-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > Signed-off-by: Firo Yang <firo.yang@suse.com>
> > ---
> > 
> > Changes from v1:
> >   * Imporved the patch description.
> >   * Added Reviewed-by: and Fixes: as suggested by Alexander Duyck
> > 
> >   drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 16 +++++++++-------
> >   1 file changed, 9 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > index cbaf712d6529..200de9838096 100644
> > --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > @@ -1825,13 +1825,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
> >   static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
> >   				struct sk_buff *skb)
> >   {
> > -	/* if the page was released unmap it, else just sync our portion */
> > -	if (unlikely(IXGBE_CB(skb)->page_released)) {
> > -		dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
> > -				     ixgbe_rx_pg_size(rx_ring),
> > -				     DMA_FROM_DEVICE,
> > -				     IXGBE_RX_DMA_ATTR);
> > -	} else if (ring_uses_build_skb(rx_ring)) {
> > +	if (ring_uses_build_skb(rx_ring)) {
> >   		unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
> >   		dma_sync_single_range_for_cpu(rx_ring->dev,
> > @@ -1848,6 +1842,14 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
> >   					      skb_frag_size(frag),
> >   					      DMA_FROM_DEVICE);
> >   	}
> > +
> > +	/* If the page was released, just unmap it. */
> > +	if (unlikely(IXGBE_CB(skb)->page_released)) {
> > +		dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
> > +				     ixgbe_rx_pg_size(rx_ring),
> > +				     DMA_FROM_DEVICE,
> > +				     IXGBE_RX_DMA_ATTR);
> > +	}
> >   }
> >   /**
> 

^ permalink raw reply

* Re: [PATCH v3 10/41] media/ivtv: convert put_page() to put_user_page*()
From: Hans Verkuil @ 2019-08-07  8:51 UTC (permalink / raw)
  To: john.hubbard, Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Andy Walls, Mauro Carvalho Chehab
In-Reply-To: <20190807013340.9706-11-jhubbard@nvidia.com>

On 8/7/19 3:33 AM, john.hubbard@gmail.com wrote:
> From: John Hubbard <jhubbard@nvidia.com>
> 
> For pages that were retained via get_user_pages*(), release those pages
> via the new put_user_page*() routines, instead of via put_page() or
> release_pages().
> 
> This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
> ("mm: introduce put_user_page*(), placeholder versions").
> 
> Cc: Andy Walls <awalls@md.metrocast.net>
> Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
> Cc: linux-media@vger.kernel.org
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>

Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>

Regards,

	Hans

> ---
>  drivers/media/pci/ivtv/ivtv-udma.c | 14 ++++----------
>  drivers/media/pci/ivtv/ivtv-yuv.c  | 11 +++--------
>  2 files changed, 7 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c
> index 5f8883031c9c..7c7f33c2412b 100644
> --- a/drivers/media/pci/ivtv/ivtv-udma.c
> +++ b/drivers/media/pci/ivtv/ivtv-udma.c
> @@ -92,7 +92,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
>  {
>  	struct ivtv_dma_page_info user_dma;
>  	struct ivtv_user_dma *dma = &itv->udma;
> -	int i, err;
> +	int err;
>  
>  	IVTV_DEBUG_DMA("ivtv_udma_setup, dst: 0x%08x\n", (unsigned int)ivtv_dest_addr);
>  
> @@ -119,8 +119,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
>  		IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n",
>  			   err, user_dma.page_count);
>  		if (err >= 0) {
> -			for (i = 0; i < err; i++)
> -				put_page(dma->map[i]);
> +			put_user_pages(dma->map, err);
>  			return -EINVAL;
>  		}
>  		return err;
> @@ -130,9 +129,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
>  
>  	/* Fill SG List with new values */
>  	if (ivtv_udma_fill_sg_list(dma, &user_dma, 0) < 0) {
> -		for (i = 0; i < dma->page_count; i++) {
> -			put_page(dma->map[i]);
> -		}
> +		put_user_pages(dma->map, dma->page_count);
>  		dma->page_count = 0;
>  		return -ENOMEM;
>  	}
> @@ -153,7 +150,6 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
>  void ivtv_udma_unmap(struct ivtv *itv)
>  {
>  	struct ivtv_user_dma *dma = &itv->udma;
> -	int i;
>  
>  	IVTV_DEBUG_INFO("ivtv_unmap_user_dma\n");
>  
> @@ -170,9 +166,7 @@ void ivtv_udma_unmap(struct ivtv *itv)
>  	ivtv_udma_sync_for_cpu(itv);
>  
>  	/* Release User Pages */
> -	for (i = 0; i < dma->page_count; i++) {
> -		put_page(dma->map[i]);
> -	}
> +	put_user_pages(dma->map, dma->page_count);
>  	dma->page_count = 0;
>  }
>  
> diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c
> index cd2fe2d444c0..2c61a11d391d 100644
> --- a/drivers/media/pci/ivtv/ivtv-yuv.c
> +++ b/drivers/media/pci/ivtv/ivtv-yuv.c
> @@ -30,7 +30,6 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
>  	struct yuv_playback_info *yi = &itv->yuv_info;
>  	u8 frame = yi->draw_frame;
>  	struct yuv_frame_info *f = &yi->new_frame_info[frame];
> -	int i;
>  	int y_pages, uv_pages;
>  	unsigned long y_buffer_offset, uv_buffer_offset;
>  	int y_decode_height, uv_decode_height, y_size;
> @@ -81,8 +80,7 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
>  				 uv_pages, uv_dma.page_count);
>  
>  			if (uv_pages >= 0) {
> -				for (i = 0; i < uv_pages; i++)
> -					put_page(dma->map[y_pages + i]);
> +				put_user_pages(&dma->map[y_pages], uv_pages);
>  				rc = -EFAULT;
>  			} else {
>  				rc = uv_pages;
> @@ -93,8 +91,7 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
>  				 y_pages, y_dma.page_count);
>  		}
>  		if (y_pages >= 0) {
> -			for (i = 0; i < y_pages; i++)
> -				put_page(dma->map[i]);
> +			put_user_pages(dma->map, y_pages);
>  			/*
>  			 * Inherit the -EFAULT from rc's
>  			 * initialization, but allow it to be
> @@ -112,9 +109,7 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
>  	/* Fill & map SG List */
>  	if (ivtv_udma_fill_sg_list (dma, &uv_dma, ivtv_udma_fill_sg_list (dma, &y_dma, 0)) < 0) {
>  		IVTV_DEBUG_WARN("could not allocate bounce buffers for highmem userspace buffers\n");
> -		for (i = 0; i < dma->page_count; i++) {
> -			put_page(dma->map[i]);
> -		}
> +		put_user_pages(dma->map, dma->page_count);
>  		dma->page_count = 0;
>  		return -ENOMEM;
>  	}
> 


^ permalink raw reply

* Re: [PATCH] net: stmmac: Fix the miscalculation of mapping from rxq to dma channel
From: Shaokun Zhang @ 2019-08-07  8:55 UTC (permalink / raw)
  To: Jose Abreu, netdev@vger.kernel.org,
	linux-stm32@st-md-mailman.stormreply.com
  Cc: yuqi jin, Giuseppe Cavallaro, Alexandre Torgue, David S. Miller,
	Maxime Coquelin
In-Reply-To: <BN8PR12MB3266D248C58DB7ABE6238A49D3D40@BN8PR12MB3266.namprd12.prod.outlook.com>

Hi Jose,

Thanks your quick reply.

On 2019/8/7 16:24, Jose Abreu wrote:
> From: Shaokun Zhang <zhangshaokun@hisilicon.com>
> Date: Aug/07/2019, 09:17:29 (UTC+00:00)
> 
>> From: yuqi jin <jinyuqi@huawei.com>
>>
>> XGMAC_MTL_RXQ_DMA_MAP1 will be configured if the number of queues is
>> greater than 3, but local variable chan will shift left more than 32-bits.
>> Let's fix this issue.
> 
> This was already fixed in -net. Please see [1]
> 
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/co
> mmit/drivers/net/ethernet/stmicro/stmmac?id=e8df7e8c233a18d2704e37ecff475
> 83b494789d3
> 
> ---
> Thanks,
> Jose Miguel Abreu
> 
> 


^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Lorenz Bauer @ 2019-08-07  9:03 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Alexei Starovoitov, Song Liu, Kees Cook, Networking, bpf,
	Alexei Starovoitov, Daniel Borkmann, Kernel Team, Jann Horn,
	Greg KH, Linux API, LSM List
In-Reply-To: <CALCETrXEHL3+NAY6P6vUj7Pvd9ZpZsYC6VCLXOaNxb90a_POGw@mail.gmail.com>

On Wed, 7 Aug 2019 at 06:24, Andy Lutomirski <luto@kernel.org> wrote:
> a) Those that, by design, control privileged operations.  This
> includes most attach calls, but it also includes allow_ptr_leaks,
> bpf_probe_read(), and quite a few other things.  It also includes all
> of the by_id calls, I think, unless some clever modification to the
> way they worked would isolate different users' objects.  I think that
> persistent objects can do pretty much everything that by_id users
> would need, so this isn't a big deal.

Slightly OT, since this is an implementation question: GET_MAP_FD_BY_ID
is useful to iterate a nested map. This isn't covered by rights to
persistent objects,
so it would need some thought.

-- 
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com

^ permalink raw reply

* Re: [PATCH v2 1/1] net: bridge: use mac_len in bridge forwarding
From: Nikolay Aleksandrov @ 2019-08-07  9:17 UTC (permalink / raw)
  To: Zahari Doychev, netdev
  Cc: bridge, roopa, jhs, dsahern, simon.horman, makita.toshiaki,
	xiyou.wangcong, jiri, ast, johannes
In-Reply-To: <20190805153740.29627-2-zahari.doychev@linux.com>

Hi Zahari,
On 05/08/2019 18:37, Zahari Doychev wrote:
> The bridge code cannot forward packets from various paths that set up the
> SKBs in different ways. Some of these packets get corrupted during the
> forwarding as not always is just ETH_HLEN pulled at the front. This happens
> e.g. when VLAN tags are pushed bu using tc act_vlan on ingress.
Overall the patch looks good, I think it shouldn't introduce any regressions
at least from the codepaths I was able to inspect, but please include more
details in here from the cover letter, in fact you don't need it just add all of
the details here so we have them, especially the test setup. Also please provide
some details how this patch was tested. It'd be great if you could provide a
selftest for it so we can make sure it's considered when doing future changes.

Thank you,
 Nik

> 
> The problem is fixed by using skb->mac_len instead of ETH_HLEN, which makes
> sure that the skb headers are correctly restored. This usually does not
> change anything, execpt the local bridge transmits which now need to set
> the skb->mac_len correctly in br_dev_xmit, as well as the broken case noted
> above.
> 
> Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
> ---
>  net/bridge/br_device.c  | 3 ++-
>  net/bridge/br_forward.c | 4 ++--
>  net/bridge/br_vlan.c    | 3 ++-
>  3 files changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 681b72862c16..aeb77ff60311 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -55,8 +55,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>  	BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
>  
>  	skb_reset_mac_header(skb);
> +	skb_reset_mac_len(skb);
>  	eth = eth_hdr(skb);
> -	skb_pull(skb, ETH_HLEN);
> +	skb_pull(skb, skb->mac_len);
>  
>  	if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
>  		goto out;
> diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
> index 86637000f275..edb4f3533f05 100644
> --- a/net/bridge/br_forward.c
> +++ b/net/bridge/br_forward.c
> @@ -32,7 +32,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
>  
>  int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
>  {
> -	skb_push(skb, ETH_HLEN);
> +	skb_push(skb, skb->mac_len);
>  	if (!is_skb_forwardable(skb->dev, skb))
>  		goto drop;
>  
> @@ -94,7 +94,7 @@ static void __br_forward(const struct net_bridge_port *to,
>  		net = dev_net(indev);
>  	} else {
>  		if (unlikely(netpoll_tx_running(to->br->dev))) {
> -			skb_push(skb, ETH_HLEN);
> +			skb_push(skb, skb->mac_len);
>  			if (!is_skb_forwardable(skb->dev, skb))
>  				kfree_skb(skb);
>  			else
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 021cc9f66804..88244c9cc653 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -466,13 +466,14 @@ static bool __allowed_ingress(const struct net_bridge *br,
>  		/* Tagged frame */
>  		if (skb->vlan_proto != br->vlan_proto) {
>  			/* Protocol-mismatch, empty out vlan_tci for new tag */
> -			skb_push(skb, ETH_HLEN);
> +			skb_push(skb, skb->mac_len);
>  			skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
>  							skb_vlan_tag_get(skb));
>  			if (unlikely(!skb))
>  				return false;
>  
>  			skb_pull(skb, ETH_HLEN);
> +			skb_reset_network_header(skb);
>  			skb_reset_mac_len(skb);
>  			*vid = 0;
>  			tagged = false;
> 


^ permalink raw reply

* Re: [PATCH -next] iwlwifi: dbg: work around clang bug by marking debug strings static
From: Joe Perches @ 2019-08-07  9:18 UTC (permalink / raw)
  To: Nathan Chancellor, Nick Desaulniers
  Cc: Johannes Berg, Michael Ellerman, Kalle Valo, Luca Coelho,
	Arnd Bergmann, Emmanuel Grumbach, Intel Linux Wireless,
	David S. Miller, Shahar S Matityahu, Sara Sharon, linux-wireless,
	netdev, LKML, clang-built-linux
In-Reply-To: <20190807051516.GA117639@archlinux-threadripper>

On Tue, 2019-08-06 at 22:15 -0700, Nathan Chancellor wrote:
> Just for everyone else (since I commented on our issue tracker), this is
> now fixed in Linus's tree as of commit  1f6607250331 ("iwlwifi: dbg_ini:
> fix compile time assert build errors").

I think this change is incomplete and suggest you add this
to remove the use of another const char * format.

---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 4d81776f576d..6b15e2e8cd37 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -2593,20 +2593,20 @@ static void iwl_fw_dbg_update_regions(struct iwl_fw_runtime *fwrt,
 {
 	void *iter = (void *)tlv->region_config;
 	int i, size = le32_to_cpu(tlv->num_regions);
-	const char *err_st =
-		"WRT: ext=%d. Invalid region %s %d for apply point %d\n";
 
 	for (i = 0; i < size; i++) {
 		struct iwl_fw_ini_region_cfg *reg = iter, **active;
 		int id = le32_to_cpu(reg->region_id);
 		u32 type = le32_to_cpu(reg->region_type);
 
-		if (WARN(id >= ARRAY_SIZE(fwrt->dump.active_regs), err_st, ext,
-			 "id", id, pnt))
+		if (WARN(id >= ARRAY_SIZE(fwrt->dump.active_regs),
+			 "WRT: ext=%d. Invalid region id %d for apply point %d\n",
+			 ext, id, pnt))
 			break;
 
-		if (WARN(type == 0 || type >= IWL_FW_INI_REGION_NUM, err_st,
-			 ext, "type", type, pnt))
+		if (WARN(type == 0 || type >= IWL_FW_INI_REGION_NUM,
+			 "WRT: ext=%d. Invalid region type %d for apply point %d\n",
+			 ext, type, pnt))
 			break;
 
 		active = &fwrt->dump.active_regs[id];



^ permalink raw reply related

* [PATCH net-next v5 0/6] net: mscc: PTP Hardware Clock (PHC) support
From: Antoine Tenart @ 2019-08-07  9:22 UTC (permalink / raw)
  To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
  Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen

Hello,

This series introduces the PTP Hardware Clock (PHC) support to the Mscc
Ocelot switch driver. In order to make use of this, a new register bank
is added and described in the device tree, as well as a new interrupt.
The use this bank and interrupt was made optional in the driver for dt
compatibility reasons.

Thanks!
Antoine

Since v4:
  - Added SKBTX_IN_PROGRESS.
  - Fixed two xmas trees.
  - Rework the loop condition in ocelot_ptp_rdy_irq_handler.

Since v3:
  - Fixed a spin_unlock_irqrestore issue.

Since v2:
  - Prevented from a possible infinite loop when reading the h/w
    timestamps.
  - s/GFP_KERNEL/GFP_ATOMIC/ in the Tx path.
  - Set rx_filter to HWTSTAMP_FILTER_PTP_V2_EVENT at probe.
  - Fixed s/w timestamping dependencies.
  - Added Paul Burton's Acked-by on patches 2 and 4.

Since v1:
  - Used list_for_each_safe() in ocelot_deinit().
  - Fixed a memory leak in ocelot_deinit() by calling
    dev_kfree_skb_any().
  - Fixed a locking issue in get_hwtimestamp().
  - Handled the NULL case of ptp_clock_register().
  - Added comments on optional dt properties.

Antoine Tenart (6):
  Documentation/bindings: net: ocelot: document the PTP bank
  Documentation/bindings: net: ocelot: document the PTP ready IRQ
  net: mscc: describe the PTP register range
  net: mscc: improve the frame header parsing readability
  net: mscc: remove the frame_info cpuq member
  net: mscc: PTP Hardware Clock (PHC) support

 .../devicetree/bindings/net/mscc-ocelot.txt   |  20 +-
 drivers/net/ethernet/mscc/ocelot.c            | 396 +++++++++++++++++-
 drivers/net/ethernet/mscc/ocelot.h            |  49 ++-
 drivers/net/ethernet/mscc/ocelot_board.c      | 146 ++++++-
 drivers/net/ethernet/mscc/ocelot_ptp.h        |  41 ++
 drivers/net/ethernet/mscc/ocelot_regs.c       |  11 +
 6 files changed, 629 insertions(+), 34 deletions(-)
 create mode 100644 drivers/net/ethernet/mscc/ocelot_ptp.h

-- 
2.21.0


^ permalink raw reply

* [PATCH net-next v5 2/6] Documentation/bindings: net: ocelot: document the PTP ready IRQ
From: Antoine Tenart @ 2019-08-07  9:22 UTC (permalink / raw)
  To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
  Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>

One additional interrupt needs to be described within the Ocelot device
tree node: the PTP ready one. This patch documents the binding needed to
do so.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 Documentation/devicetree/bindings/net/mscc-ocelot.txt | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/mscc-ocelot.txt b/Documentation/devicetree/bindings/net/mscc-ocelot.txt
index 4d05a3b0f786..3b6290b45ce5 100644
--- a/Documentation/devicetree/bindings/net/mscc-ocelot.txt
+++ b/Documentation/devicetree/bindings/net/mscc-ocelot.txt
@@ -17,9 +17,10 @@ Required properties:
   - "ana"
   - "portX" with X from 0 to the number of last port index available on that
     switch
-- interrupts: Should contain the switch interrupts for frame extraction and
-  frame injection
-- interrupt-names: should contain the interrupt names: "xtr", "inj"
+- interrupts: Should contain the switch interrupts for frame extraction,
+  frame injection and PTP ready.
+- interrupt-names: should contain the interrupt names: "xtr", "inj". Can contain
+  "ptp_rdy" which is optional due to backward compatibility.
 - ethernet-ports: A container for child nodes representing switch ports.
 
 The ethernet-ports container has the following properties
@@ -63,8 +64,8 @@ Example:
 			    "port2", "port3", "port4", "port5", "port6",
 			    "port7", "port8", "port9", "port10", "qsys",
 			    "ana";
-		interrupts = <21 22>;
-		interrupt-names = "xtr", "inj";
+		interrupts = <18 21 22>;
+		interrupt-names = "ptp_rdy", "xtr", "inj";
 
 		ethernet-ports {
 			#address-cells = <1>;
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v5 3/6] net: mscc: describe the PTP register range
From: Antoine Tenart @ 2019-08-07  9:22 UTC (permalink / raw)
  To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
  Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>

This patch adds support for using the PTP register range, and adds a
description of its registers. This bank is used when configuring PTP.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/ethernet/mscc/ocelot.h       |  9 ++++++
 drivers/net/ethernet/mscc/ocelot_board.c | 10 +++++-
 drivers/net/ethernet/mscc/ocelot_ptp.h   | 41 ++++++++++++++++++++++++
 drivers/net/ethernet/mscc/ocelot_regs.c  | 11 +++++++
 4 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mscc/ocelot_ptp.h

diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index f7eeb4806897..e0da8b4eddf2 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -23,6 +23,7 @@
 #include "ocelot_sys.h"
 #include "ocelot_qs.h"
 #include "ocelot_tc.h"
+#include "ocelot_ptp.h"
 
 #define PGID_AGGR    64
 #define PGID_SRC     80
@@ -71,6 +72,7 @@ enum ocelot_target {
 	SYS,
 	S2,
 	HSIO,
+	PTP,
 	TARGET_MAX,
 };
 
@@ -343,6 +345,13 @@ enum ocelot_reg {
 	S2_CACHE_ACTION_DAT,
 	S2_CACHE_CNT_DAT,
 	S2_CACHE_TG_DAT,
+	PTP_PIN_CFG = PTP << TARGET_OFFSET,
+	PTP_PIN_TOD_SEC_MSB,
+	PTP_PIN_TOD_SEC_LSB,
+	PTP_PIN_TOD_NSEC,
+	PTP_CFG_MISC,
+	PTP_CLK_CFG_ADJ_CFG,
+	PTP_CLK_CFG_ADJ_FREQ,
 };
 
 enum ocelot_regfield {
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 2451d4a96490..990027f04d1b 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -182,6 +182,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
 	struct {
 		enum ocelot_target id;
 		char *name;
+		u8 optional:1;
 	} res[] = {
 		{ SYS, "sys" },
 		{ REW, "rew" },
@@ -189,6 +190,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
 		{ ANA, "ana" },
 		{ QS, "qs" },
 		{ S2, "s2" },
+		{ PTP, "ptp", 1 },
 	};
 
 	if (!np && !pdev->dev.platform_data)
@@ -205,8 +207,14 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
 		struct regmap *target;
 
 		target = ocelot_io_platform_init(ocelot, pdev, res[i].name);
-		if (IS_ERR(target))
+		if (IS_ERR(target)) {
+			if (res[i].optional) {
+				ocelot->targets[res[i].id] = NULL;
+				continue;
+			}
+
 			return PTR_ERR(target);
+		}
 
 		ocelot->targets[res[i].id] = target;
 	}
diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.h b/drivers/net/ethernet/mscc/ocelot_ptp.h
new file mode 100644
index 000000000000..9ede14a12573
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_ptp.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * License: Dual MIT/GPL
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_PTP_H_
+#define _MSCC_OCELOT_PTP_H_
+
+#define PTP_PIN_CFG_RSZ			0x20
+#define PTP_PIN_TOD_SEC_MSB_RSZ		PTP_PIN_CFG_RSZ
+#define PTP_PIN_TOD_SEC_LSB_RSZ		PTP_PIN_CFG_RSZ
+#define PTP_PIN_TOD_NSEC_RSZ		PTP_PIN_CFG_RSZ
+
+#define PTP_PIN_CFG_DOM			BIT(0)
+#define PTP_PIN_CFG_SYNC		BIT(2)
+#define PTP_PIN_CFG_ACTION(x)		((x) << 3)
+#define PTP_PIN_CFG_ACTION_MASK		PTP_PIN_CFG_ACTION(0x7)
+
+enum {
+	PTP_PIN_ACTION_IDLE = 0,
+	PTP_PIN_ACTION_LOAD,
+	PTP_PIN_ACTION_SAVE,
+	PTP_PIN_ACTION_CLOCK,
+	PTP_PIN_ACTION_DELTA,
+	PTP_PIN_ACTION_NOSYNC,
+	PTP_PIN_ACTION_SYNC,
+};
+
+#define PTP_CFG_MISC_PTP_EN		BIT(2)
+
+#define PSEC_PER_SEC			1000000000000LL
+
+#define PTP_CFG_CLK_ADJ_CFG_ENA		BIT(0)
+#define PTP_CFG_CLK_ADJ_CFG_DIR		BIT(1)
+
+#define PTP_CFG_CLK_ADJ_FREQ_NS		BIT(30)
+
+#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_regs.c b/drivers/net/ethernet/mscc/ocelot_regs.c
index 6c387f994ec5..e59977d20400 100644
--- a/drivers/net/ethernet/mscc/ocelot_regs.c
+++ b/drivers/net/ethernet/mscc/ocelot_regs.c
@@ -234,6 +234,16 @@ static const u32 ocelot_s2_regmap[] = {
 	REG(S2_CACHE_TG_DAT,               0x000388),
 };
 
+static const u32 ocelot_ptp_regmap[] = {
+	REG(PTP_PIN_CFG,                   0x000000),
+	REG(PTP_PIN_TOD_SEC_MSB,           0x000004),
+	REG(PTP_PIN_TOD_SEC_LSB,           0x000008),
+	REG(PTP_PIN_TOD_NSEC,              0x00000c),
+	REG(PTP_CFG_MISC,                  0x0000a0),
+	REG(PTP_CLK_CFG_ADJ_CFG,           0x0000a4),
+	REG(PTP_CLK_CFG_ADJ_FREQ,          0x0000a8),
+};
+
 static const u32 *ocelot_regmap[] = {
 	[ANA] = ocelot_ana_regmap,
 	[QS] = ocelot_qs_regmap,
@@ -241,6 +251,7 @@ static const u32 *ocelot_regmap[] = {
 	[REW] = ocelot_rew_regmap,
 	[SYS] = ocelot_sys_regmap,
 	[S2] = ocelot_s2_regmap,
+	[PTP] = ocelot_ptp_regmap,
 };
 
 static const struct reg_field ocelot_regfields[] = {
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v5 4/6] net: mscc: improve the frame header parsing readability
From: Antoine Tenart @ 2019-08-07  9:22 UTC (permalink / raw)
  To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
  Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>

This cosmetic patch improves the frame header parsing readability by
introducing a new macro to access and mask its fields.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/ethernet/mscc/ocelot_board.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 990027f04d1b..5e4f1718dd99 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -16,24 +16,26 @@
 
 #include "ocelot.h"
 
-static int ocelot_parse_ifh(u32 *ifh, struct frame_info *info)
+#define IFH_EXTRACT_BITFIELD64(x, o, w) (((x) >> (o)) & GENMASK_ULL((w) - 1, 0))
+
+static int ocelot_parse_ifh(u32 *_ifh, struct frame_info *info)
 {
-	int i;
 	u8 llen, wlen;
+	u64 ifh[2];
+
+	ifh[0] = be64_to_cpu(((__force __be64 *)_ifh)[0]);
+	ifh[1] = be64_to_cpu(((__force __be64 *)_ifh)[1]);
 
-	/* The IFH is in network order, switch to CPU order */
-	for (i = 0; i < IFH_LEN; i++)
-		ifh[i] = ntohl((__force __be32)ifh[i]);
+	wlen = IFH_EXTRACT_BITFIELD64(ifh[0], 7,  8);
+	llen = IFH_EXTRACT_BITFIELD64(ifh[0], 15,  6);
 
-	wlen = (ifh[1] >> 7) & 0xff;
-	llen = (ifh[1] >> 15) & 0x3f;
 	info->len = OCELOT_BUFFER_CELL_SZ * wlen + llen - 80;
 
-	info->port = (ifh[2] & GENMASK(14, 11)) >> 11;
+	info->port = IFH_EXTRACT_BITFIELD64(ifh[1], 43, 4);
 
-	info->cpuq = (ifh[3] & GENMASK(27, 20)) >> 20;
-	info->tag_type = (ifh[3] & BIT(16)) >> 16;
-	info->vid = ifh[3] & GENMASK(11, 0);
+	info->cpuq = IFH_EXTRACT_BITFIELD64(ifh[1], 20, 8);
+	info->tag_type = IFH_EXTRACT_BITFIELD64(ifh[1], 16,  1);
+	info->vid = IFH_EXTRACT_BITFIELD64(ifh[1], 0,  12);
 
 	return 0;
 }
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v5 1/6] Documentation/bindings: net: ocelot: document the PTP bank
From: Antoine Tenart @ 2019-08-07  9:22 UTC (permalink / raw)
  To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
  Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>

One additional register range needs to be described within the Ocelot
device tree node: the PTP. This patch documents the binding needed to do
so.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 Documentation/devicetree/bindings/net/mscc-ocelot.txt | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/mscc-ocelot.txt b/Documentation/devicetree/bindings/net/mscc-ocelot.txt
index 9e5c17d426ce..4d05a3b0f786 100644
--- a/Documentation/devicetree/bindings/net/mscc-ocelot.txt
+++ b/Documentation/devicetree/bindings/net/mscc-ocelot.txt
@@ -12,6 +12,7 @@ Required properties:
   - "sys"
   - "rew"
   - "qs"
+  - "ptp" (optional due to backward compatibility)
   - "qsys"
   - "ana"
   - "portX" with X from 0 to the number of last port index available on that
@@ -44,6 +45,7 @@ Example:
 		reg = <0x1010000 0x10000>,
 		      <0x1030000 0x10000>,
 		      <0x1080000 0x100>,
+		      <0x10e0000 0x10000>,
 		      <0x11e0000 0x100>,
 		      <0x11f0000 0x100>,
 		      <0x1200000 0x100>,
@@ -57,9 +59,10 @@ Example:
 		      <0x1280000 0x100>,
 		      <0x1800000 0x80000>,
 		      <0x1880000 0x10000>;
-		reg-names = "sys", "rew", "qs", "port0", "port1", "port2",
-			    "port3", "port4", "port5", "port6", "port7",
-			    "port8", "port9", "port10", "qsys", "ana";
+		reg-names = "sys", "rew", "qs", "ptp", "port0", "port1",
+			    "port2", "port3", "port4", "port5", "port6",
+			    "port7", "port8", "port9", "port10", "qsys",
+			    "ana";
 		interrupts = <21 22>;
 		interrupt-names = "xtr", "inj";
 
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v5 5/6] net: mscc: remove the frame_info cpuq member
From: Antoine Tenart @ 2019-08-07  9:22 UTC (permalink / raw)
  To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
  Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>

In struct frame_info, the cpuq member is never used. This cosmetic patch
removes it from the structure, and from the parsing of the frame header
as it's only set but never used.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/ethernet/mscc/ocelot.h       | 1 -
 drivers/net/ethernet/mscc/ocelot_board.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index e0da8b4eddf2..515dee6fa8a6 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -45,7 +45,6 @@ struct frame_info {
 	u32 len;
 	u16 port;
 	u16 vid;
-	u8 cpuq;
 	u8 tag_type;
 };
 
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 5e4f1718dd99..df8d15994a89 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -33,7 +33,6 @@ static int ocelot_parse_ifh(u32 *_ifh, struct frame_info *info)
 
 	info->port = IFH_EXTRACT_BITFIELD64(ifh[1], 43, 4);
 
-	info->cpuq = IFH_EXTRACT_BITFIELD64(ifh[1], 20, 8);
 	info->tag_type = IFH_EXTRACT_BITFIELD64(ifh[1], 16,  1);
 	info->vid = IFH_EXTRACT_BITFIELD64(ifh[1], 0,  12);
 
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v5 6/6] net: mscc: PTP Hardware Clock (PHC) support
From: Antoine Tenart @ 2019-08-07  9:22 UTC (permalink / raw)
  To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
  Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>

This patch adds support for PTP Hardware Clock (PHC) to the Ocelot
switch for both PTP 1-step and 2-step modes.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/ethernet/mscc/ocelot.c       | 396 ++++++++++++++++++++++-
 drivers/net/ethernet/mscc/ocelot.h       |  39 +++
 drivers/net/ethernet/mscc/ocelot_board.c | 113 ++++++-
 3 files changed, 535 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 6932e615d4b0..0b74e4231f46 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
 #include <linux/skbuff.h>
 #include <linux/iopoll.h>
 #include <net/arp.h>
@@ -538,7 +539,7 @@ static int ocelot_port_stop(struct net_device *dev)
  */
 static int ocelot_gen_ifh(u32 *ifh, struct frame_info *info)
 {
-	ifh[0] = IFH_INJ_BYPASS;
+	ifh[0] = IFH_INJ_BYPASS | ((0x1ff & info->rew_op) << 21);
 	ifh[1] = (0xf00 & info->port) >> 8;
 	ifh[2] = (0xff & info->port) << 24;
 	ifh[3] = (info->tag_type << 16) | info->vid;
@@ -548,6 +549,7 @@ static int ocelot_gen_ifh(u32 *ifh, struct frame_info *info)
 
 static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	struct ocelot_port *port = netdev_priv(dev);
 	struct ocelot *ocelot = port->ocelot;
 	u32 val, ifh[IFH_LEN];
@@ -566,6 +568,14 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 	info.port = BIT(port->chip_port);
 	info.tag_type = IFH_TAG_TYPE_C;
 	info.vid = skb_vlan_tag_get(skb);
+
+	/* Check if timestamping is needed */
+	if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) {
+		info.rew_op = port->ptp_cmd;
+		if (port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
+			info.rew_op |= (port->ts_id  % 4) << 3;
+	}
+
 	ocelot_gen_ifh(ifh, &info);
 
 	for (i = 0; i < IFH_LEN; i++)
@@ -596,11 +606,53 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
-	dev_kfree_skb_any(skb);
+
+	if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP &&
+	    port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
+		struct ocelot_skb *oskb =
+			kzalloc(sizeof(struct ocelot_skb), GFP_ATOMIC);
+
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+		oskb->skb = skb;
+		oskb->id = port->ts_id % 4;
+		port->ts_id++;
+
+		list_add_tail(&oskb->head, &port->skbs);
+	} else {
+		dev_kfree_skb_any(skb);
+	}
 
 	return NETDEV_TX_OK;
 }
 
+void ocelot_get_hwtimestamp(struct ocelot *ocelot, struct timespec64 *ts)
+{
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	/* Read current PTP time to get seconds */
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+	ts->tv_sec = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+
+	/* Read packet HW timestamp from FIFO */
+	val = ocelot_read(ocelot, SYS_PTP_TXSTAMP);
+	ts->tv_nsec = SYS_PTP_TXSTAMP_PTP_TXSTAMP(val);
+
+	/* Sec has incremented since the ts was registered */
+	if ((ts->tv_sec & 0x1) != !!(val & SYS_PTP_TXSTAMP_PTP_TXSTAMP_SEC))
+		ts->tv_sec--;
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+}
+EXPORT_SYMBOL(ocelot_get_hwtimestamp);
+
 static int ocelot_mc_unsync(struct net_device *dev, const unsigned char *addr)
 {
 	struct ocelot_port *port = netdev_priv(dev);
@@ -917,6 +969,97 @@ static int ocelot_get_port_parent_id(struct net_device *dev,
 	return 0;
 }
 
+static int ocelot_hwstamp_get(struct ocelot_port *port, struct ifreq *ifr)
+{
+	struct ocelot *ocelot = port->ocelot;
+
+	return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config,
+			    sizeof(ocelot->hwtstamp_config)) ? -EFAULT : 0;
+}
+
+static int ocelot_hwstamp_set(struct ocelot_port *port, struct ifreq *ifr)
+{
+	struct ocelot *ocelot = port->ocelot;
+	struct hwtstamp_config cfg;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (cfg.flags)
+		return -EINVAL;
+
+	/* Tx type sanity check */
+	switch (cfg.tx_type) {
+	case HWTSTAMP_TX_ON:
+		port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP;
+		break;
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		/* IFH_REW_OP_ONE_STEP_PTP updates the correctional field, we
+		 * need to update the origin time.
+		 */
+		port->ptp_cmd = IFH_REW_OP_ORIGIN_PTP;
+		break;
+	case HWTSTAMP_TX_OFF:
+		port->ptp_cmd = 0;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	mutex_lock(&ocelot->ptp_lock);
+
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_NTP_ALL:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		break;
+	default:
+		mutex_unlock(&ocelot->ptp_lock);
+		return -ERANGE;
+	}
+
+	/* Commit back the result & save it */
+	memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg));
+	mutex_unlock(&ocelot->ptp_lock);
+
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+static int ocelot_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct ocelot_port *port = netdev_priv(dev);
+	struct ocelot *ocelot = port->ocelot;
+
+	/* The function is only used for PTP operations for now */
+	if (!ocelot->ptp)
+		return -EOPNOTSUPP;
+
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return ocelot_hwstamp_set(port, ifr);
+	case SIOCGHWTSTAMP:
+		return ocelot_hwstamp_get(port, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops ocelot_port_netdev_ops = {
 	.ndo_open			= ocelot_port_open,
 	.ndo_stop			= ocelot_port_stop,
@@ -933,6 +1076,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = {
 	.ndo_set_features		= ocelot_set_features,
 	.ndo_get_port_parent_id		= ocelot_get_port_parent_id,
 	.ndo_setup_tc			= ocelot_setup_tc,
+	.ndo_do_ioctl			= ocelot_ioctl,
 };
 
 static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data)
@@ -1014,12 +1158,37 @@ static int ocelot_get_sset_count(struct net_device *dev, int sset)
 	return ocelot->num_stats;
 }
 
+static int ocelot_get_ts_info(struct net_device *dev,
+			      struct ethtool_ts_info *info)
+{
+	struct ocelot_port *ocelot_port = netdev_priv(dev);
+	struct ocelot *ocelot = ocelot_port->ocelot;
+
+	if (!ocelot->ptp)
+		return ethtool_op_get_ts_info(dev, info);
+
+	info->phc_index = ocelot->ptp_clock ?
+			  ptp_clock_index(ocelot->ptp_clock) : -1;
+	info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
+				 SOF_TIMESTAMPING_RX_SOFTWARE |
+				 SOF_TIMESTAMPING_SOFTWARE |
+				 SOF_TIMESTAMPING_TX_HARDWARE |
+				 SOF_TIMESTAMPING_RX_HARDWARE |
+				 SOF_TIMESTAMPING_RAW_HARDWARE;
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
+			 BIT(HWTSTAMP_TX_ONESTEP_SYNC);
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+
+	return 0;
+}
+
 static const struct ethtool_ops ocelot_ethtool_ops = {
 	.get_strings		= ocelot_get_strings,
 	.get_ethtool_stats	= ocelot_get_ethtool_stats,
 	.get_sset_count		= ocelot_get_sset_count,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+	.get_ts_info		= ocelot_get_ts_info,
 };
 
 static int ocelot_port_attr_stp_state_set(struct ocelot_port *ocelot_port,
@@ -1629,6 +1798,196 @@ struct notifier_block ocelot_switchdev_blocking_nb __read_mostly = {
 };
 EXPORT_SYMBOL(ocelot_switchdev_blocking_nb);
 
+int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	unsigned long flags;
+	time64_t s;
+	u32 val;
+	s64 ns;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	s = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN) & 0xffff;
+	s <<= 32;
+	s += ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+	ns = ocelot_read_rix(ocelot, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+
+	/* Deal with negative values */
+	if (ns >= 0x3ffffff0 && ns <= 0x3fffffff) {
+		s--;
+		ns &= 0xf;
+		ns += 999999984;
+	}
+
+	set_normalized_timespec64(ts, s, ns);
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_ptp_gettime64);
+
+static int ocelot_ptp_settime64(struct ptp_clock_info *ptp,
+				const struct timespec64 *ts)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
+
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	ocelot_write_rix(ocelot, lower_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_LSB,
+			 TOD_ACC_PIN);
+	ocelot_write_rix(ocelot, upper_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_MSB,
+			 TOD_ACC_PIN);
+	ocelot_write_rix(ocelot, ts->tv_nsec, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_LOAD);
+
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	return 0;
+}
+
+static int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) {
+		struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+		unsigned long flags;
+		u32 val;
+
+		spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+		val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+		val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+		val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
+
+		ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+		ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+		ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN);
+		ocelot_write_rix(ocelot, delta, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+		val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+		val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+		val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_DELTA);
+
+		ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+		spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	} else {
+		/* Fall back using ocelot_ptp_settime64 which is not exact. */
+		struct timespec64 ts;
+		u64 now;
+
+		ocelot_ptp_gettime64(ptp, &ts);
+
+		now = ktime_to_ns(timespec64_to_ktime(ts));
+		ts = ns_to_timespec64(now + delta);
+
+		ocelot_ptp_settime64(ptp, &ts);
+	}
+	return 0;
+}
+
+static int ocelot_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	u32 unit = 0, direction = 0;
+	unsigned long flags;
+	u64 adj = 0;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	if (!scaled_ppm)
+		goto disable_adj;
+
+	if (scaled_ppm < 0) {
+		direction = PTP_CFG_CLK_ADJ_CFG_DIR;
+		scaled_ppm = -scaled_ppm;
+	}
+
+	adj = PSEC_PER_SEC << 16;
+	do_div(adj, scaled_ppm);
+	do_div(adj, 1000);
+
+	/* If the adjustment value is too large, use ns instead */
+	if (adj >= (1L << 30)) {
+		unit = PTP_CFG_CLK_ADJ_FREQ_NS;
+		do_div(adj, 1000);
+	}
+
+	/* Still too big */
+	if (adj >= (1L << 30))
+		goto disable_adj;
+
+	ocelot_write(ocelot, unit | adj, PTP_CLK_CFG_ADJ_FREQ);
+	ocelot_write(ocelot, PTP_CFG_CLK_ADJ_CFG_ENA | direction,
+		     PTP_CLK_CFG_ADJ_CFG);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	return 0;
+
+disable_adj:
+	ocelot_write(ocelot, 0, PTP_CLK_CFG_ADJ_CFG);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	return 0;
+}
+
+static struct ptp_clock_info ocelot_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.name		= "ocelot ptp",
+	.max_adj	= 0x7fffffff,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.gettime64	= ocelot_ptp_gettime64,
+	.settime64	= ocelot_ptp_settime64,
+	.adjtime	= ocelot_ptp_adjtime,
+	.adjfine	= ocelot_ptp_adjfine,
+};
+
+static int ocelot_init_timestamp(struct ocelot *ocelot)
+{
+	ocelot->ptp_info = ocelot_ptp_clock_info;
+	ocelot->ptp_clock = ptp_clock_register(&ocelot->ptp_info, ocelot->dev);
+	if (IS_ERR(ocelot->ptp_clock))
+		return PTR_ERR(ocelot->ptp_clock);
+	/* Check if PHC support is missing at the configuration level */
+	if (!ocelot->ptp_clock)
+		return 0;
+
+	ocelot_write(ocelot, SYS_PTP_CFG_PTP_STAMP_WID(30), SYS_PTP_CFG);
+	ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_LOW);
+	ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_HIGH);
+
+	ocelot_write(ocelot, PTP_CFG_MISC_PTP_EN, PTP_CFG_MISC);
+
+	/* There is no device reconfiguration, PTP Rx stamping is always
+	 * enabled.
+	 */
+	ocelot->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+
+	return 0;
+}
+
 int ocelot_probe_port(struct ocelot *ocelot, u8 port,
 		      void __iomem *regs,
 		      struct phy_device *phy)
@@ -1661,6 +2020,8 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port,
 	ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, ocelot_port->pvid,
 			  ENTRYTYPE_LOCKED);
 
+	INIT_LIST_HEAD(&ocelot_port->skbs);
+
 	err = register_netdev(dev);
 	if (err) {
 		dev_err(ocelot->dev, "register_netdev failed\n");
@@ -1684,7 +2045,7 @@ EXPORT_SYMBOL(ocelot_probe_port);
 int ocelot_init(struct ocelot *ocelot)
 {
 	u32 port;
-	int i, cpu = ocelot->num_phys_ports;
+	int i, ret, cpu = ocelot->num_phys_ports;
 	char queue_name[32];
 
 	ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports,
@@ -1699,6 +2060,8 @@ int ocelot_init(struct ocelot *ocelot)
 		return -ENOMEM;
 
 	mutex_init(&ocelot->stats_lock);
+	mutex_init(&ocelot->ptp_lock);
+	spin_lock_init(&ocelot->ptp_clock_lock);
 	snprintf(queue_name, sizeof(queue_name), "%s-stats",
 		 dev_name(ocelot->dev));
 	ocelot->stats_queue = create_singlethread_workqueue(queue_name);
@@ -1812,16 +2175,43 @@ int ocelot_init(struct ocelot *ocelot)
 	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
 	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
 			   OCELOT_STATS_CHECK_DELAY);
+
+	if (ocelot->ptp) {
+		ret = ocelot_init_timestamp(ocelot);
+		if (ret) {
+			dev_err(ocelot->dev,
+				"Timestamp initialization failed\n");
+			return ret;
+		}
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(ocelot_init);
 
 void ocelot_deinit(struct ocelot *ocelot)
 {
+	struct list_head *pos, *tmp;
+	struct ocelot_port *port;
+	struct ocelot_skb *entry;
+	int i;
+
 	cancel_delayed_work(&ocelot->stats_work);
 	destroy_workqueue(ocelot->stats_queue);
 	mutex_destroy(&ocelot->stats_lock);
 	ocelot_ace_deinit();
+
+	for (i = 0; i < ocelot->num_phys_ports; i++) {
+		port = ocelot->ports[i];
+
+		list_for_each_safe(pos, tmp, &port->skbs) {
+			entry = list_entry(pos, struct ocelot_skb, head);
+
+			list_del(pos);
+			dev_kfree_skb_any(entry->skb);
+			kfree(entry);
+		}
+	}
 }
 EXPORT_SYMBOL(ocelot_deinit);
 
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index 515dee6fa8a6..e40773c01a44 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -11,9 +11,11 @@
 #include <linux/bitops.h>
 #include <linux/etherdevice.h>
 #include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
 #include <linux/phy.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
+#include <linux/ptp_clock_kernel.h>
 #include <linux/regmap.h>
 
 #include "ocelot_ana.h"
@@ -39,6 +41,8 @@
 
 #define OCELOT_STATS_CHECK_DELAY (2 * HZ)
 
+#define OCELOT_PTP_QUEUE_SZ	128
+
 #define IFH_LEN 4
 
 struct frame_info {
@@ -46,6 +50,8 @@ struct frame_info {
 	u16 port;
 	u16 vid;
 	u8 tag_type;
+	u16 rew_op;
+	u32 timestamp;	/* rew_val */
 };
 
 #define IFH_INJ_BYPASS	BIT(31)
@@ -54,6 +60,12 @@ struct frame_info {
 #define IFH_TAG_TYPE_C 0
 #define IFH_TAG_TYPE_S 1
 
+#define IFH_REW_OP_NOOP			0x0
+#define IFH_REW_OP_DSCP			0x1
+#define IFH_REW_OP_ONE_STEP_PTP		0x2
+#define IFH_REW_OP_TWO_STEP_PTP		0x3
+#define IFH_REW_OP_ORIGIN_PTP		0x5
+
 #define OCELOT_SPEED_2500 0
 #define OCELOT_SPEED_1000 1
 #define OCELOT_SPEED_100  2
@@ -401,6 +413,13 @@ enum ocelot_regfield {
 	REGFIELD_MAX
 };
 
+enum ocelot_clk_pins {
+	ALT_PPS_PIN	= 1,
+	EXT_CLK_PIN,
+	ALT_LDST_PIN,
+	TOD_ACC_PIN
+};
+
 struct ocelot_multicast {
 	struct list_head list;
 	unsigned char addr[ETH_ALEN];
@@ -450,6 +469,13 @@ struct ocelot {
 	u64 *stats;
 	struct delayed_work stats_work;
 	struct workqueue_struct *stats_queue;
+
+	u8 ptp:1;
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_info;
+	struct hwtstamp_config hwtstamp_config;
+	struct mutex ptp_lock; /* Protects the PTP interface state */
+	spinlock_t ptp_clock_lock; /* Protects the PTP clock */
 };
 
 struct ocelot_port {
@@ -473,6 +499,16 @@ struct ocelot_port {
 	struct phy *serdes;
 
 	struct ocelot_port_tc tc;
+
+	u8 ptp_cmd;
+	struct list_head skbs;
+	u8 ts_id;
+};
+
+struct ocelot_skb {
+	struct list_head head;
+	struct sk_buff *skb;
+	u8 id;
 };
 
 u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
@@ -517,4 +553,7 @@ extern struct notifier_block ocelot_netdevice_nb;
 extern struct notifier_block ocelot_switchdev_nb;
 extern struct notifier_block ocelot_switchdev_blocking_nb;
 
+int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
+void ocelot_get_hwtimestamp(struct ocelot *ocelot, struct timespec64 *ts);
+
 #endif
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index df8d15994a89..446a4dae6078 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -31,6 +31,8 @@ static int ocelot_parse_ifh(u32 *_ifh, struct frame_info *info)
 
 	info->len = OCELOT_BUFFER_CELL_SZ * wlen + llen - 80;
 
+	info->timestamp = IFH_EXTRACT_BITFIELD64(ifh[0], 21, 32);
+
 	info->port = IFH_EXTRACT_BITFIELD64(ifh[1], 43, 4);
 
 	info->tag_type = IFH_EXTRACT_BITFIELD64(ifh[1], 16,  1);
@@ -92,13 +94,14 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
 		return IRQ_NONE;
 
 	do {
-		struct sk_buff *skb;
+		struct skb_shared_hwtstamps *shhwtstamps;
+		u64 tod_in_ns, full_ts_in_ns;
+		struct frame_info info = {};
 		struct net_device *dev;
-		u32 *buf;
+		u32 ifh[4], val, *buf;
+		struct timespec64 ts;
 		int sz, len, buf_len;
-		u32 ifh[4];
-		u32 val;
-		struct frame_info info;
+		struct sk_buff *skb;
 
 		for (i = 0; i < IFH_LEN; i++) {
 			err = ocelot_rx_frame_word(ocelot, grp, true, &ifh[i]);
@@ -145,6 +148,22 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
 			break;
 		}
 
+		if (ocelot->ptp) {
+			ocelot_ptp_gettime64(&ocelot->ptp_info, &ts);
+
+			tod_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec);
+			if ((tod_in_ns & 0xffffffff) < info.timestamp)
+				full_ts_in_ns = (((tod_in_ns >> 32) - 1) << 32) |
+						info.timestamp;
+			else
+				full_ts_in_ns = (tod_in_ns & GENMASK_ULL(63, 32)) |
+						info.timestamp;
+
+			shhwtstamps = skb_hwtstamps(skb);
+			memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
+			shhwtstamps->hwtstamp = full_ts_in_ns;
+		}
+
 		/* Everything we see on an interface that is in the HW bridge
 		 * has already been forwarded.
 		 */
@@ -164,6 +183,66 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t ocelot_ptp_rdy_irq_handler(int irq, void *arg)
+{
+	int budget = OCELOT_PTP_QUEUE_SZ;
+	struct ocelot *ocelot = arg;
+
+	while (budget--) {
+		struct skb_shared_hwtstamps shhwtstamps;
+		struct list_head *pos, *tmp;
+		struct sk_buff *skb = NULL;
+		struct ocelot_skb *entry;
+		struct ocelot_port *port;
+		struct timespec64 ts;
+		u32 val, id, txport;
+
+		val = ocelot_read(ocelot, SYS_PTP_STATUS);
+
+		/* Check if a timestamp can be retrieved */
+		if (!(val & SYS_PTP_STATUS_PTP_MESS_VLD))
+			break;
+
+		WARN_ON(val & SYS_PTP_STATUS_PTP_OVFL);
+
+		/* Retrieve the ts ID and Tx port */
+		id = SYS_PTP_STATUS_PTP_MESS_ID_X(val);
+		txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val);
+
+		/* Retrieve its associated skb */
+		port = ocelot->ports[txport];
+
+		list_for_each_safe(pos, tmp, &port->skbs) {
+			entry = list_entry(pos, struct ocelot_skb, head);
+			if (entry->id != id)
+				continue;
+
+			skb = entry->skb;
+
+			list_del(pos);
+			kfree(entry);
+		}
+
+		/* Next ts */
+		ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT);
+
+		if (unlikely(!skb))
+			continue;
+
+		/* Get the h/w timestamp */
+		ocelot_get_hwtimestamp(ocelot, &ts);
+
+		/* Set the timestamp into the skb */
+		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+		shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+		skb_tstamp_tx(skb, &shhwtstamps);
+
+		dev_kfree_skb_any(skb);
+	}
+
+	return IRQ_HANDLED;
+}
+
 static const struct of_device_id mscc_ocelot_match[] = {
 	{ .compatible = "mscc,vsc7514-switch" },
 	{ }
@@ -172,12 +251,12 @@ MODULE_DEVICE_TABLE(of, mscc_ocelot_match);
 
 static int mscc_ocelot_probe(struct platform_device *pdev)
 {
-	int err, irq;
-	unsigned int i;
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *ports, *portnp;
+	int err, irq_xtr, irq_ptp_rdy;
 	struct ocelot *ocelot;
 	struct regmap *hsio;
+	unsigned int i;
 	u32 val;
 
 	struct {
@@ -232,16 +311,30 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
-	irq = platform_get_irq_byname(pdev, "xtr");
-	if (irq < 0)
+	irq_xtr = platform_get_irq_byname(pdev, "xtr");
+	if (irq_xtr < 0)
 		return -ENODEV;
 
-	err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+	err = devm_request_threaded_irq(&pdev->dev, irq_xtr, NULL,
 					ocelot_xtr_irq_handler, IRQF_ONESHOT,
 					"frame extraction", ocelot);
 	if (err)
 		return err;
 
+	irq_ptp_rdy = platform_get_irq_byname(pdev, "ptp_rdy");
+	if (irq_ptp_rdy > 0) {
+		err = devm_request_threaded_irq(&pdev->dev, irq_ptp_rdy, NULL,
+						ocelot_ptp_rdy_irq_handler,
+						IRQF_ONESHOT, "ptp ready",
+						ocelot);
+		if (err)
+			return err;
+
+		/* Check if we can support PTP */
+		if (ocelot->targets[PTP])
+			ocelot->ptp = 1;
+	}
+
 	regmap_field_write(ocelot->regfields[SYS_RESET_CFG_MEM_INIT], 1);
 	regmap_field_write(ocelot->regfields[SYS_RESET_CFG_MEM_ENA], 1);
 
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH net-next v2] ibmveth: Allow users to update reported speed and duplex
From: Michael Ellerman @ 2019-08-07 10:09 UTC (permalink / raw)
  To: Thomas Falcon; +Cc: netdev, linuxppc-dev, Thomas Falcon
In-Reply-To: <1565108588-17331-1-git-send-email-tlfalcon@linux.ibm.com>

Thomas Falcon <tlfalcon@linux.ibm.com> writes:
> Reported ethtool link settings for the ibmveth driver are currently
> hardcoded and no longer reflect the actual capabilities of supported
> hardware. There is no interface designed for retrieving this information
> from device firmware nor is there any way to update current settings
> to reflect observed or expected link speeds.
>
> To avoid breaking existing configurations, retain current values as
> default settings but let users update them to match the expected
> capabilities of underlying hardware if needed. This update would
> allow the use of configurations that rely on certain link speed
> settings, such as LACP. This patch is based on the implementation
> in virtio_net.
>
> Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
> ---
> v2: Updated default driver speed/duplex settings to avoid
>     breaking existing setups

Thanks.

I won't give you an ack because I don't know jack about network drivers
these days, but I think that alleviates my concern about breaking
existing setups. I'll leave the rest of the review up to the networking
folks.

cheers

> diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
> index d654c23..5dc634f 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.c
> +++ b/drivers/net/ethernet/ibm/ibmveth.c
> @@ -712,31 +712,68 @@ static int ibmveth_close(struct net_device *netdev)
>  	return 0;
>  }
>  
> -static int netdev_get_link_ksettings(struct net_device *dev,
> -				     struct ethtool_link_ksettings *cmd)
> +static bool
> +ibmveth_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
>  {
> -	u32 supported, advertising;
> -
> -	supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
> -				SUPPORTED_FIBRE);
> -	advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
> -				ADVERTISED_FIBRE);
> -	cmd->base.speed = SPEED_1000;
> -	cmd->base.duplex = DUPLEX_FULL;
> -	cmd->base.port = PORT_FIBRE;
> -	cmd->base.phy_address = 0;
> -	cmd->base.autoneg = AUTONEG_ENABLE;
> -
> -	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
> -						supported);
> -	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
> -						advertising);
> +	struct ethtool_link_ksettings diff1 = *cmd;
> +	struct ethtool_link_ksettings diff2 = {};
> +
> +	diff2.base.port = PORT_OTHER;
> +	diff1.base.speed = 0;
> +	diff1.base.duplex = 0;
> +	diff1.base.cmd = 0;
> +	diff1.base.link_mode_masks_nwords = 0;
> +	ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
> +
> +	return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) &&
> +		bitmap_empty(diff1.link_modes.supported,
> +			     __ETHTOOL_LINK_MODE_MASK_NBITS) &&
> +		bitmap_empty(diff1.link_modes.advertising,
> +			     __ETHTOOL_LINK_MODE_MASK_NBITS) &&
> +		bitmap_empty(diff1.link_modes.lp_advertising,
> +			     __ETHTOOL_LINK_MODE_MASK_NBITS);
> +}
> +
> +static int ibmveth_set_link_ksettings(struct net_device *dev,
> +				      const struct ethtool_link_ksettings *cmd)
> +{
> +	struct ibmveth_adapter *adapter = netdev_priv(dev);
> +	u32 speed;
> +	u8 duplex;
> +
> +	speed = cmd->base.speed;
> +	duplex = cmd->base.duplex;
> +	/* don't allow custom speed and duplex */
> +	if (!ethtool_validate_speed(speed) ||
> +	    !ethtool_validate_duplex(duplex) ||
> +	    !ibmveth_validate_ethtool_cmd(cmd))
> +		return -EINVAL;
> +	adapter->speed = speed;
> +	adapter->duplex = duplex;
>  
>  	return 0;
>  }
>  
> -static void netdev_get_drvinfo(struct net_device *dev,
> -			       struct ethtool_drvinfo *info)
> +static int ibmveth_get_link_ksettings(struct net_device *dev,
> +				      struct ethtool_link_ksettings *cmd)
> +{
> +	struct ibmveth_adapter *adapter = netdev_priv(dev);
> +
> +	cmd->base.speed = adapter->speed;
> +	cmd->base.duplex = adapter->duplex;
> +	cmd->base.port = PORT_OTHER;
> +
> +	return 0;
> +}
> +
> +static void ibmveth_init_link_settings(struct ibmveth_adapter *adapter)
> +{
> +	adapter->duplex = DUPLEX_FULL;
> +	adapter->speed = SPEED_1000;
> +}
> +
> +static void ibmveth_get_drvinfo(struct net_device *dev,
> +				struct ethtool_drvinfo *info)
>  {
>  	strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
>  	strlcpy(info->version, ibmveth_driver_version, sizeof(info->version));
> @@ -965,12 +1002,13 @@ static void ibmveth_get_ethtool_stats(struct net_device *dev,
>  }
>  
>  static const struct ethtool_ops netdev_ethtool_ops = {
> -	.get_drvinfo		= netdev_get_drvinfo,
> +	.get_drvinfo		= ibmveth_get_drvinfo,
>  	.get_link		= ethtool_op_get_link,
>  	.get_strings		= ibmveth_get_strings,
>  	.get_sset_count		= ibmveth_get_sset_count,
>  	.get_ethtool_stats	= ibmveth_get_ethtool_stats,
> -	.get_link_ksettings	= netdev_get_link_ksettings,
> +	.get_link_ksettings	= ibmveth_get_link_ksettings,
> +	.set_link_ksettings	= ibmveth_set_link_ksettings
>  };
>  
>  static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
> @@ -1647,6 +1685,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
>  	adapter->netdev = netdev;
>  	adapter->mcastFilterSize = *mcastFilterSize_p;
>  	adapter->pool_config = 0;
> +	ibmveth_init_link_settings(adapter);
>  
>  	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
>  
> diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
> index 4e9bf34..db96c88 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.h
> +++ b/drivers/net/ethernet/ibm/ibmveth.h
> @@ -162,6 +162,9 @@ struct ibmveth_adapter {
>      u64 tx_send_failed;
>      u64 tx_large_packets;
>      u64 rx_large_packets;
> +    /* Ethtool settings */
> +    u8 duplex;
> +    u32 speed;
>  };
>  
>  /*
> -- 
> 1.8.3.1

^ permalink raw reply

* [PATCH net-next 00/10] drop_monitor: Capture dropped packets and metadata
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel

From: Ido Schimmel <idosch@mellanox.com>

So far drop monitor supported only one mode of operation in which a
summary of recent packet drops is periodically sent to user space as a
netlink event. The event only includes the drop location (program
counter) and number of drops in the last interval.

While this mode of operation allows one to understand if the system is
dropping packets, it is not sufficient if a more detailed analysis is
required. Both the packet itself and related metadata are missing.

This patchset extends drop monitor with another mode of operation where
the packet - potentially truncated - and metadata (e.g., drop location,
timestamp, netdev) are sent to user space as a netlink event. Thanks to
the extensible nature of netlink, more metadata can be added in the
future.

To avoid performing expensive operations in the context in which
kfree_skb() is called, the dropped skbs are cloned and queued on per-CPU
skb drop list. The list is then processed in process context (using a
workqueue), where the netlink messages are allocated, prepared and
finally sent to user space.

A follow-up patchset will integrate drop monitor with devlink and allow
the latter to call into drop monitor to report hardware drops. In the
future, XDP drops can be added as well, thereby making drop monitor the
go-to netlink channel for diagnosing all packet drops.

Example usage with patched dropwatch [1] can be found here [2]. Example
dissection of drop monitor netlink events with patched wireshark [3] can
be found here [4]. I will submit both changes upstream after the kernel
changes are accepted. Another change worth making is adding a dropmon
pseudo interface to libpcap, similar to the nflog interface [5]. This
will allow users to specifically listen on dropmon traffic instead of
capturing all netlink packets via the nlmon netdev.

Patches #1-#5 prepare the code towards the actual changes in later
patches.

Patch #6 adds another mode of operation to drop monitor in which the
dropped packet itself is notified to user space along with metadata.

Patch #7 allows users to truncate reported packets to a specific length,
in case only the headers are of interest. The original length of the
packet is added as metadata to the netlink notification.

Patch #8 allows user to query the current configuration of drop monitor
(e.g., alert mode, truncation length).

Patches #9-#10 allow users to tune the length of the per-CPU skb drop
list according to their needs.

Changes since RFC [6]:
* Limit the length of the per-CPU skb drop list and make it configurable
* Do not use the hysteresis timer in packet alert mode
* Introduce alert mode operations in a separate patch and only then
  introduce the new alert mode
* Use 'skb->skb_iif' instead of 'skb->dev' because the latter is inside
  a union with 'dev_scratch' and therefore not guaranteed to point to a
  valid netdev
* Return '-EBUSY' instead of '-EOPNOTSUPP' when trying to configure drop
  monitor while it is monitoring
* Did not change schedule_work() in favor of schedule_work_on() as I did
  not observe a change in number of tail drops

[1] https://github.com/idosch/dropwatch/tree/packet-mode
[2] https://gist.github.com/idosch/166b64384577174230fd2523866f6b1c#file-gistfile1-txt
[3] https://github.com/idosch/wireshark/tree/drop-monitor-v1
[4] https://gist.github.com/idosch/166b64384577174230fd2523866f6b1c#file-gistfile2-txt
[5] https://github.com/the-tcpdump-group/libpcap/blob/master/pcap-netfilter-linux.c
[6] https://patchwork.ozlabs.org/cover/1135226/

Ido Schimmel (10):
  drop_monitor: Split tracing enable / disable to different functions
  drop_monitor: Initialize timer and work item upon tracing enable
  drop_monitor: Reset per-CPU data before starting to trace
  drop_monitor: Require CAP_NET_ADMIN for drop monitor configuration
  drop_monitor: Add alert mode operations
  drop_monitor: Add packet alert mode
  drop_monitor: Allow truncation of dropped packets
  drop_monitor: Add a command to query current configuration
  drop_monitor: Make drop queue length configurable
  drop_monitor: Expose tail drop counter

 include/uapi/linux/net_dropmon.h |  50 +++
 net/core/drop_monitor.c          | 594 +++++++++++++++++++++++++++++--
 2 files changed, 607 insertions(+), 37 deletions(-)

-- 
2.21.0

^ permalink raw reply

* [PATCH net-next 01/10] drop_monitor: Split tracing enable / disable to different functions
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

Subsequent patches will need to enable / disable tracing based on the
configured alerting mode.

Reduce the nesting level and prepare for the introduction of this
functionality by splitting the tracing enable / disable operations into
two different functions.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 net/core/drop_monitor.c | 79 ++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 4deb86f990f1..8b9b0b899ebc 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -241,11 +241,58 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 	rcu_read_unlock();
 }
 
+static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
+{
+	int rc;
+
+	if (!try_module_get(THIS_MODULE)) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+		return -ENODEV;
+	}
+
+	rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
+		goto err_module_put;
+	}
+
+	rc = register_trace_napi_poll(trace_napi_poll_hit, NULL);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
+		goto err_unregister_trace;
+	}
+
+	return 0;
+
+err_unregister_trace:
+	unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+err_module_put:
+	module_put(THIS_MODULE);
+	return rc;
+}
+
+static void net_dm_trace_off_set(void)
+{
+	struct dm_hw_stat_delta *new_stat, *temp;
+
+	unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
+	unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+
+	tracepoint_synchronize_unregister();
+
+	list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
+		if (new_stat->dev == NULL) {
+			list_del_rcu(&new_stat->list);
+			kfree_rcu(new_stat, rcu);
+		}
+	}
+
+	module_put(THIS_MODULE);
+}
+
 static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
 {
 	int rc = 0;
-	struct dm_hw_stat_delta *new_stat = NULL;
-	struct dm_hw_stat_delta *temp;
 
 	if (state == trace_state) {
 		NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state");
@@ -254,34 +301,10 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
 
 	switch (state) {
 	case TRACE_ON:
-		if (!try_module_get(THIS_MODULE)) {
-			NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
-			rc = -ENODEV;
-			break;
-		}
-
-		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
-		rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
+		rc = net_dm_trace_on_set(extack);
 		break;
-
 	case TRACE_OFF:
-		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
-		rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
-
-		tracepoint_synchronize_unregister();
-
-		/*
-		 * Clean the device list
-		 */
-		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
-			if (new_stat->dev == NULL) {
-				list_del_rcu(&new_stat->list);
-				kfree_rcu(new_stat, rcu);
-			}
-		}
-
-		module_put(THIS_MODULE);
-
+		net_dm_trace_off_set();
 		break;
 	default:
 		rc = 1;
-- 
2.21.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox