* [PATCH v5 01/13] net: phy: adin: add support for Analog Devices PHYs
From: Alexandru Ardelean @ 2019-08-16 13:09 UTC (permalink / raw)
To: netdev, devicetree, linux-kernel
Cc: davem, robh+dt, mark.rutland, f.fainelli, hkallweit1, andrew,
Alexandru Ardelean
In-Reply-To: <20190816131011.23264-1-alexandru.ardelean@analog.com>
This change adds support for Analog Devices Industrial Ethernet PHYs.
Particularly the PHYs this driver adds support for:
* ADIN1200 - Robust, Industrial, Low Power 10/100 Ethernet PHY
* ADIN1300 - Robust, Industrial, Low Latency 10/100/1000 Gigabit
Ethernet PHY
The 2 chips are register compatible with one another. The main difference
is that ADIN1200 doesn't operate in gigabit mode.
The chips can be operated by the Generic PHY driver as well via the
standard IEEE PHY registers (0x0000 - 0x000F) which are supported by the
kernel as well. This assumes that configuration of the PHY has been done
completely in HW, according to spec.
Configuration can also be done via registers, which will be supported by
this driver.
Datasheets:
https://www.analog.com/media/en/technical-documentation/data-sheets/ADIN1300.pdf
https://www.analog.com/media/en/technical-documentation/data-sheets/ADIN1200.pdf
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
---
MAINTAINERS | 7 ++++++
drivers/net/phy/Kconfig | 9 ++++++++
drivers/net/phy/Makefile | 1 +
drivers/net/phy/adin.c | 49 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 66 insertions(+)
create mode 100644 drivers/net/phy/adin.c
diff --git a/MAINTAINERS b/MAINTAINERS
index e352550a6895..e8aa8a667864 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -938,6 +938,13 @@ S: Supported
F: drivers/mux/adgs1408.c
F: Documentation/devicetree/bindings/mux/adi,adgs1408.txt
+ANALOG DEVICES INC ADIN DRIVER
+M: Alexandru Ardelean <alexandru.ardelean@analog.com>
+L: netdev@vger.kernel.org
+W: http://ez.analog.com/community/linux-device-drivers
+S: Supported
+F: drivers/net/phy/adin.c
+
ANALOG DEVICES INC ADIS DRIVER LIBRARY
M: Alexandru Ardelean <alexandru.ardelean@analog.com>
S: Supported
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 48ca213c0ada..03be30cde552 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -257,6 +257,15 @@ config SFP
depends on HWMON || HWMON=n
select MDIO_I2C
+config ADIN_PHY
+ tristate "Analog Devices Industrial Ethernet PHYs"
+ help
+ Adds support for the Analog Devices Industrial Ethernet PHYs.
+ Currently supports the:
+ - ADIN1200 - Robust, Industrial, Low Power 10/100 Ethernet PHY
+ - ADIN1300 - Robust, Industrial, Low Latency 10/100/1000 Gigabit
+ Ethernet PHY
+
config AMD_PHY
tristate "AMD PHYs"
---help---
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index ba07c27e4208..a03437e091f3 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_SFP) += sfp.o
sfp-obj-$(CONFIG_SFP) += sfp-bus.o
obj-y += $(sfp-obj-y) $(sfp-obj-m)
+obj-$(CONFIG_ADIN_PHY) += adin.o
obj-$(CONFIG_AMD_PHY) += amd.o
aquantia-objs += aquantia_main.o
ifdef CONFIG_HWMON
diff --git a/drivers/net/phy/adin.c b/drivers/net/phy/adin.c
new file mode 100644
index 000000000000..6d7af4743957
--- /dev/null
+++ b/drivers/net/phy/adin.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0+
+/**
+ * Driver for Analog Devices Industrial Ethernet PHYs
+ *
+ * Copyright 2019 Analog Devices Inc.
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+#define PHY_ID_ADIN1200 0x0283bc20
+#define PHY_ID_ADIN1300 0x0283bc30
+
+static int adin_config_init(struct phy_device *phydev)
+{
+ return genphy_config_init(phydev);
+}
+
+static struct phy_driver adin_driver[] = {
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_ADIN1200),
+ .name = "ADIN1200",
+ .config_init = adin_config_init,
+ .config_aneg = genphy_config_aneg,
+ .read_status = genphy_read_status,
+ },
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_ADIN1300),
+ .name = "ADIN1300",
+ .config_init = adin_config_init,
+ .config_aneg = genphy_config_aneg,
+ .read_status = genphy_read_status,
+ },
+};
+
+module_phy_driver(adin_driver);
+
+static struct mdio_device_id __maybe_unused adin_tbl[] = {
+ { PHY_ID_MATCH_MODEL(PHY_ID_ADIN1200) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_ADIN1300) },
+ { }
+};
+
+MODULE_DEVICE_TABLE(mdio, adin_tbl);
+MODULE_DESCRIPTION("Analog Devices Industrial Ethernet PHY driver");
+MODULE_LICENSE("GPL");
--
2.20.1
^ permalink raw reply related
* Re: [PATCH v5 10/13] net: phy: adin: implement PHY subsystem software reset
From: Andrew Lunn @ 2019-08-16 13:19 UTC (permalink / raw)
To: Alexandru Ardelean
Cc: netdev, devicetree, linux-kernel, davem, robh+dt, mark.rutland,
f.fainelli, hkallweit1
In-Reply-To: <20190816131011.23264-11-alexandru.ardelean@analog.com>
On Fri, Aug 16, 2019 at 04:10:08PM +0300, Alexandru Ardelean wrote:
> The ADIN PHYs supports 4 types of reset:
> 1. The standard PHY reset via BMCR_RESET bit in MII_BMCR reg
> 2. Reset via GPIO
> 3. Reset via reg GeSftRst (0xff0c) & reload previous pin configs
> 4. Reset via reg GeSftRst (0xff0c) & request new pin configs
>
> Resets 2, 3 & 4 are almost identical, with the exception that the crystal
> oscillator is available during reset for 2.
>
> This change implements subsystem software reset via the GeSftRst and
> reloading the previous pin configuration (so reset number 3).
> This will also reset the PHY core regs (similar to reset 1).
>
> Since writing bit 1 to reg GeSftRst is self-clearing, the only thing that
> can be done, is to write to that register, wait a specific amount of time
> (10 milliseconds should be enough) and try to read back and check if there
> are no errors on read. A busy-wait-read won't work well, and may sometimes
> work or not work.
>
> In case phylib is configured to also do a reset via GPIO, the ADIN PHY may
> be reset twice when the PHY device registers, but that isn't a problem,
> since it's being done on boot (or PHY device register).
>
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Andrew
^ permalink raw reply
* [PATCH net-next 3/4 v3] net: ethernet: mediatek: Rename NEXT_RX_DESP_IDX to NEXT_DESP_IDX
From: Stefan Roese @ 2019-08-16 13:23 UTC (permalink / raw)
To: netdev, linux-mediatek
Cc: René van Dorst, Daniel Golle, Sean Wang, John Crispin
In-Reply-To: <20190816132325.28426-1-sr@denx.de>
Rename the NEXT_RX_DESP_IDX macro to NEXT_DESP_IDX, so that it can better
be used for TX ops as well. This will be used in the upcoming
MT7628/88 support (same functionality for RX and TX in this macro).
Signed-off-by: Stefan Roese <sr@denx.de>
Cc: René van Dorst <opensource@vdorst.com>
Cc: Daniel Golle <daniel@makrotopia.org>
Cc: Sean Wang <sean.wang@mediatek.com>
Cc: John Crispin <john@phrozen.org>
---
v3:
- No change
v2:
- New patch
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++--
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index bee2cdca66e7..d9978174b96a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -903,7 +903,7 @@ static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth)
for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
ring = &eth->rx_ring[i];
- idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
+ idx = NEXT_DESP_IDX(ring->calc_idx, ring->dma_size);
if (ring->dma[idx].rxd2 & RX_DMA_DONE) {
ring->calc_idx_update = true;
return ring;
@@ -952,7 +952,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
if (unlikely(!ring))
goto rx_done;
- idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
+ idx = NEXT_DESP_IDX(ring->calc_idx, ring->dma_size);
rxd = &ring->dma[idx];
data = ring->data[idx];
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 088e2bc621f7..556644f28eae 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -39,7 +39,7 @@
NETIF_F_SG | NETIF_F_TSO | \
NETIF_F_TSO6 | \
NETIF_F_IPV6_CSUM)
-#define NEXT_RX_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1))
+#define NEXT_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1))
#define MTK_MAX_RX_RING_NUM 4
#define MTK_HW_LRO_DMA_SIZE 8
--
2.22.1
^ permalink raw reply related
* [PATCH net-next 1/4 v3] dt-bindings: net: mediatek: Add support for MediaTek MT7628/88 SoC
From: Stefan Roese @ 2019-08-16 13:23 UTC (permalink / raw)
To: netdev, linux-mediatek
Cc: René van Dorst, Daniel Golle, Sean Wang, John Crispin,
devicetree, Rob Herring
Add a compatible string for the ethernet IP core on MT7628/88 SoCs. It is
compatible with the older Ralink Rt5350F SoC, and OpenWrt already
uses this compatible string for the MT76x8.
Signed-off-by: Stefan Roese <sr@denx.de>
Cc: René van Dorst <opensource@vdorst.com>
Cc: Daniel Golle <daniel@makrotopia.org>
Cc: Sean Wang <sean.wang@mediatek.com>
Cc: John Crispin <john@phrozen.org>
Cc: devicetree@vger.kernel.org
Cc: Rob Herring <robh@kernel.org>
---
v3:
- No change
v2:
- New patch - bindings description moved to separate patch
Documentation/devicetree/bindings/net/mediatek-net.txt | 1 +
1 file changed, 1 insertion(+)
diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index 770ff98d4524..72d03e07cf7c 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -12,6 +12,7 @@ Required properties:
"mediatek,mt7623-eth", "mediatek,mt2701-eth": for MT7623 SoC
"mediatek,mt7622-eth": for MT7622 SoC
"mediatek,mt7629-eth": for MT7629 SoC
+ "ralink,rt5350-eth": for Ralink Rt5350F and MT7628/88 SoC
- reg: Address and length of the register set for the device
- interrupts: Should contain the three frame engines interrupts in numeric
order. These are fe_int0, fe_int1 and fe_int2.
--
2.22.1
^ permalink raw reply related
* [PATCH net-next 2/4 v3] net: ethernet: mediatek: Rename MTK_QMTK_INT_STATUS to MTK_QDMA_INT_STATUS
From: Stefan Roese @ 2019-08-16 13:23 UTC (permalink / raw)
To: netdev, linux-mediatek
Cc: René van Dorst, Daniel Golle, Sean Wang, John Crispin
In-Reply-To: <20190816132325.28426-1-sr@denx.de>
Currently all QDMA registers are named "MTK_QDMA_foo" in this driver
with one exception: MTK_QMTK_INT_STATUS. This patch renames
MTK_QMTK_INT_STATUS to MTK_QDMA_INT_STATUS so that all macros follow
this rule.
Signed-off-by: Stefan Roese <sr@denx.de>
Cc: René van Dorst <opensource@vdorst.com>
Cc: Daniel Golle <daniel@makrotopia.org>
Cc: Sean Wang <sean.wang@mediatek.com>
Cc: John Crispin <john@phrozen.org>
---
v3:
- No change
v2:
- New patch
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++----
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index ddbffeb5701b..bee2cdca66e7 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1122,11 +1122,11 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget)
int tx_done = 0;
mtk_handle_status_irq(eth);
- mtk_w32(eth, MTK_TX_DONE_INT, MTK_QMTK_INT_STATUS);
+ mtk_w32(eth, MTK_TX_DONE_INT, MTK_QDMA_INT_STATUS);
tx_done = mtk_poll_tx(eth, budget);
if (unlikely(netif_msg_intr(eth))) {
- status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+ status = mtk_r32(eth, MTK_QDMA_INT_STATUS);
mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
dev_info(eth->dev,
"done tx %d, intr 0x%08x/0x%x\n",
@@ -1136,7 +1136,7 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget)
if (tx_done == budget)
return budget;
- status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+ status = mtk_r32(eth, MTK_QDMA_INT_STATUS);
if (status & MTK_TX_DONE_INT)
return budget;
@@ -1747,7 +1747,7 @@ static irqreturn_t mtk_handle_irq(int irq, void *_eth)
mtk_handle_irq_rx(irq, _eth);
}
if (mtk_r32(eth, MTK_QDMA_INT_MASK) & MTK_TX_DONE_INT) {
- if (mtk_r32(eth, MTK_QMTK_INT_STATUS) & MTK_TX_DONE_INT)
+ if (mtk_r32(eth, MTK_QDMA_INT_STATUS) & MTK_TX_DONE_INT)
mtk_handle_irq_tx(irq, _eth);
}
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index bab94f763e2c..088e2bc621f7 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -212,7 +212,7 @@
#define FC_THRES_MIN 0x4444
/* QDMA Interrupt Status Register */
-#define MTK_QMTK_INT_STATUS 0x1A18
+#define MTK_QDMA_INT_STATUS 0x1A18
#define MTK_RX_DONE_DLY BIT(30)
#define MTK_RX_DONE_INT3 BIT(19)
#define MTK_RX_DONE_INT2 BIT(18)
--
2.22.1
^ permalink raw reply related
* [PATCH net-next 4/4 v3] net: ethernet: mediatek: Add MT7628/88 SoC support
From: Stefan Roese @ 2019-08-16 13:23 UTC (permalink / raw)
To: netdev, linux-mediatek
Cc: René van Dorst, Daniel Golle, Sean Wang, John Crispin
In-Reply-To: <20190816132325.28426-1-sr@denx.de>
This patch adds support for the MediaTek MT7628/88 SoCs to the common
MediaTek ethernet driver. Some minor changes are needed for this and
a bigger change, as the MT7628 does not support QDMA (only PDMA).
Signed-off-by: Stefan Roese <sr@denx.de>
Cc: René van Dorst <opensource@vdorst.com>
Cc: Daniel Golle <daniel@makrotopia.org>
Cc: Sean Wang <sean.wang@mediatek.com>
Cc: John Crispin <john@phrozen.org>
---
v3:
- Corrected pointer arithmetic - use proper (void *) cast (David)
v2:
- Rebased on net-next (David)
- Used "ralink,rt5350-eth" compatible (Daniel)
- Fixed capability bit usage (Rene)
- Extracted DT bindings description to separate patch
- Introduced MTK_QDMA capability, which is used on all
currently supported SoCs. Only the newly introduced MT7628
uses PDMA and does not have this capability bit set
- Added tx_int_mask_reg/tx_int_status_reg variables to better
abstract the QDMA vs PDMA usage
drivers/net/ethernet/mediatek/Kconfig | 2 +-
drivers/net/ethernet/mediatek/mtk_eth_path.c | 4 +
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 480 ++++++++++++++-----
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 51 +-
4 files changed, 425 insertions(+), 112 deletions(-)
diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
index 1f7fff81f24d..b76cf2e1c9dc 100644
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config NET_VENDOR_MEDIATEK
bool "MediaTek ethernet driver"
- depends on ARCH_MEDIATEK || SOC_MT7621
+ depends on ARCH_MEDIATEK || SOC_MT7621 || SOC_MT7620
---help---
If you have a Mediatek SoC with ethernet, say Y.
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_path.c b/drivers/net/ethernet/mediatek/mtk_eth_path.c
index 7f05880cf9ef..28960e4c4e43 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_path.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_path.c
@@ -315,6 +315,10 @@ int mtk_setup_hw_path(struct mtk_eth *eth, int mac_id, int phymode)
{
int err;
+ /* No mux'ing for MT7628/88 */
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
+ return 0;
+
switch (phymode) {
case PHY_INTERFACE_MODE_TRGMII:
case PHY_INTERFACE_MODE_RGMII_TXID:
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index d9978174b96a..8ddbb8dcf032 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -323,11 +323,14 @@ static int mtk_phy_connect(struct net_device *dev)
goto err_phy;
}
- /* put the gmac into the right mode */
- regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
- val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
- val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id);
- regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
+ /* No MT7628/88 support for now */
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ /* put the gmac into the right mode */
+ regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+ val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
+ val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id);
+ regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
+ }
/* couple phydev to net_device */
if (mtk_phy_connect_node(eth, mac, np))
@@ -395,8 +398,8 @@ static inline void mtk_tx_irq_disable(struct mtk_eth *eth, u32 mask)
u32 val;
spin_lock_irqsave(&eth->tx_irq_lock, flags);
- val = mtk_r32(eth, MTK_QDMA_INT_MASK);
- mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
+ val = mtk_r32(eth, eth->tx_int_mask_reg);
+ mtk_w32(eth, val & ~mask, eth->tx_int_mask_reg);
spin_unlock_irqrestore(&eth->tx_irq_lock, flags);
}
@@ -406,8 +409,8 @@ static inline void mtk_tx_irq_enable(struct mtk_eth *eth, u32 mask)
u32 val;
spin_lock_irqsave(&eth->tx_irq_lock, flags);
- val = mtk_r32(eth, MTK_QDMA_INT_MASK);
- mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
+ val = mtk_r32(eth, eth->tx_int_mask_reg);
+ mtk_w32(eth, val | mask, eth->tx_int_mask_reg);
spin_unlock_irqrestore(&eth->tx_irq_lock, flags);
}
@@ -437,6 +440,7 @@ static int mtk_set_mac_address(struct net_device *dev, void *p)
{
int ret = eth_mac_addr(dev, p);
struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
const char *macaddr = dev->dev_addr;
if (ret)
@@ -446,11 +450,19 @@ static int mtk_set_mac_address(struct net_device *dev, void *p)
return -EBUSY;
spin_lock_bh(&mac->hw->page_lock);
- mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
- MTK_GDMA_MAC_ADRH(mac->id));
- mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
- (macaddr[4] << 8) | macaddr[5],
- MTK_GDMA_MAC_ADRL(mac->id));
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
+ MT7628_SDM_MAC_ADRH);
+ mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
+ (macaddr[4] << 8) | macaddr[5],
+ MT7628_SDM_MAC_ADRL);
+ } else {
+ mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
+ MTK_GDMA_MAC_ADRH(mac->id));
+ mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
+ (macaddr[4] << 8) | macaddr[5],
+ MTK_GDMA_MAC_ADRL(mac->id));
+ }
spin_unlock_bh(&mac->hw->page_lock);
return 0;
@@ -626,19 +638,47 @@ static inline struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring,
return &ring->buf[idx];
}
+static struct mtk_tx_dma *qdma_to_pdma(struct mtk_tx_ring *ring,
+ struct mtk_tx_dma *dma)
+{
+ return ring->dma_pdma - ring->dma + dma;
+}
+
+static int txd_to_idx(struct mtk_tx_ring *ring, struct mtk_tx_dma *dma)
+{
+ return ((void *)dma - (void *)ring->dma) / sizeof(*dma);
+}
+
static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
{
- if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
- dma_unmap_single(eth->dev,
- dma_unmap_addr(tx_buf, dma_addr0),
- dma_unmap_len(tx_buf, dma_len0),
- DMA_TO_DEVICE);
- } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
- dma_unmap_page(eth->dev,
- dma_unmap_addr(tx_buf, dma_addr0),
- dma_unmap_len(tx_buf, dma_len0),
- DMA_TO_DEVICE);
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
+ dma_unmap_single(eth->dev,
+ dma_unmap_addr(tx_buf, dma_addr0),
+ dma_unmap_len(tx_buf, dma_len0),
+ DMA_TO_DEVICE);
+ } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
+ dma_unmap_page(eth->dev,
+ dma_unmap_addr(tx_buf, dma_addr0),
+ dma_unmap_len(tx_buf, dma_len0),
+ DMA_TO_DEVICE);
+ }
+ } else {
+ if (dma_unmap_len(tx_buf, dma_len0)) {
+ dma_unmap_page(eth->dev,
+ dma_unmap_addr(tx_buf, dma_addr0),
+ dma_unmap_len(tx_buf, dma_len0),
+ DMA_TO_DEVICE);
+ }
+
+ if (dma_unmap_len(tx_buf, dma_len1)) {
+ dma_unmap_page(eth->dev,
+ dma_unmap_addr(tx_buf, dma_addr1),
+ dma_unmap_len(tx_buf, dma_len1),
+ DMA_TO_DEVICE);
+ }
}
+
tx_buf->flags = 0;
if (tx_buf->skb &&
(tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC))
@@ -646,19 +686,45 @@ static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
tx_buf->skb = NULL;
}
+static void setup_tx_buf(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf,
+ struct mtk_tx_dma *txd, dma_addr_t mapped_addr,
+ size_t size, int idx)
+{
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
+ dma_unmap_len_set(tx_buf, dma_len0, size);
+ } else {
+ if (idx & 1) {
+ txd->txd3 = mapped_addr;
+ txd->txd2 |= TX_DMA_PLEN1(size);
+ dma_unmap_addr_set(tx_buf, dma_addr1, mapped_addr);
+ dma_unmap_len_set(tx_buf, dma_len1, size);
+ } else {
+ tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
+ txd->txd1 = mapped_addr;
+ txd->txd2 = TX_DMA_PLEN0(size);
+ dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
+ dma_unmap_len_set(tx_buf, dma_len0, size);
+ }
+ }
+}
+
static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
int tx_num, struct mtk_tx_ring *ring, bool gso)
{
struct mtk_mac *mac = netdev_priv(dev);
struct mtk_eth *eth = mac->hw;
struct mtk_tx_dma *itxd, *txd;
+ struct mtk_tx_dma *itxd_pdma, *txd_pdma;
struct mtk_tx_buf *itx_buf, *tx_buf;
dma_addr_t mapped_addr;
unsigned int nr_frags;
int i, n_desc = 1;
u32 txd4 = 0, fport;
+ int k = 0;
itxd = ring->next_free;
+ itxd_pdma = qdma_to_pdma(ring, itxd);
if (itxd == ring->last_free)
return -ENOMEM;
@@ -689,12 +755,14 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
MTK_TX_FLAGS_FPORT1;
- dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr);
- dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb));
+ setup_tx_buf(eth, itx_buf, itxd_pdma, mapped_addr, skb_headlen(skb),
+ k++);
/* TX SG offload */
txd = itxd;
+ txd_pdma = qdma_to_pdma(ring, txd);
nr_frags = skb_shinfo(skb)->nr_frags;
+
for (i = 0; i < nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
unsigned int offset = 0;
@@ -703,12 +771,21 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
while (frag_size) {
bool last_frag = false;
unsigned int frag_map_size;
+ bool new_desc = true;
+
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA) ||
+ (i & 0x1)) {
+ txd = mtk_qdma_phys_to_virt(ring, txd->txd2);
+ txd_pdma = qdma_to_pdma(ring, txd);
+ if (txd == ring->last_free)
+ goto err_dma;
+
+ n_desc++;
+ } else {
+ new_desc = false;
+ }
- txd = mtk_qdma_phys_to_virt(ring, txd->txd2);
- if (txd == ring->last_free)
- goto err_dma;
- n_desc++;
frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
frag_map_size,
@@ -727,14 +804,16 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
WRITE_ONCE(txd->txd4, fport);
tx_buf = mtk_desc_to_tx_buf(ring, txd);
- memset(tx_buf, 0, sizeof(*tx_buf));
+ if (new_desc)
+ memset(tx_buf, 0, sizeof(*tx_buf));
tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
MTK_TX_FLAGS_FPORT1;
- dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
- dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
+ setup_tx_buf(eth, tx_buf, txd_pdma, mapped_addr,
+ frag_map_size, k++);
+
frag_size -= frag_map_size;
offset += frag_map_size;
}
@@ -746,6 +825,12 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
WRITE_ONCE(itxd->txd4, txd4);
WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
(!nr_frags * TX_DMA_LS0)));
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ if (k & 0x1)
+ txd_pdma->txd2 |= TX_DMA_LS0;
+ else
+ txd_pdma->txd2 |= TX_DMA_LS1;
+ }
netdev_sent_queue(dev, skb->len);
skb_tx_timestamp(skb);
@@ -758,9 +843,15 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
*/
wmb();
- if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) ||
- !netdev_xmit_more())
- mtk_w32(eth, txd->txd2, MTK_QTX_CTX_PTR);
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) ||
+ !netdev_xmit_more())
+ mtk_w32(eth, txd->txd2, MTK_QTX_CTX_PTR);
+ } else {
+ int next_idx = NEXT_DESP_IDX(txd_to_idx(ring, txd),
+ ring->dma_size);
+ mtk_w32(eth, next_idx, MT7628_TX_CTX_IDX0);
+ }
return 0;
@@ -772,7 +863,11 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
mtk_tx_unmap(eth, tx_buf);
itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+ itxd_pdma->txd2 = TX_DMA_DESP2_DEF;
+
itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
+ itxd_pdma = qdma_to_pdma(ring, itxd);
} while (itxd != txd);
return -ENOMEM;
@@ -946,7 +1041,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
struct net_device *netdev;
unsigned int pktlen;
dma_addr_t dma_addr;
- int mac = 0;
+ int mac;
ring = mtk_get_rx_ring(eth);
if (unlikely(!ring))
@@ -961,9 +1056,13 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
break;
/* find out which mac the packet come from. values start at 1 */
- mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
- RX_DMA_FPORT_MASK;
- mac--;
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ mac = 0;
+ } else {
+ mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
+ RX_DMA_FPORT_MASK;
+ mac--;
+ }
if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
!eth->netdev[mac]))
@@ -981,7 +1080,8 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
goto release_desc;
}
dma_addr = dma_map_single(eth->dev,
- new_data + NET_SKB_PAD,
+ new_data + NET_SKB_PAD +
+ eth->ip_align,
ring->buf_size,
DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
@@ -1004,7 +1104,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
skb->dev = netdev;
skb_put(skb, pktlen);
- if (trxd.rxd4 & RX_DMA_L4_VALID)
+ if (trxd.rxd4 & eth->rx_dma_l4_valid)
skb->ip_summed = CHECKSUM_UNNECESSARY;
else
skb_checksum_none_assert(skb);
@@ -1021,7 +1121,10 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
rxd->rxd1 = (unsigned int)dma_addr;
release_desc:
- rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
+ rxd->rxd2 = RX_DMA_LSO;
+ else
+ rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
ring->calc_idx = idx;
@@ -1040,19 +1143,14 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
return done;
}
-static int mtk_poll_tx(struct mtk_eth *eth, int budget)
+static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
+ unsigned int *done, unsigned int *bytes)
{
struct mtk_tx_ring *ring = &eth->tx_ring;
struct mtk_tx_dma *desc;
struct sk_buff *skb;
struct mtk_tx_buf *tx_buf;
- unsigned int done[MTK_MAX_DEVS];
- unsigned int bytes[MTK_MAX_DEVS];
u32 cpu, dma;
- int total = 0, i;
-
- memset(done, 0, sizeof(done));
- memset(bytes, 0, sizeof(bytes));
cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
@@ -1090,6 +1188,62 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
+ return budget;
+}
+
+static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget,
+ unsigned int *done, unsigned int *bytes)
+{
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ struct mtk_tx_dma *desc;
+ struct sk_buff *skb;
+ struct mtk_tx_buf *tx_buf;
+ u32 cpu, dma;
+
+ cpu = ring->cpu_idx;
+ dma = mtk_r32(eth, MT7628_TX_DTX_IDX0);
+
+ while ((cpu != dma) && budget) {
+ tx_buf = &ring->buf[cpu];
+ skb = tx_buf->skb;
+ if (!skb)
+ break;
+
+ if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) {
+ bytes[0] += skb->len;
+ done[0]++;
+ budget--;
+ }
+
+ mtk_tx_unmap(eth, tx_buf);
+
+ desc = &ring->dma[cpu];
+ ring->last_free = desc;
+ atomic_inc(&ring->free_count);
+
+ cpu = NEXT_DESP_IDX(cpu, ring->dma_size);
+ }
+
+ ring->cpu_idx = cpu;
+
+ return budget;
+}
+
+static int mtk_poll_tx(struct mtk_eth *eth, int budget)
+{
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ unsigned int done[MTK_MAX_DEVS];
+ unsigned int bytes[MTK_MAX_DEVS];
+ int total = 0, i;
+
+ memset(done, 0, sizeof(done));
+ memset(bytes, 0, sizeof(bytes));
+
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+ budget = mtk_poll_tx_qdma(eth, budget, done, bytes);
+ else
+ budget = mtk_poll_tx_pdma(eth, budget, done, bytes);
+
for (i = 0; i < MTK_MAC_COUNT; i++) {
if (!eth->netdev[i] || !done[i])
continue;
@@ -1121,13 +1275,14 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget)
u32 status, mask;
int tx_done = 0;
- mtk_handle_status_irq(eth);
- mtk_w32(eth, MTK_TX_DONE_INT, MTK_QDMA_INT_STATUS);
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+ mtk_handle_status_irq(eth);
+ mtk_w32(eth, MTK_TX_DONE_INT, eth->tx_int_status_reg);
tx_done = mtk_poll_tx(eth, budget);
if (unlikely(netif_msg_intr(eth))) {
- status = mtk_r32(eth, MTK_QDMA_INT_STATUS);
- mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
+ status = mtk_r32(eth, eth->tx_int_status_reg);
+ mask = mtk_r32(eth, eth->tx_int_mask_reg);
dev_info(eth->dev,
"done tx %d, intr 0x%08x/0x%x\n",
tx_done, status, mask);
@@ -1136,7 +1291,7 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget)
if (tx_done == budget)
return budget;
- status = mtk_r32(eth, MTK_QDMA_INT_STATUS);
+ status = mtk_r32(eth, eth->tx_int_status_reg);
if (status & MTK_TX_DONE_INT)
return budget;
@@ -1203,6 +1358,24 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
ring->dma[i].txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
}
+ /* On MT7688 (PDMA only) this driver uses the ring->dma structs
+ * only as the framework. The real HW descriptors are the PDMA
+ * descriptors in ring->dma_pdma.
+ */
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ ring->dma_pdma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
+ &ring->phys_pdma,
+ GFP_ATOMIC);
+ if (!ring->dma_pdma)
+ goto no_tx_mem;
+
+ for (i = 0; i < MTK_DMA_SIZE; i++) {
+ ring->dma_pdma[i].txd2 = TX_DMA_DESP2_DEF;
+ ring->dma_pdma[i].txd4 = 0;
+ }
+ }
+
+ ring->dma_size = MTK_DMA_SIZE;
atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
ring->next_free = &ring->dma[0];
ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
@@ -1213,15 +1386,23 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
*/
wmb();
- mtk_w32(eth, ring->phys, MTK_QTX_CTX_PTR);
- mtk_w32(eth, ring->phys, MTK_QTX_DTX_PTR);
- mtk_w32(eth,
- ring->phys + ((MTK_DMA_SIZE - 1) * sz),
- MTK_QTX_CRX_PTR);
- mtk_w32(eth,
- ring->phys + ((MTK_DMA_SIZE - 1) * sz),
- MTK_QTX_DRX_PTR);
- mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ mtk_w32(eth, ring->phys, MTK_QTX_CTX_PTR);
+ mtk_w32(eth, ring->phys, MTK_QTX_DTX_PTR);
+ mtk_w32(eth,
+ ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+ MTK_QTX_CRX_PTR);
+ mtk_w32(eth,
+ ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+ MTK_QTX_DRX_PTR);
+ mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
+ MTK_QTX_CFG(0));
+ } else {
+ mtk_w32(eth, ring->phys_pdma, MT7628_TX_BASE_PTR0);
+ mtk_w32(eth, MTK_DMA_SIZE, MT7628_TX_MAX_CNT0);
+ mtk_w32(eth, 0, MT7628_TX_CTX_IDX0);
+ mtk_w32(eth, MT7628_PST_DTX_IDX0, MTK_PDMA_RST_IDX);
+ }
return 0;
@@ -1248,6 +1429,14 @@ static void mtk_tx_clean(struct mtk_eth *eth)
ring->phys);
ring->dma = NULL;
}
+
+ if (ring->dma_pdma) {
+ dma_free_coherent(eth->dev,
+ MTK_DMA_SIZE * sizeof(*ring->dma_pdma),
+ ring->dma_pdma,
+ ring->phys_pdma);
+ ring->dma_pdma = NULL;
+ }
}
static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
@@ -1295,14 +1484,17 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
for (i = 0; i < rx_dma_size; i++) {
dma_addr_t dma_addr = dma_map_single(eth->dev,
- ring->data[i] + NET_SKB_PAD,
+ ring->data[i] + NET_SKB_PAD + eth->ip_align,
ring->buf_size,
DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
return -ENOMEM;
ring->dma[i].rxd1 = (unsigned int)dma_addr;
- ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
+ ring->dma[i].rxd2 = RX_DMA_LSO;
+ else
+ ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
}
ring->dma_size = rx_dma_size;
ring->calc_idx_update = false;
@@ -1618,9 +1810,16 @@ static int mtk_dma_busy_wait(struct mtk_eth *eth)
unsigned long t_start = jiffies;
while (1) {
- if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) &
- (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
- return 0;
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) &
+ (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
+ return 0;
+ } else {
+ if (!(mtk_r32(eth, MTK_PDMA_GLO_CFG) &
+ (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
+ return 0;
+ }
+
if (time_after(jiffies, t_start + MTK_DMA_BUSY_TIMEOUT))
break;
}
@@ -1637,20 +1836,24 @@ static int mtk_dma_init(struct mtk_eth *eth)
if (mtk_dma_busy_wait(eth))
return -EBUSY;
- /* QDMA needs scratch memory for internal reordering of the
- * descriptors
- */
- err = mtk_init_fq_dma(eth);
- if (err)
- return err;
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ /* QDMA needs scratch memory for internal reordering of the
+ * descriptors
+ */
+ err = mtk_init_fq_dma(eth);
+ if (err)
+ return err;
+ }
err = mtk_tx_alloc(eth);
if (err)
return err;
- err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_QDMA);
- if (err)
- return err;
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_QDMA);
+ if (err)
+ return err;
+ }
err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL);
if (err)
@@ -1667,10 +1870,14 @@ static int mtk_dma_init(struct mtk_eth *eth)
return err;
}
- /* Enable random early drop and set drop threshold automatically */
- mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN,
- MTK_QDMA_FC_THRES);
- mtk_w32(eth, 0x0, MTK_QDMA_HRED2);
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ /* Enable random early drop and set drop threshold
+ * automatically
+ */
+ mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN |
+ FC_THRES_MIN, MTK_QDMA_FC_THRES);
+ mtk_w32(eth, 0x0, MTK_QDMA_HRED2);
+ }
return 0;
}
@@ -1741,13 +1948,15 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth)
static irqreturn_t mtk_handle_irq(int irq, void *_eth)
{
struct mtk_eth *eth = _eth;
+ u32 status;
+ status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
if (mtk_r32(eth, MTK_PDMA_INT_MASK) & MTK_RX_DONE_INT) {
if (mtk_r32(eth, MTK_PDMA_INT_STATUS) & MTK_RX_DONE_INT)
mtk_handle_irq_rx(irq, _eth);
}
- if (mtk_r32(eth, MTK_QDMA_INT_MASK) & MTK_TX_DONE_INT) {
- if (mtk_r32(eth, MTK_QDMA_INT_STATUS) & MTK_TX_DONE_INT)
+ if (mtk_r32(eth, eth->tx_int_mask_reg) & MTK_TX_DONE_INT) {
+ if (mtk_r32(eth, eth->tx_int_status_reg) & MTK_TX_DONE_INT)
mtk_handle_irq_tx(irq, _eth);
}
@@ -1779,17 +1988,23 @@ static int mtk_start_dma(struct mtk_eth *eth)
return err;
}
- mtk_w32(eth,
- MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
- MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
- MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
- MTK_RX_BT_32DWORDS,
- MTK_QDMA_GLO_CFG);
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ mtk_w32(eth,
+ MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
+ MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
+ MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
+ MTK_RX_BT_32DWORDS,
+ MTK_QDMA_GLO_CFG);
- mtk_w32(eth,
- MTK_RX_DMA_EN | rx_2b_offset |
- MTK_RX_BT_32DWORDS | MTK_MULTI_EN,
- MTK_PDMA_GLO_CFG);
+ mtk_w32(eth,
+ MTK_RX_DMA_EN | rx_2b_offset |
+ MTK_RX_BT_32DWORDS | MTK_MULTI_EN,
+ MTK_PDMA_GLO_CFG);
+ } else {
+ mtk_w32(eth, MTK_TX_WB_DDONE | MTK_TX_DMA_EN | MTK_RX_DMA_EN |
+ MTK_MULTI_EN | MTK_PDMA_SIZE_8DWORDS,
+ MTK_PDMA_GLO_CFG);
+ }
return 0;
}
@@ -1817,7 +2032,6 @@ static int mtk_open(struct net_device *dev)
phy_start(dev->phydev);
netif_start_queue(dev);
-
return 0;
}
@@ -1861,7 +2075,8 @@ static int mtk_stop(struct net_device *dev)
napi_disable(&eth->tx_napi);
napi_disable(&eth->rx_napi);
- mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+ mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
mtk_stop_dma(eth, MTK_PDMA_GLO_CFG);
mtk_dma_free(eth);
@@ -1923,6 +2138,24 @@ static int mtk_hw_init(struct mtk_eth *eth)
if (ret)
goto err_disable_pm;
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ ret = device_reset(eth->dev);
+ if (ret) {
+ dev_err(eth->dev, "MAC reset failed!\n");
+ goto err_disable_pm;
+ }
+
+ /* enable interrupt delay for RX */
+ mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
+
+ /* disable delay and normal interrupt */
+ mtk_tx_irq_disable(eth, ~0);
+ mtk_rx_irq_disable(eth, ~0);
+
+ return 0;
+ }
+
+ /* Non-MT7628 handling... */
ethsys_reset(eth, RSTCTRL_FE);
ethsys_reset(eth, RSTCTRL_PPE);
@@ -2426,13 +2659,13 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
eth->netdev[id]->base_addr = (unsigned long)eth->base;
- eth->netdev[id]->hw_features = MTK_HW_FEATURES;
+ eth->netdev[id]->hw_features = eth->soc->hw_features;
if (eth->hwlro)
eth->netdev[id]->hw_features |= NETIF_F_LRO;
- eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
+ eth->netdev[id]->vlan_features = eth->soc->hw_features &
~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
- eth->netdev[id]->features |= MTK_HW_FEATURES;
+ eth->netdev[id]->features |= eth->soc->hw_features;
eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops;
eth->netdev[id]->irq = eth->irq[0];
@@ -2463,15 +2696,32 @@ static int mtk_probe(struct platform_device *pdev)
if (IS_ERR(eth->base))
return PTR_ERR(eth->base);
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ eth->tx_int_mask_reg = MTK_QDMA_INT_MASK;
+ eth->tx_int_status_reg = MTK_QDMA_INT_STATUS;
+ } else {
+ eth->tx_int_mask_reg = MTK_PDMA_INT_MASK;
+ eth->tx_int_status_reg = MTK_PDMA_INT_STATUS;
+ }
+
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ eth->rx_dma_l4_valid = RX_DMA_L4_VALID_PDMA;
+ eth->ip_align = NET_IP_ALIGN;
+ } else {
+ eth->rx_dma_l4_valid = RX_DMA_L4_VALID;
+ }
+
spin_lock_init(&eth->page_lock);
spin_lock_init(&eth->tx_irq_lock);
spin_lock_init(&eth->rx_irq_lock);
- eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
- "mediatek,ethsys");
- if (IS_ERR(eth->ethsys)) {
- dev_err(&pdev->dev, "no ethsys regmap found\n");
- return PTR_ERR(eth->ethsys);
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "mediatek,ethsys");
+ if (IS_ERR(eth->ethsys)) {
+ dev_err(&pdev->dev, "no ethsys regmap found\n");
+ return PTR_ERR(eth->ethsys);
+ }
}
if (MTK_HAS_CAPS(eth->soc->caps, MTK_INFRA)) {
@@ -2572,9 +2822,12 @@ static int mtk_probe(struct platform_device *pdev)
if (err)
goto err_free_dev;
- err = mtk_mdio_init(eth);
- if (err)
- goto err_free_dev;
+ /* No MT7628/88 support yet */
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ err = mtk_mdio_init(eth);
+ if (err)
+ goto err_free_dev;
+ }
for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->netdev[i])
@@ -2637,12 +2890,14 @@ static int mtk_remove(struct platform_device *pdev)
static const struct mtk_soc_data mt2701_data = {
.caps = MT7623_CAPS | MTK_HWLRO,
+ .hw_features = MTK_HW_FEATURES,
.required_clks = MT7623_CLKS_BITMAP,
.required_pctl = true,
};
static const struct mtk_soc_data mt7621_data = {
.caps = MT7621_CAPS,
+ .hw_features = MTK_HW_FEATURES,
.required_clks = MT7621_CLKS_BITMAP,
.required_pctl = false,
};
@@ -2650,12 +2905,14 @@ static const struct mtk_soc_data mt7621_data = {
static const struct mtk_soc_data mt7622_data = {
.ana_rgc3 = 0x2028,
.caps = MT7622_CAPS | MTK_HWLRO,
+ .hw_features = MTK_HW_FEATURES,
.required_clks = MT7622_CLKS_BITMAP,
.required_pctl = false,
};
static const struct mtk_soc_data mt7623_data = {
.caps = MT7623_CAPS | MTK_HWLRO,
+ .hw_features = MTK_HW_FEATURES,
.required_clks = MT7623_CLKS_BITMAP,
.required_pctl = true,
};
@@ -2663,16 +2920,25 @@ static const struct mtk_soc_data mt7623_data = {
static const struct mtk_soc_data mt7629_data = {
.ana_rgc3 = 0x128,
.caps = MT7629_CAPS | MTK_HWLRO,
+ .hw_features = MTK_HW_FEATURES,
.required_clks = MT7629_CLKS_BITMAP,
.required_pctl = false,
};
+static const struct mtk_soc_data rt5350_data = {
+ .caps = MT7628_CAPS,
+ .hw_features = MTK_HW_FEATURES_MT7628,
+ .required_clks = MT7628_CLKS_BITMAP,
+ .required_pctl = false,
+};
+
const struct of_device_id of_mtk_match[] = {
{ .compatible = "mediatek,mt2701-eth", .data = &mt2701_data},
{ .compatible = "mediatek,mt7621-eth", .data = &mt7621_data},
{ .compatible = "mediatek,mt7622-eth", .data = &mt7622_data},
{ .compatible = "mediatek,mt7623-eth", .data = &mt7623_data},
{ .compatible = "mediatek,mt7629-eth", .data = &mt7629_data},
+ { .compatible = "ralink,rt5350-eth", .data = &rt5350_data},
{},
};
MODULE_DEVICE_TABLE(of, of_mtk_match);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 556644f28eae..cc1466ae0926 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -39,6 +39,7 @@
NETIF_F_SG | NETIF_F_TSO | \
NETIF_F_TSO6 | \
NETIF_F_IPV6_CSUM)
+#define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
#define NEXT_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1))
#define MTK_MAX_RX_RING_NUM 4
@@ -118,6 +119,7 @@
/* PDMA Global Configuration Register */
#define MTK_PDMA_GLO_CFG 0xa04
#define MTK_MULTI_EN BIT(10)
+#define MTK_PDMA_SIZE_8DWORDS (1 << 4)
/* PDMA Reset Index Register */
#define MTK_PDMA_RST_IDX 0xa08
@@ -276,11 +278,18 @@
#define TX_DMA_OWNER_CPU BIT(31)
#define TX_DMA_LS0 BIT(30)
#define TX_DMA_PLEN0(_x) (((_x) & MTK_TX_DMA_BUF_LEN) << 16)
+#define TX_DMA_PLEN1(_x) ((_x) & MTK_TX_DMA_BUF_LEN)
#define TX_DMA_SWC BIT(14)
#define TX_DMA_SDL(_x) (((_x) & 0x3fff) << 16)
+/* PDMA on MT7628 */
+#define TX_DMA_DONE BIT(31)
+#define TX_DMA_LS1 BIT(14)
+#define TX_DMA_DESP2_DEF (TX_DMA_LS0 | TX_DMA_DONE)
+
/* QDMA descriptor rxd2 */
#define RX_DMA_DONE BIT(31)
+#define RX_DMA_LSO BIT(30)
#define RX_DMA_PLEN0(_x) (((_x) & 0x3fff) << 16)
#define RX_DMA_GET_PLEN0(_x) (((_x) >> 16) & 0x3fff)
@@ -289,6 +298,7 @@
/* QDMA descriptor rxd4 */
#define RX_DMA_L4_VALID BIT(24)
+#define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
#define RX_DMA_FPORT_SHIFT 19
#define RX_DMA_FPORT_MASK 0x7
@@ -412,6 +422,19 @@
#define CO_QPHY_SEL BIT(0)
#define GEPHY_MAC_SEL BIT(1)
+/* MT7628/88 specific stuff */
+#define MT7628_PDMA_OFFSET 0x0800
+#define MT7628_SDM_OFFSET 0x0c00
+
+#define MT7628_TX_BASE_PTR0 (MT7628_PDMA_OFFSET + 0x00)
+#define MT7628_TX_MAX_CNT0 (MT7628_PDMA_OFFSET + 0x04)
+#define MT7628_TX_CTX_IDX0 (MT7628_PDMA_OFFSET + 0x08)
+#define MT7628_TX_DTX_IDX0 (MT7628_PDMA_OFFSET + 0x0c)
+#define MT7628_PST_DTX_IDX0 BIT(0)
+
+#define MT7628_SDM_MAC_ADRL (MT7628_SDM_OFFSET + 0x0c)
+#define MT7628_SDM_MAC_ADRH (MT7628_SDM_OFFSET + 0x10)
+
struct mtk_rx_dma {
unsigned int rxd1;
unsigned int rxd2;
@@ -509,6 +532,7 @@ enum mtk_clks_map {
BIT(MTK_CLK_SGMII_CK) | \
BIT(MTK_CLK_ETH2PLL))
#define MT7621_CLKS_BITMAP (0)
+#define MT7628_CLKS_BITMAP (0)
#define MT7629_CLKS_BITMAP (BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) | \
BIT(MTK_CLK_GP0) | BIT(MTK_CLK_GP1) | \
BIT(MTK_CLK_GP2) | BIT(MTK_CLK_FE) | \
@@ -563,6 +587,10 @@ struct mtk_tx_ring {
struct mtk_tx_dma *last_free;
u16 thresh;
atomic_t free_count;
+ int dma_size;
+ struct mtk_tx_dma *dma_pdma; /* For MT7628/88 PDMA handling */
+ dma_addr_t phys_pdma;
+ int cpu_idx;
};
/* PDMA rx ring mode */
@@ -604,6 +632,8 @@ enum mkt_eth_capabilities {
MTK_HWLRO_BIT,
MTK_SHARED_INT_BIT,
MTK_TRGMII_MT7621_CLK_BIT,
+ MTK_QDMA_BIT,
+ MTK_SOC_MT7628_BIT,
/* MUX BITS*/
MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT,
@@ -634,6 +664,8 @@ enum mkt_eth_capabilities {
#define MTK_HWLRO BIT(MTK_HWLRO_BIT)
#define MTK_SHARED_INT BIT(MTK_SHARED_INT_BIT)
#define MTK_TRGMII_MT7621_CLK BIT(MTK_TRGMII_MT7621_CLK_BIT)
+#define MTK_QDMA BIT(MTK_QDMA_BIT)
+#define MTK_SOC_MT7628 BIT(MTK_SOC_MT7628_BIT)
#define MTK_ETH_MUX_GDM1_TO_GMAC1_ESW \
BIT(MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT)
@@ -687,26 +719,31 @@ enum mkt_eth_capabilities {
#define MTK_HAS_CAPS(caps, _x) (((caps) & (_x)) == (_x))
#define MT7621_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | \
- MTK_GMAC2_RGMII | MTK_SHARED_INT | MTK_TRGMII_MT7621_CLK)
+ MTK_GMAC2_RGMII | MTK_SHARED_INT | \
+ MTK_TRGMII_MT7621_CLK | MTK_QDMA)
#define MT7622_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_SGMII | MTK_GMAC2_RGMII | \
MTK_GMAC2_SGMII | MTK_GDM1_ESW | \
MTK_MUX_GDM1_TO_GMAC1_ESW | \
- MTK_MUX_GMAC1_GMAC2_TO_SGMII_RGMII)
+ MTK_MUX_GMAC1_GMAC2_TO_SGMII_RGMII | MTK_QDMA)
+
+#define MT7623_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | MTK_GMAC2_RGMII | \
+ MTK_QDMA)
-#define MT7623_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | MTK_GMAC2_RGMII)
+#define MT7628_CAPS (MTK_SHARED_INT | MTK_SOC_MT7628)
#define MT7629_CAPS (MTK_GMAC1_SGMII | MTK_GMAC2_SGMII | MTK_GMAC2_GEPHY | \
MTK_GDM1_ESW | MTK_MUX_GDM1_TO_GMAC1_ESW | \
MTK_MUX_GMAC2_GMAC0_TO_GEPHY | \
MTK_MUX_U3_GMAC2_TO_QPHY | \
- MTK_MUX_GMAC12_TO_GEPHY_SGMII)
+ MTK_MUX_GMAC12_TO_GEPHY_SGMII | MTK_QDMA)
/* struct mtk_eth_data - This is the structure holding all differences
* among various plaforms
* @ana_rgc3: The offset for register ANA_RGC3 related to
* sgmiisys syscon
* @caps Flags shown the extra capability for the SoC
+ * @hw_features Flags shown HW features
* @required_clks Flags shown the bitmap for required clocks on
* the target SoC
* @required_pctl A bool value to show whether the SoC requires
@@ -717,6 +754,7 @@ struct mtk_soc_data {
u32 caps;
u32 required_clks;
bool required_pctl;
+ netdev_features_t hw_features;
};
/* currently no SoC has more than 2 macs */
@@ -810,6 +848,11 @@ struct mtk_eth {
unsigned long state;
const struct mtk_soc_data *soc;
+
+ u32 tx_int_mask_reg;
+ u32 tx_int_status_reg;
+ u32 rx_dma_l4_valid;
+ int ip_align;
};
/* struct mtk_mac - the structure that holds the info about the MACs of the
--
2.22.1
^ permalink raw reply related
* Re: [PATCH v5 12/13] net: phy: adin: add ethtool get_stats support
From: Andrew Lunn @ 2019-08-16 13:24 UTC (permalink / raw)
To: Alexandru Ardelean
Cc: netdev, devicetree, linux-kernel, davem, robh+dt, mark.rutland,
f.fainelli, hkallweit1
In-Reply-To: <20190816131011.23264-13-alexandru.ardelean@analog.com>
On Fri, Aug 16, 2019 at 04:10:10PM +0300, Alexandru Ardelean wrote:
> This change implements retrieving all the error counters from the PHY.
>
> The counters require that the RxErrCnt register (0x0014) be read first,
> after which copies of the counters are latched into the registers. This
> ensures that all registers read after RxErrCnt are synchronized at the
> moment that they are read.
>
> The counter values need to be accumulated by the driver, as each time that
> RxErrCnt is read, the values that are latched are the ones that have
> incremented from the last read.
>
> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Andrew
^ permalink raw reply
* Re: [PATCH net-next v2 6/9] net: macsec: hardware offloading infrastructure
From: Sabrina Dubroca @ 2019-08-16 13:25 UTC (permalink / raw)
To: Antoine Tenart
Cc: Igor Russkikh, davem@davemloft.net, andrew@lunn.ch,
f.fainelli@gmail.com, hkallweit1@gmail.com,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
thomas.petazzoni@bootlin.com, alexandre.belloni@bootlin.com,
allan.nielsen@microchip.com, camelia.groza@nxp.com,
Simon Edelhaus, Pavel Belous
In-Reply-To: <20190813085817.GA3200@kwain>
2019-08-13, 10:58:17 +0200, Antoine Tenart wrote:
> Hi Igor,
>
> On Sat, Aug 10, 2019 at 01:20:32PM +0000, Igor Russkikh wrote:
> > On 08.08.2019 17:05, Antoine Tenart wrote:
> >
> > > The Rx and TX handlers are modified to take into account the special case
> > > where the MACsec transformation happens in the hardware, whether in a PHY
> > > or in a MAC, as the packets seen by the networking stack on both the
> >
> > Don't you think we may eventually need xmit / handle_frame ops to be
> > a part of macsec_ops?
> >
> > That way software macsec could be extracted to just another type of offload.
> > The drawback of current code is it doesn't show explicitly the path of
> > offloaded packets. It is hidden in `handle_not_macsec` and in
> > `macsec_start_xmit` branch. This makes incorrect counters to tick (see my below
> > comment)
> >
> > Another thing is that both xmit / macsec_handle_frame can't now be customized
> > by device driver. But this may be required.
> > We for example have usecases and HW features to allow specific flows to bypass
> > macsec encryption. This is normally used for macsec key control protocols,
> > identified by ethertype. Your phy is also capable on that as I see.
>
> I think this question is linked to the use of a MACsec virtual interface
> when using h/w offloading. The starting point for me was that I wanted
> to reuse the data structures and the API exposed to the userspace by the
> s/w implementation of MACsec. I then had two choices: keeping the exact
> same interface for the user (having a virtual MACsec interface), or
Unless it's really infeasible, yes, that's how things should be done IMO.
> registering the MACsec genl ops onto the real net devices (and making
> the s/w implementation a virtual net dev and a provider of the MACsec
> "offloading" ops).
Please, no :( Let's keep it as close as possible to the software
implementation, unless there's a really good reason not to. It's not
just "ip macsec" btw, wpa_supplicant can also configure MACsec and
would also need some logic to pick the device on which to do the genl
operations in that case.
> The advantages of the first option were that nearly all the logic of the
> s/w implementation could be kept and especially that it would be
> transparent for the user to use both implementations of MACsec. But this
> raised an issue as I had to modify the xmit / handle_frame ops to let
> all the traffic pass. This is because we have no way of knowing if a
> frame was handled by the MACsec h/w or not in ingress. So the virtual
> interface here only serve as the entrypoint for the API...
It's also the interface on which you'll run DHCP or install IP addresses.
> The second option would have the advantage to better represent the actual
> flow, but the way of configuring MACsec would be a bit different for the
> user, whether he wants to use s/w or h/w MACsec. If we were to do this I
> think we could extract the genl functions from the MACsec s/w
> implementation, and let it implement the MACsec ops (exactly as the
> offloading drivers).
>
> I'm open to discussing this :)
>
> As for the need for xmit / handle_frame ops (for a MAC w/ MACsec
> offloading), I'd say the xmit / handle_frame ops of the real net device
> driver could be used as the one of the MACsec virtual interface do not
> do much (regardless of the implementation choice discussed above).
There's no "handle_frame" op on a real device. macsec_handle_frame is
an rx_handler specificity that grabs packets from a real device and
sends them into a virtual device stacked on top of it. A real device
just hands packets over to the stack via NAPI.
> > > @@ -2546,11 +2814,15 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
> > > {
> > > struct macsec_dev *macsec = netdev_priv(dev);
> > > struct macsec_secy *secy = &macsec->secy;
> > > + struct macsec_tx_sc *tx_sc = &secy->tx_sc;
> > > struct pcpu_secy_stats *secy_stats;
> > > + struct macsec_tx_sa *tx_sa;
> > > int ret, len;
> > >
> > > + tx_sa = macsec_txsa_get(tx_sc->sa[tx_sc->encoding_sa]);
> >
> > Declared, but not used?
>
> I'll remove it then.
That's also a refcount leak, so, yes, please get rid of it.
[I'll answer the rest of the patch separately]
--
Sabrina
^ permalink raw reply
* Re: [PATCH net-next v2 6/9] net: macsec: hardware offloading infrastructure
From: Sabrina Dubroca @ 2019-08-16 13:26 UTC (permalink / raw)
To: Andrew Lunn
Cc: Igor Russkikh, Antoine Tenart, davem@davemloft.net,
f.fainelli@gmail.com, hkallweit1@gmail.com,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
thomas.petazzoni@bootlin.com, alexandre.belloni@bootlin.com,
allan.nielsen@microchip.com, camelia.groza@nxp.com,
Simon Edelhaus, Pavel Belous
In-Reply-To: <20190813162823.GH15047@lunn.ch>
2019-08-13, 18:28:23 +0200, Andrew Lunn wrote:
> > 1) With current implementation it's impossible to install SW macsec engine onto
> > the device which supports HW offload. That could be a strong limitation in
> > cases when user sees HW macsec offload is broken or work differently, and he/she
> > wants to replace it with SW one.
> > MACSec is a complex feature, and it may happen something is missing in HW.
> > Trivial example is 256bit encryption, which is not always a musthave in HW
> > implementations.
>
> Ideally, we want the driver to return EOPNOTSUPP if it does not
> support something and the software implement should be used.
>
> If the offload is broken, we want a bug report! And if it works
> differently, it suggests there is also a bug we need to fix, or the
> standard is ambiguous.
Yes. But in the meantime, we want the user to be able to disable the
offload. It's helpful for debugging purposes, and it can provide some
level of functionality until the bug is fixed or non-buggy hardware
becomes available.
> It would also be nice to add extra information to the netlink API to
> indicate if HW or SW is being used. In other places where we offload
> to accelerators we have such additional information.
+1
--
Sabrina
^ permalink raw reply
* Re: [PATCH net-next v2 6/9] net: macsec: hardware offloading infrastructure
From: Sabrina Dubroca @ 2019-08-16 13:29 UTC (permalink / raw)
To: Igor Russkikh
Cc: Andrew Lunn, Antoine Tenart, davem@davemloft.net,
f.fainelli@gmail.com, hkallweit1@gmail.com,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
thomas.petazzoni@bootlin.com, alexandre.belloni@bootlin.com,
allan.nielsen@microchip.com, camelia.groza@nxp.com,
Simon Edelhaus, Pavel Belous
In-Reply-To: <2e3c2307-d414-a531-26cb-064e05fa01fc@aquantia.com>
2019-08-13, 16:18:40 +0000, Igor Russkikh wrote:
> On 13.08.2019 16:17, Andrew Lunn wrote:
> > On Tue, Aug 13, 2019 at 10:58:17AM +0200, Antoine Tenart wrote:
> >> I think this question is linked to the use of a MACsec virtual interface
> >> when using h/w offloading. The starting point for me was that I wanted
> >> to reuse the data structures and the API exposed to the userspace by the
> >> s/w implementation of MACsec. I then had two choices: keeping the exact
> >> same interface for the user (having a virtual MACsec interface), or
> >> registering the MACsec genl ops onto the real net devices (and making
> >> the s/w implementation a virtual net dev and a provider of the MACsec
> >> "offloading" ops).
> >>
> >> The advantages of the first option were that nearly all the logic of the
> >> s/w implementation could be kept and especially that it would be
> >> transparent for the user to use both implementations of MACsec.
> >
> > Hi Antoine
> >
> > We have always talked about offloading operations to the hardware,
> > accelerating what the linux stack can do by making use of hardware
> > accelerators. The basic user API should not change because of
> > acceleration. Those are the general guidelines.
> >
> > It would however be interesting to get comments from those who did the
> > software implementation and what they think of this architecture. I've
> > no personal experience with MACSec, so it is hard for me to say if the
> > current architecture makes sense when using accelerators.
>
> In terms of overall concepts, I'd add the following:
>
> 1) With current implementation it's impossible to install SW macsec engine onto
> the device which supports HW offload.
You mean how it's implemented in this patchset?
> That could be a strong limitation in
> cases when user sees HW macsec offload is broken or work differently, and he/she
> wants to replace it with SW one.
Agreed, I think an offload that cannot be disabled is quite problematic.
> MACSec is a complex feature, and it may happen something is missing in HW.
> Trivial example is 256bit encryption, which is not always a musthave in HW
> implementations.
+1
> 2) I think, Antoine, its not totally true that otherwise the user macsec API
> will be broken/changed. netlink api is the same, the only thing we may want to
> add is an optional parameter to force selection of SW macsec engine.
Yes, I think we need an offload on/off parameter (and IMO it should
probably be off by default). Then, if offloading is requested but
cannot be satisfied (unsupported key length, too many SAs, etc), or if
incompatible settings are requested (mixing offloaded and
non-offloaded SCs on a device that cannot do it), return an error.
If we also export that offload parameter during netlink dumps, we can
inspect the state of the system, which helps for debugging.
> I'm also eager to hear from sw macsec users/devs on whats better here.
I don't do much development on MACsec these days, and I don't
personally use it outside of testing and development.
--
Sabrina
^ permalink raw reply
* Re: [Intel-wired-lan] [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
From: Björn Töpel @ 2019-08-16 13:32 UTC (permalink / raw)
To: Samudrala, Sridhar
Cc: Björn Töpel, Karlsson, Magnus, Netdev, bpf,
intel-wired-lan, maciej.fijalkowski, tom.herbert
In-Reply-To: <cc3a09eb-bcb8-a6e1-7175-77bddaf10c11@intel.com>
On Thu, 15 Aug 2019 at 18:46, Samudrala, Sridhar
<sridhar.samudrala@intel.com> wrote:
>
> On 8/15/2019 5:51 AM, Björn Töpel wrote:
> > On 2019-08-15 05:46, Sridhar Samudrala wrote:
> >> This patch series introduces XDP_SKIP_BPF flag that can be specified
> >> during the bind() call of an AF_XDP socket to skip calling the BPF
> >> program in the receive path and pass the buffer directly to the socket.
> >>
> >> When a single AF_XDP socket is associated with a queue and a HW
> >> filter is used to redirect the packets and the app is interested in
> >> receiving all the packets on that queue, we don't need an additional
> >> BPF program to do further filtering or lookup/redirect to a socket.
> >>
> >> Here are some performance numbers collected on
> >> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
> >> - Intel 40Gb Ethernet NIC (i40e)
> >>
> >> All tests use 2 cores and the results are in Mpps.
> >>
> >> turbo on (default)
> >> ---------------------------------------------
> >> no-skip-bpf skip-bpf
> >> ---------------------------------------------
> >> rxdrop zerocopy 21.9 38.5
> >> l2fwd zerocopy 17.0 20.5
> >> rxdrop copy 11.1 13.3
> >> l2fwd copy 1.9 2.0
> >>
> >> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
> >> ---------------------------------------------
> >> no-skip-bpf skip-bpf
> >> ---------------------------------------------
> >> rxdrop zerocopy 15.4 29.0
> >> l2fwd zerocopy 11.8 18.2
> >> rxdrop copy 8.2 10.5
> >> l2fwd copy 1.7 1.7
> >> ---------------------------------------------
> >>
> >
> > This work is somewhat similar to the XDP_ATTACH work [1]. Avoiding the
> > retpoline in the XDP program call is a nice performance boost! I like
> > the numbers! :-) I also like the idea of adding a flag that just does
> > what most AF_XDP Rx users want -- just getting all packets of a
> > certain queue into the XDP sockets.
> >
> > In addition to Toke's mail, I have some more concerns with the series:
> >
> > * AFAIU the SKIP_BPF only works for zero-copy enabled sockets. IMO, it
> > should work for all modes (including XDP_SKB).
>
> This patch enables SKIP_BPF for AF_XDP sockets where an XDP program is
> attached at driver level (both zerocopy and copy modes)
> I tried a quick hack to see the perf benefit with generic XDP mode, but
> i didn't see any significant improvement in performance in that
> scenario. so i didn't include that mode.
>
> >
> > * In order to work, a user still needs an XDP program running. That's
> > clunky. I'd like the behavior that if no XDP program is attached,
> >   and the option is set, the packets for that queue end up in the
> > socket. If there's an XDP program attached, the program has
> > precedence.
>
> I think this would require more changes in the drivers to take XDP
> datapath even when there is no XDP program loaded.
>
Today, from a driver perspective, to enable XDP you pass a struct
bpf_prog pointer via the ndo_bpf. The program gets executed in
BPF_PROG_RUN (via bpf_prog_run_xdp) from include/linux/filter.h.
I think it's possible to achieve what you're doing w/o *any* driver
modification. Pass a special, invalid, pointer to the driver (say
(void *)0x1 or something more elegant), which has special handling in
BPF_PROG_RUN, e.g. setting a per-cpu state and returning XDP_REDIRECT. The
per-cpu state is picked up in xdp_do_redirect and xdp_flush.
An approach like this would be general, and apply to all modes
automatically.
Thoughts?
> >
> > * It requires changes in all drivers. Not nice, and scales badly. Try
> > making it generic (xdp_do_redirect/xdp_flush), so it Just Works for
> > all XDP capable drivers.
>
> I tried to make this as generic as possible and make the changes to the
> driver very minimal, but could not find a way to avoid any changes at
> all to the driver. xdp_do_redirect() gets called after the call to
> bpf_prog_run_xdp() in the drivers.
>
> >
> > Thanks for working on this!
> >
> >
> > Björn
> >
> > [1]
> > https://lore.kernel.org/netdev/20181207114431.18038-1-bjorn.topel@gmail.com/
> >
> >
> >
> >> Sridhar Samudrala (5):
> >> xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap
> >> xsk: Introduce XDP_SKIP_BPF bind option
> >> i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets
> >> ixgbe: Enable XDP_SKIP_BPF option for AF_XDP sockets
> >> xdpsock_user: Add skip_bpf option
> >>
> >> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 22 +++++++++-
> >> drivers/net/ethernet/intel/i40e/i40e_xsk.c | 6 +++
> >> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 20 ++++++++-
> >> drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 16 ++++++-
> >> include/net/xdp_sock.h | 21 ++++++++-
> >> include/uapi/linux/if_xdp.h | 1 +
> >> include/uapi/linux/xdp_diag.h | 1 +
> >> net/xdp/xdp_umem.c | 9 ++--
> >> net/xdp/xsk.c | 43 ++++++++++++++++---
> >> net/xdp/xsk_diag.c | 5 ++-
> >> samples/bpf/xdpsock_user.c | 8 ++++
> >> 11 files changed, 135 insertions(+), 17 deletions(-)
> >>
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
^ permalink raw reply
* Re: [PATCH v2 00/10] Add definition for the number of standard PCI BARs
From: Bjorn Helgaas @ 2019-08-16 13:35 UTC (permalink / raw)
To: Andrew Murray
Cc: Denis Efremov, linux-kernel, linux-pci, Sebastian Ott,
Gerald Schaefer, H. Peter Anvin, Giuseppe Cavallaro,
Alexandre Torgue, Matt Porter, Alexandre Bounine, Peter Jones,
Bartlomiej Zolnierkiewicz, Cornelia Huck, Alex Williamson,
Jose Abreu, kvm, linux-fbdev, netdev, x86, linux-s390
In-Reply-To: <20190816105128.GD14111@e119886-lin.cambridge.arm.com>
On Fri, Aug 16, 2019 at 11:51:28AM +0100, Andrew Murray wrote:
> On Fri, Aug 16, 2019 at 12:24:27PM +0300, Denis Efremov wrote:
> > Code that iterates over all standard PCI BARs typically uses
> > PCI_STD_RESOURCE_END, but this is error-prone because it requires
> > "i <= PCI_STD_RESOURCE_END" rather than something like
> > "i < PCI_STD_NUM_BARS". We could add such a definition and use it the same
> > way PCI_SRIOV_NUM_BARS is used. There is already the definition
> > PCI_BAR_COUNT for s390 only. Thus, this patchset introduces it globally.
> >
> > Changes in v2:
> > - Reverse checks in pci_iomap_range,pci_iomap_wc_range.
> > - Refactor loops in vfio_pci to keep PCI_STD_RESOURCES.
> > - Add 2 new patches to replace the magic constant with new define.
> > - Split net patch in v1 to separate stmmac and dwc-xlgmac patches.
> >
> > Denis Efremov (10):
> > PCI: Add define for the number of standard PCI BARs
> > s390/pci: Loop using PCI_STD_NUM_BARS
> > x86/PCI: Loop using PCI_STD_NUM_BARS
> > stmmac: pci: Loop using PCI_STD_NUM_BARS
> > net: dwc-xlgmac: Loop using PCI_STD_NUM_BARS
> > rapidio/tsi721: Loop using PCI_STD_NUM_BARS
> > efifb: Loop using PCI_STD_NUM_BARS
> > vfio_pci: Loop using PCI_STD_NUM_BARS
> > PCI: hv: Use PCI_STD_NUM_BARS
> > PCI: Use PCI_STD_NUM_BARS
> >
> > arch/s390/include/asm/pci.h | 5 +----
> > arch/s390/include/asm/pci_clp.h | 6 +++---
> > arch/s390/pci/pci.c | 16 ++++++++--------
> > arch/s390/pci/pci_clp.c | 6 +++---
> > arch/x86/pci/common.c | 2 +-
> > drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 ++--
> > drivers/net/ethernet/synopsys/dwc-xlgmac-pci.c | 2 +-
> > drivers/pci/controller/pci-hyperv.c | 10 +++++-----
> > drivers/pci/pci.c | 11 ++++++-----
> > drivers/pci/quirks.c | 4 ++--
> > drivers/rapidio/devices/tsi721.c | 2 +-
> > drivers/vfio/pci/vfio_pci.c | 11 +++++++----
> > drivers/vfio/pci/vfio_pci_config.c | 10 ++++++----
> > drivers/vfio/pci/vfio_pci_private.h | 4 ++--
> > drivers/video/fbdev/efifb.c | 2 +-
> > include/linux/pci.h | 2 +-
> > include/uapi/linux/pci_regs.h | 1 +
> > 17 files changed, 51 insertions(+), 47 deletions(-)
>
> I've come across a few more places where this change can be made. There
> may be multiple instances in the same file, but only the first is shown
> below:
>
> drivers/misc/pci_endpoint_test.c: for (bar = BAR_0; bar <= BAR_5; bar++) {
> drivers/net/ethernet/intel/e1000/e1000_main.c: for (i = BAR_1; i <= BAR_5; i++) {
> drivers/net/ethernet/intel/ixgb/ixgb_main.c: for (i = BAR_1; i <= BAR_5; i++) {
> drivers/pci/controller/dwc/pci-dra7xx.c: for (bar = BAR_0; bar <= BAR_5; bar++)
> drivers/pci/controller/dwc/pci-layerscape-ep.c: for (bar = BAR_0; bar <= BAR_5; bar++)
> drivers/pci/controller/dwc/pcie-artpec6.c: for (bar = BAR_0; bar <= BAR_5; bar++)
> drivers/pci/controller/dwc/pcie-designware-plat.c: for (bar = BAR_0; bar <= BAR_5; bar++)
> drivers/pci/endpoint/functions/pci-epf-test.c: for (bar = BAR_0; bar <= BAR_5; bar++) {
> include/linux/pci-epc.h: u64 bar_fixed_size[BAR_5 + 1];
> drivers/scsi/pm8001/pm8001_hwi.c: for (bar = 0; bar < 6; bar++) {
> drivers/scsi/pm8001/pm8001_init.c: for (bar = 0; bar < 6; bar++) {
> drivers/ata/sata_nv.c: for (bar = 0; bar < 6; bar++)
> drivers/video/fbdev/core/fbmem.c: for (idx = 0, bar = 0; bar < PCI_ROM_RESOURCE; bar++) {
> drivers/staging/gasket/gasket_core.c: for (i = 0; i < GASKET_NUM_BARS; i++) {
> drivers/tty/serial/8250/8250_pci.c: for (i = 0; i < PCI_NUM_BAR_RESOURCES; i++) { <-----------
Thanks, I agree, these look like good candidates as well.
> It looks like BARs are often iterated with PCI_NUM_BAR_RESOURCES, there
> are a load of these too found with:
>
> git grep PCI_ROM_RESOURCE | grep "< "
Good point, those are slightly questionable and I'd change those too.
Bjorn
^ permalink raw reply
* Re: [GIT PULL] Keys: Set 4 - Key ACLs for 5.3
From: David Howells @ 2019-08-16 13:36 UTC (permalink / raw)
To: Mimi Zohar
Cc: dhowells, Linus Torvalds, James Morris, keyrings, Netdev,
linux-nfs, CIFS, linux-afs, linux-fsdevel, linux-integrity,
LSM List, Linux List Kernel Mailing
In-Reply-To: <1562814435.4014.11.camel@linux.ibm.com>
Mimi Zohar <zohar@linux.ibm.com> wrote:
> Sorry for the delay. An exception is needed for loading builtin keys
> "KEY_ALLOC_BUILT_IN" onto a keyring that is not writable by userspace.
> The following works, but probably is not how David would handle the
> exception.
I think the attached is the right way to fix it.
load_system_certificate_list(), for example, when it creates keys does this:
key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1),
marking the keyring as "possessed" in make_key_ref(). This allows the
possessor permits to be used - and that's the *only* way to use them for
internal keyrings like this because you can't link to them and you can't join
them.
David
---
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 57be78b5fdfc..1f8f26f7bb05 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -99,7 +99,7 @@ static __init int system_trusted_keyring_init(void)
builtin_trusted_keys =
keyring_alloc(".builtin_trusted_keys",
KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- &internal_key_acl, KEY_ALLOC_NOT_IN_QUOTA,
+ &internal_keyring_acl, KEY_ALLOC_NOT_IN_QUOTA,
NULL, NULL);
if (IS_ERR(builtin_trusted_keys))
panic("Can't allocate builtin trusted keyring\n");
diff --git a/security/keys/permission.c b/security/keys/permission.c
index fc84d9ef6239..86efd3eaf083 100644
--- a/security/keys/permission.c
+++ b/security/keys/permission.c
@@ -47,7 +47,7 @@ struct key_acl internal_keyring_acl = {
.usage = REFCOUNT_INIT(1),
.nr_ace = 2,
.aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH),
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_SEARCH),
}
};
^ permalink raw reply related
* linux-next: Fixes tag needs some work in the bpf-next tree
From: Stephen Rothwell @ 2019-08-16 13:46 UTC (permalink / raw)
To: Daniel Borkmann, Alexei Starovoitov, Networking
Cc: Linux Next Mailing List, Linux Kernel Mailing List,
Quentin Monnet, Alexei Starovoitov
[-- Attachment #1: Type: text/plain, Size: 535 bytes --]
Hi all,
In commit
ed4a3983cd3e ("tools: bpftool: fix argument for p_err() in BTF do_dump()")
Fixes tag
Fixes: c93cc69004dt ("bpftool: add ability to dump BTF types")
has these problem(s):
- missing space between the SHA1 and the subject
This is due to the trailing 't' on the SHA1 :-(
- SHA1 should be at least 12 digits long
Can be fixed by setting core.abbrev to 12 (or more) or (for git v2.11
or later) just making sure it is not set (or set to "auto").
--
Cheers,
Stephen Rothwell
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* Re: r8169: Performance regression and latency instability
From: Holger Hoffstätte @ 2019-08-16 13:59 UTC (permalink / raw)
To: Eric Dumazet, Juliana Rodrigueiro, netdev; +Cc: hkallweit1
In-Reply-To: <217e3fa9-7782-08c7-1f2b-8dabacaa83f9@gmail.com>
On 8/16/19 2:35 PM, Eric Dumazet wrote:
..snip..
> I also see this relevant commit : I have no idea why SG would have any relation with TSO.
>
> commit a7eb6a4f2560d5ae64bfac98d79d11378ca2de6c
> Author: Holger Hoffstätte <holger@applied-asynchrony.com>
> Date: Fri Aug 9 00:02:40 2019 +0200
>
> r8169: fix performance issue on RTL8168evl
>
> Disabling TSO but leaving SG active results in a significant
> performance drop. Therefore disable also SG on RTL8168evl.
> This restores the original performance.
>
> Fixes: 93681cd7d94f ("r8169: enable HW csum and TSO")
> Signed-off-by: Holger Hoffstätte <holger@applied-asynchrony.com>
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
It does not - and admittedly none of this makes sense, but stay with me here.
The commit 93681cd7d94f to net-next enabled rx/tx HW checksumming and TSO
by default, but disabled TSO for one specific chip revision - the most popular
one, of course. Enabling rx/tx checksums by default while leaving SG on turned
out to be the performance issue (~780 MBit max) that I found & fixed in the
quoted commit. SG *can* be enabled when rx/tx checksumming is *dis*abled
(I just verified again), we just had to sanitize the new default.
An alternative strategy could still be to (again?) disable everything by default
and just let people manually enable whatever settings work for their random
chip revision + BIOS combination. I'll let Heiner chime in here.
Basically these chips are dumpster fires and should not be used for anything
ever, which of course means they are everywhere.
AFAICT none of this has anything to do with Juliana's problem..
-h
^ permalink raw reply
* Re: [RFC PATCH net-next 03/11] spi: Add a PTP system timestamp to the transfer structure
From: Vladimir Oltean @ 2019-08-16 14:05 UTC (permalink / raw)
To: Mark Brown
Cc: Hubert Feurstein, mlichvar, Richard Cochran, Andrew Lunn,
Florian Fainelli, linux-spi, netdev
In-Reply-To: <20190816125820.GF4039@sirena.co.uk>
On Fri, 16 Aug 2019 at 15:58, Mark Brown <broonie@kernel.org> wrote:
>
> On Fri, Aug 16, 2019 at 03:35:30PM +0300, Vladimir Oltean wrote:
> > On Fri, 16 Aug 2019 at 15:18, Mark Brown <broonie@kernel.org> wrote:
> > > On Fri, Aug 16, 2019 at 03:44:41AM +0300, Vladimir Oltean wrote:
>
> > > > @@ -842,6 +843,9 @@ struct spi_transfer {
> > > >
> > > > u32 effective_speed_hz;
> > > >
> > > > + struct ptp_system_timestamp *ptp_sts;
> > > > + unsigned int ptp_sts_word_offset;
> > > > +
>
> > > You've not documented these fields at all so it's not clear what the
> > > intended usage is.
>
> > Thanks for looking into this.
> > Indeed I didn't document them as the patch is part of a RFC and I
> > thought the purpose was more clear from the context (cover letter
> > etc).
> > If I do ever send a patchset for submission I will document the newly
> > introduced fields properly.
>
> The issue I'm having is that I have zero idea about the PTP API so I've
> got nothing to go on when thinking about if this approach makes any
> sense unless I go do some research.
>
> > So let me clarify:
> > The SPI slave device driver is populating these fields to indicate to
> > the controller driver that it wants word number @ptp_sts_word_offset
> > from the tx buffer snapshotted. The controller driver is supposed to
> > put the snapshot into the @ptp_sts field, which is a pointer to a
> > memory location under the control of the SPI slave device driver.
>
> Snapshot here basically meaning recording a timestamp? This interface
> does seem like it basically precludes DMA based controllers from using
> it unless someone happened to implement some very specific stuff in
> hardware which seems implausible. I'd be inclined to just require that
> users can only snapshot the first (and possibly also the last, though
> DMA completions make that fun) word of a transfer, we could then pull
> this out into the core a bit by providing a wrapper function drivers
> should call at the appropriate moment.
>
I'm not sure how to respond to this, because I don't know anything
about the timing of DMA transfers.
Maybe snapshotting DMA transfers the same way is not possible (if at
all). Maybe they are not exactly adequate for this sort of application
anyway. Maybe it depends.
But the switch I'm working on is issuing an internal read transaction
of the PTP timer exactly at the 4th-to-last bit of the 3rd byte. This
is so that it has time (4 SPI clock cycles, to be precise) for the
result of the read transaction to become available again to the SPI
block, for output. It is impossible to know exactly when the switch
will snapshot the time internally (because there are several clock
domain crossings from the SPI interface towards its core) but for
certain it takes place during the latter part of the 3rd SPI byte. I
believe other devices are similar in this regard.
In other words, from a purely performance perspective, I am against
limiting the API to just snapshotting the first and last byte. At this
level of "zoom", if I change the offset of the byte to anything other
than 3, the synchronization offset refuses to converge towards zero,
because the snapshot is incurring a constant offset that the servo
loop from userspace (phc2sys) can't compensate for.
Maybe the SPI master driver should just report what sort of
snapshotting capability it can offer, ranging from none (default
unless otherwise specified), to transfer-level (DMA style) or
byte-level.
I'm afraid more actual experimentation is needed with DMA-based
controllers to understand what can be expected from them, and as a
result, how the API should map around them.
MDIO bus controllers are in a similar situation (with Hubert's patch)
but at least there the frame size is fixed and I haven't heard of an
MDIO controller to use DMA.
I'm not really sure what the next step would be. In the other thread,
Richard Cochran mentioned something about a two-part write API,
although I didn't quite understand the idea behind it.
> > It is ok if the ptp_sts pointer is NULL (no need to check), because
> > the API for taking snapshots already checks for that.
> > At the moment there is yet no proposed mechanism for the SPI slave
> > driver to ensure that the controller will really act upon this
> > request. That would be really nice to have, since some SPI slave
> > devices are time-sensitive and warning early is a good way to prevent
> > unnecessary troubleshooting.
>
> Yes, that's one of the things I was thinking about looking at the series
> - we should at least be able to warn if we can't timestamp so nobody
> gets confused, possibly error out if the calling code particularly
> depends on it.
Regards,
-Vladimir
^ permalink raw reply
* [PATCH] rtlwifi: remove unused variables 'RTL8712_SDIO_EFUSE_TABLE' and 'MAX_PGPKT_SIZE'
From: YueHaibing @ 2019-08-16 14:05 UTC (permalink / raw)
To: pkshih, kvalo, davem; +Cc: linux-kernel, netdev, linux-wireless, YueHaibing
drivers/net/wireless/realtek/rtlwifi/efuse.c:16:31:
warning: RTL8712_SDIO_EFUSE_TABLE defined but not used [-Wunused-const-variable=]
drivers/net/wireless/realtek/rtlwifi/efuse.c:9:17:
warning: MAX_PGPKT_SIZE defined but not used [-Wunused-const-variable=]
They are never used, so can be removed.
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
drivers/net/wireless/realtek/rtlwifi/efuse.c | 17 -----------------
1 file changed, 17 deletions(-)
diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.c b/drivers/net/wireless/realtek/rtlwifi/efuse.c
index ea4fc53..2646672 100644
--- a/drivers/net/wireless/realtek/rtlwifi/efuse.c
+++ b/drivers/net/wireless/realtek/rtlwifi/efuse.c
@@ -6,29 +6,12 @@
#include "pci.h"
#include <linux/export.h>
-static const u8 MAX_PGPKT_SIZE = 9;
static const u8 PGPKT_DATA_SIZE = 8;
static const int EFUSE_MAX_SIZE = 512;
#define START_ADDRESS 0x1000
#define REG_MCUFWDL 0x0080
-static const struct efuse_map RTL8712_SDIO_EFUSE_TABLE[] = {
- {0, 0, 0, 2},
- {0, 1, 0, 2},
- {0, 2, 0, 2},
- {1, 0, 0, 1},
- {1, 0, 1, 1},
- {1, 1, 0, 1},
- {1, 1, 1, 3},
- {1, 3, 0, 17},
- {3, 3, 1, 48},
- {10, 0, 0, 6},
- {10, 3, 0, 1},
- {10, 3, 1, 1},
- {11, 0, 0, 28}
-};
-
static const struct rtl_efuse_ops efuse_ops = {
.efuse_onebyte_read = efuse_one_byte_read,
.efuse_logical_map_read = efuse_shadow_read,
--
2.7.4
^ permalink raw reply related
* Re: linux-next: Signed-off-by missing for commits in the net-next tree
From: Gerd Rausch @ 2019-08-16 14:10 UTC (permalink / raw)
To: Andy Grover, Stephen Rothwell, David Miller, Networking,
Chris Mason
Cc: Linux Next Mailing List, Linux Kernel Mailing List, Andy Grover,
Chris Mason
In-Reply-To: <e85146f3-93a0-b23f-6a6e-11e42815946d@groveronline.com>
Hi,
On 16/08/2019 02.15, Andy Grover wrote:
> On 8/16/19 3:06 PM, Gerd Rausch wrote:
>> Hi,
>>
>> Just added the e-mail addresses I found using a simple "google search",
>> in order to reach out to the original authors of these commits:
>> Chris Mason and Andy Grover.
>>
>> I'm hoping they still remember their work from 7-8 years ago.
>
> Yes looks like what I was working on. What did you need from me? It's
> too late to amend the commitlogs...
>
I'll let Stephen or David respond to what (if any) action is necessary.
The missing Signed-off-by was pointed out to me by Stephen yesterday.
Hence I tried to locate you guys to pull you into the loop in order to
not leave his concern unanswered.
Thanks,
Gerd
^ permalink raw reply
* Re: [PATCH RFC ipsec-next 0/7] ipsec: add TCP encapsulation support (RFC 8229)
From: Sabrina Dubroca @ 2019-08-16 14:18 UTC (permalink / raw)
To: netdev, Steffen Klassert; +Cc: Herbert Xu
In-Reply-To: <cover.1561457281.git.sd@queasysnail.net>
Hi Steffen,
2019-06-25, 12:11:33 +0200, Sabrina Dubroca wrote:
> This patchset introduces support for TCP encapsulation of IKE and ESP
> messages, as defined by RFC 8229 [0]. It is an evolution of what
> Herbert Xu proposed in January 2018 [1] that addresses the main
> criticism against it, by not interfering with the TCP implementation
> at all. The networking stack now has infrastructure for this: TCP ULPs
> and Stream Parsers.
Have you had a chance to look at this? I was going to rebase and
resend, but the patches still apply to ipsec-next and net-next (patch
2 is already in net-next as commit bd95e678e0f6).
Thanks,
--
Sabrina
^ permalink raw reply
* Re: linux-next: Signed-off-by missing for commits in the net-next tree
From: Stephen Rothwell @ 2019-08-16 14:31 UTC (permalink / raw)
To: Gerd Rausch
Cc: Andy Grover, David Miller, Networking, Chris Mason,
Linux Next Mailing List, Linux Kernel Mailing List, Andy Grover,
Chris Mason
In-Reply-To: <15078f1f-a036-2a54-1a07-9197f81bd58f@oracle.com>
[-- Attachment #1: Type: text/plain, Size: 1099 bytes --]
Hi all,
On Fri, 16 Aug 2019 07:10:34 -0700 Gerd Rausch <gerd.rausch@oracle.com> wrote:
>
> On 16/08/2019 02.15, Andy Grover wrote:
> > On 8/16/19 3:06 PM, Gerd Rausch wrote:
> >>
> >> Just added the e-mail addresses I found using a simple "google search",
> >> in order to reach out to the original authors of these commits:
> >> Chris Mason and Andy Grover.
> >>
> >> I'm hoping they still remember their work from 7-8 years ago.
> >
> > Yes looks like what I was working on. What did you need from me? It's
> > too late to amend the commitlogs...
Yeah, Dave doesn't rebase his trees.
> I'll let Stephen or David respond to what (if any) action is necessary.
>
> The missing Signed-off-by was pointed out to me by Stephen yesterday.
>
> Hence I tried to locate you guys to pull you into the loop in order to
> not leave his concern unanswered.
It is OK for SOBs to be missing, I just wanted to make sure that it was
OK in this instance. (It's better that I ask when it's OK than not to
ask and find something has gone wrong.)
--
Cheers,
Stephen Rothwell
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* Re: linux-next: Fixes tag needs some work in the bpf-next tree
From: Quentin Monnet @ 2019-08-16 14:35 UTC (permalink / raw)
To: Stephen Rothwell, Daniel Borkmann, Alexei Starovoitov, Networking
Cc: Linux Next Mailing List, Linux Kernel Mailing List
In-Reply-To: <20190816234613.351ddf07@canb.auug.org.au>
2019-08-16 23:46 UTC+1000 ~ Stephen Rothwell <sfr@canb.auug.org.au>
> Hi all,
>
> In commit
>
> ed4a3983cd3e ("tools: bpftool: fix argument for p_err() in BTF do_dump()")
>
> Fixes tag
>
> Fixes: c93cc69004dt ("bpftool: add ability to dump BTF types")
>
> has these problem(s):
>
> - missing space between the SHA1 and the subject
>
> This is due to the trailing 't' on the SHA1 :-(
>
> - SHA1 should be at least 12 digits long
> Can be fixed by setting core.abbrev to 12 (or more) or (for git v2.11
> or later) just making sure it is not set (or set to "auto").
>
Hi Stephen,
I made that typo, please accept my apologies :(.
The correct tag should be:
Fixes: c93cc69004df ("bpftool: add ability to dump BTF types")
Regards,
Quentin
^ permalink raw reply
* RE: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface
From: Haiyang Zhang @ 2019-08-16 14:48 UTC (permalink / raw)
To: vkuznets, sashal@kernel.org, davem@davemloft.net,
saeedm@mellanox.com, leon@kernel.org, eranbe@mellanox.com,
lorenzo.pieralisi@arm.com, bhelgaas@google.com,
linux-pci@vger.kernel.org, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org
Cc: KY Srinivasan, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <878srt8fd8.fsf@vitty.brq.redhat.com>
> -----Original Message-----
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> Sent: Friday, August 16, 2019 8:28 AM
> To: Haiyang Zhang <haiyangz@microsoft.com>; sashal@kernel.org;
> davem@davemloft.net; saeedm@mellanox.com; leon@kernel.org;
> eranbe@mellanox.com; lorenzo.pieralisi@arm.com; bhelgaas@google.com;
> linux-pci@vger.kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org
> Cc: Haiyang Zhang <haiyangz@microsoft.com>; KY Srinivasan
> <kys@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>;
> linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for
> software backchannel interface
>
> Haiyang Zhang <haiyangz@microsoft.com> writes:
>
> > This mini driver is a helper driver allows other drivers to have a
> > common interface with the Hyper-V PCI frontend driver.
> >
> > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> > ---
> > MAINTAINERS | 1 +
> > drivers/pci/Kconfig | 1 +
> > drivers/pci/controller/Kconfig | 7 ++++
> > drivers/pci/controller/Makefile | 1 +
> > drivers/pci/controller/pci-hyperv-mini.c | 70
> ++++++++++++++++++++++++++++++++
> > drivers/pci/controller/pci-hyperv.c | 12 ++++--
> > include/linux/hyperv.h | 30 ++++++++++----
> > 7 files changed, 111 insertions(+), 11 deletions(-) create mode
> > 100644 drivers/pci/controller/pci-hyperv-mini.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS index e352550..c4962b9 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -7453,6 +7453,7 @@ F: drivers/hid/hid-hyperv.c
> > F: drivers/hv/
> > F: drivers/input/serio/hyperv-keyboard.c
> > F: drivers/pci/controller/pci-hyperv.c
> > +F: drivers/pci/controller/pci-hyperv-mini.c
> > F: drivers/net/hyperv/
> > F: drivers/scsi/storvsc_drv.c
> > F: drivers/uio/uio_hv_generic.c
> > diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index
> > 2ab9240..bb852f5 100644
> > --- a/drivers/pci/Kconfig
> > +++ b/drivers/pci/Kconfig
> > @@ -182,6 +182,7 @@ config PCI_LABEL
> > config PCI_HYPERV
> > tristate "Hyper-V PCI Frontend"
> > depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN
> &&
> > X86_64
> > + select PCI_HYPERV_MINI
> > help
> > The PCI device frontend driver allows the kernel to import arbitrary
> > PCI devices from a PCI backend to support PCI driver domains.
> > diff --git a/drivers/pci/controller/Kconfig
> > b/drivers/pci/controller/Kconfig index fe9f9f1..8e31cba 100644
> > --- a/drivers/pci/controller/Kconfig
> > +++ b/drivers/pci/controller/Kconfig
> > @@ -281,5 +281,12 @@ config VMD
> > To compile this driver as a module, choose M here: the
> > module will be called vmd.
> >
> > +config PCI_HYPERV_MINI
> > + tristate "Hyper-V PCI Mini"
> > + depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN
> && X86_64
> > + help
> > + The Hyper-V PCI Mini is a helper driver allows other drivers to
> > + have a common interface with the Hyper-V PCI frontend driver.
> > +
>
> Out of pure curiosity, why not just export this interface from PCI_HYPERV
> directly? Why do we need this stub?
The pci_hyperv can only be loaded on VMs on Hyper-V and Azure. Other
drivers like MLX5e will have a symbol dependency on pci_hyperv if they
use functions exported by pci_hyperv. This dependency will cause other
drivers to fail to load on other platforms, like VMs on KVM. So we created
this mini driver, which can be loaded on any platform to provide the
symbolic dependency.
Thanks,
- Haiyang
^ permalink raw reply
* Re: [PATCH] arm64: do_csum: implement accelerated scalar version
From: Robin Murphy @ 2019-08-16 14:55 UTC (permalink / raw)
To: Shaokun Zhang, Will Deacon
Cc: Ard Biesheuvel, linux-arm-kernel, netdev, ilias.apalodimas,
huanglingyan (A), steve.capper
In-Reply-To: <37fbc2a3-069d-9f75-f3d0-3eda2efa5c9b@hisilicon.com>
On 16/08/2019 09:15, Shaokun Zhang wrote:
> Hi Will,
>
> On 2019/8/16 0:46, Will Deacon wrote:
>> On Thu, May 16, 2019 at 11:14:35AM +0800, Zhangshaokun wrote:
>>> On 2019/5/15 17:47, Will Deacon wrote:
>>>> On Mon, Apr 15, 2019 at 07:18:22PM +0100, Robin Murphy wrote:
>>>>> On 12/04/2019 10:52, Will Deacon wrote:
>>>>>> I'm waiting for Robin to come back with numbers for a C implementation.
>>>>>>
>>>>>> Robin -- did you get anywhere with that?
>>>>>
>>>>> Still not what I would call finished, but where I've got so far (besides an
>>>>> increasingly elaborate test rig) is as below - it still wants some unrolling
>>>>> in the middle to really fly (and actual testing on BE), but the worst-case
>>>>> performance already equals or just beats this asm version on Cortex-A53 with
>>>>> GCC 7 (by virtue of being alignment-insensitive and branchless except for
>>>>> the loop). Unfortunately, the advantage of C code being instrumentable does
>>>>> also come around to bite me...
>>>>
>>>> Is there any interest from anybody in spinning a proper patch out of this?
>>>> Shaokun?
>>>
>>> HiSilicon's Kunpeng920(Hi1620) benefits from do_csum optimization, if Ard and
>>> Robin are ok, Lingyan or I can try to do it.
>>> Of course, if any guy posts the patch, we are happy to test it.
>>> Any will be ok.
>>
>> I don't mind who posts it, but Robin is super busy with SMMU stuff at the
>> moment so it probably makes more sense for you or Lingyan to do it.
>
> Thanks for restarting this topic, I or Lingyan will do it soon.
FWIW, I've rolled up what I had so far and dumped it up into a quick
semi-realistic patch here:
http://linux-arm.org/git?p=linux-rm.git;a=commitdiff;h=859c5566510c32ae72039aa5072e932a771a3596
So far I'd put most of the effort into the aforementioned benchmarking
harness to compare performance and correctness for all the proposed
implementations over all reasonable alignment/length combinations - I
think that got pretty much finished, but as Will says I'm unlikely to
find time to properly look at this again for several weeks.
Robin.
^ permalink raw reply
* Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
From: Vlad Buslov @ 2019-08-16 15:04 UTC (permalink / raw)
To: wenxu
Cc: Vlad Buslov, Jakub Kicinski, David Miller, Jiri Pirko,
pablo@netfilter.org, netfilter-devel@vger.kernel.org,
netdev@vger.kernel.org
In-Reply-To: <f28ddefe-a7d8-e5ad-e03e-08cfee4db147@ucloud.cn>
On Wed 14 Aug 2019 at 05:50, wenxu <wenxu@ucloud.cn> wrote:
> On 8/12/2019 10:11 PM, Vlad Buslov wrote:
>>
>>> +static void flow_block_ing_cmd(struct net_device *dev,
>>> + flow_indr_block_bind_cb_t *cb,
>>> + void *cb_priv,
>>> + enum flow_block_command command)
>>> +{
>>> + struct flow_indr_block_ing_entry *entry;
>>> +
>>> + rcu_read_lock();
>>> + list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
>>> + entry->cb(dev, cb, cb_priv, command);
>>> + }
>>> + rcu_read_unlock();
>>> +}
>> Hi,
>>
>> I'm getting following incorrect rcu usage warnings with this patch
>> caused by rcu_read_lock in flow_block_ing_cmd:
>>
>> [ 401.510948] =============================
>> [ 401.510952] WARNING: suspicious RCU usage
>> [ 401.510993] 5.3.0-rc3+ #589 Not tainted
>> [ 401.510996] -----------------------------
>> [ 401.511001] include/linux/rcupdate.h:265 Illegal context switch in RCU read-side critical section!
>> [ 401.511004]
>> other info that might help us debug this:
>>
>> [ 401.511008]
>> rcu_scheduler_active = 2, debug_locks = 1
>> [ 401.511012] 7 locks held by test-ecmp-add-v/7576:
>> [ 401.511015] #0: 00000000081d71a5 (sb_writers#4){.+.+}, at: vfs_write+0x166/0x1d0
>> [ 401.511037] #1: 000000002bd338c3 (&of->mutex){+.+.}, at: kernfs_fop_write+0xef/0x1b0
>> [ 401.511051] #2: 00000000c921c634 (kn->count#317){.+.+}, at: kernfs_fop_write+0xf7/0x1b0
>> [ 401.511062] #3: 00000000a19cdd56 (&dev->mutex){....}, at: sriov_numvfs_store+0x6b/0x130
>> [ 401.511079] #4: 000000005425fa52 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x30/0x140
>> [ 401.511092] #5: 00000000c5822793 (rtnl_mutex){+.+.}, at: unregister_netdevice_notifier+0x35/0x140
>> [ 401.511101] #6: 00000000c2f3507e (rcu_read_lock){....}, at: flow_block_ing_cmd+0x5/0x130
>> [ 401.511115]
>> stack backtrace:
>> [ 401.511121] CPU: 21 PID: 7576 Comm: test-ecmp-add-v Not tainted 5.3.0-rc3+ #589
>> [ 401.511124] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
>> [ 401.511127] Call Trace:
>> [ 401.511138] dump_stack+0x85/0xc0
>> [ 401.511146] ___might_sleep+0x100/0x180
>> [ 401.511154] __mutex_lock+0x5b/0x960
>> [ 401.511162] ? find_held_lock+0x2b/0x80
>> [ 401.511173] ? __tcf_get_next_chain+0x1d/0xb0
>> [ 401.511179] ? mark_held_locks+0x49/0x70
>> [ 401.511194] ? __tcf_get_next_chain+0x1d/0xb0
>> [ 401.511198] __tcf_get_next_chain+0x1d/0xb0
>> [ 401.511251] ? uplink_rep_async_event+0x70/0x70 [mlx5_core]
>> [ 401.511261] tcf_block_playback_offloads+0x39/0x160
>> [ 401.511276] tcf_block_setup+0x1b0/0x240
>> [ 401.511312] ? mlx5e_rep_indr_setup_tc_cb+0xca/0x290 [mlx5_core]
>> [ 401.511347] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
>> [ 401.511359] tc_indr_block_get_and_ing_cmd+0x11b/0x1e0
>> [ 401.511404] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
>> [ 401.511414] flow_block_ing_cmd+0x7e/0x130
>> [ 401.511453] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
>> [ 401.511462] __flow_indr_block_cb_unregister+0x7f/0xf0
>> [ 401.511502] mlx5e_nic_rep_netdevice_event+0x75/0xb0 [mlx5_core]
>> [ 401.511513] unregister_netdevice_notifier+0xe9/0x140
>> [ 401.511554] mlx5e_cleanup_rep_tx+0x6f/0xe0 [mlx5_core]
>> [ 401.511597] mlx5e_detach_netdev+0x4b/0x60 [mlx5_core]
>> [ 401.511637] mlx5e_vport_rep_unload+0x71/0xc0 [mlx5_core]
>> [ 401.511679] esw_offloads_disable+0x5b/0x90 [mlx5_core]
>> [ 401.511724] mlx5_eswitch_disable.cold+0xdf/0x176 [mlx5_core]
>> [ 401.511759] mlx5_device_disable_sriov+0xab/0xb0 [mlx5_core]
>> [ 401.511794] mlx5_core_sriov_configure+0xaf/0xd0 [mlx5_core]
>> [ 401.511805] sriov_numvfs_store+0xf8/0x130
>> [ 401.511817] kernfs_fop_write+0x122/0x1b0
>> [ 401.511826] vfs_write+0xdb/0x1d0
>> [ 401.511835] ksys_write+0x65/0xe0
>> [ 401.511847] do_syscall_64+0x5c/0xb0
>> [ 401.511857] entry_SYSCALL_64_after_hwframe+0x49/0xbe
>> [ 401.511862] RIP: 0033:0x7fad892d30f8
>> [ 401.511868] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 96 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 60 c3 0f 1f 80 00 00 00 00 48 83
>> ec 28 48 89
>> [ 401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
>> [ 401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
>> [ 401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
>> [ 401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
>> [ 401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
>> [ 401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740
>>
>> I don't think it is correct approach to try to call these callbacks with
>> rcu protection because:
>>
>> - Cls API uses sleeping locks that cannot be used in rcu read section
>> (hence the included trace).
>>
>> - It assumes that all implementation of classifier ops reoffload() don't
>> sleep.
>>
>> - And that all driver offload callbacks (both block and classifier
>> setup) don't sleep, which is not the case.
>>
>> I don't see any straightforward way to fix this, besides using some
>> other locking mechanism to protect block_ing_cb_list.
>>
>> Regards,
>> Vlad
>
> Maybe get the mutex flow_indr_block_ing_cb_lock for both lookup, add, delete?
>
> the callbacks_lists. the add and delete is work only on modules init case. So the
>
> lookup is also not frequent (only [un]register) and can be protected with the locks.
That should do the job. I'll send the patch.
^ permalink raw reply
* [PATCH net-next] net: flow_offload: convert block_ing_cb_list to regular list type
From: Vlad Buslov @ 2019-08-16 15:06 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, wenxu, pablo, Vlad Buslov
RCU list block_ing_cb_list is protected by rcu read lock in
flow_block_ing_cmd() and with flow_indr_block_ing_cb_lock mutex in all
functions that use it. However, flow_block_ing_cmd() needs to call blocking
functions while iterating block_ing_cb_list which leads to the following
suspicious RCU usage warning:
[ 401.510948] =============================
[ 401.510952] WARNING: suspicious RCU usage
[ 401.510993] 5.3.0-rc3+ #589 Not tainted
[ 401.510996] -----------------------------
[ 401.511001] include/linux/rcupdate.h:265 Illegal context switch in RCU read-side critical section!
[ 401.511004]
other info that might help us debug this:
[ 401.511008]
rcu_scheduler_active = 2, debug_locks = 1
[ 401.511012] 7 locks held by test-ecmp-add-v/7576:
[ 401.511015] #0: 00000000081d71a5 (sb_writers#4){.+.+}, at: vfs_write+0x166/0x1d0
[ 401.511037] #1: 000000002bd338c3 (&of->mutex){+.+.}, at: kernfs_fop_write+0xef/0x1b0
[ 401.511051] #2: 00000000c921c634 (kn->count#317){.+.+}, at: kernfs_fop_write+0xf7/0x1b0
[ 401.511062] #3: 00000000a19cdd56 (&dev->mutex){....}, at: sriov_numvfs_store+0x6b/0x130
[ 401.511079] #4: 000000005425fa52 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x30/0x140
[ 401.511092] #5: 00000000c5822793 (rtnl_mutex){+.+.}, at: unregister_netdevice_notifier+0x35/0x140
[ 401.511101] #6: 00000000c2f3507e (rcu_read_lock){....}, at: flow_block_ing_cmd+0x5/0x130
[ 401.511115]
stack backtrace:
[ 401.511121] CPU: 21 PID: 7576 Comm: test-ecmp-add-v Not tainted 5.3.0-rc3+ #589
[ 401.511124] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[ 401.511127] Call Trace:
[ 401.511138] dump_stack+0x85/0xc0
[ 401.511146] ___might_sleep+0x100/0x180
[ 401.511154] __mutex_lock+0x5b/0x960
[ 401.511162] ? find_held_lock+0x2b/0x80
[ 401.511173] ? __tcf_get_next_chain+0x1d/0xb0
[ 401.511179] ? mark_held_locks+0x49/0x70
[ 401.511194] ? __tcf_get_next_chain+0x1d/0xb0
[ 401.511198] __tcf_get_next_chain+0x1d/0xb0
[ 401.511251] ? uplink_rep_async_event+0x70/0x70 [mlx5_core]
[ 401.511261] tcf_block_playback_offloads+0x39/0x160
[ 401.511276] tcf_block_setup+0x1b0/0x240
[ 401.511312] ? mlx5e_rep_indr_setup_tc_cb+0xca/0x290 [mlx5_core]
[ 401.511347] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[ 401.511359] tc_indr_block_get_and_ing_cmd+0x11b/0x1e0
[ 401.511404] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[ 401.511414] flow_block_ing_cmd+0x7e/0x130
[ 401.511453] ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[ 401.511462] __flow_indr_block_cb_unregister+0x7f/0xf0
[ 401.511502] mlx5e_nic_rep_netdevice_event+0x75/0xb0 [mlx5_core]
[ 401.511513] unregister_netdevice_notifier+0xe9/0x140
[ 401.511554] mlx5e_cleanup_rep_tx+0x6f/0xe0 [mlx5_core]
[ 401.511597] mlx5e_detach_netdev+0x4b/0x60 [mlx5_core]
[ 401.511637] mlx5e_vport_rep_unload+0x71/0xc0 [mlx5_core]
[ 401.511679] esw_offloads_disable+0x5b/0x90 [mlx5_core]
[ 401.511724] mlx5_eswitch_disable.cold+0xdf/0x176 [mlx5_core]
[ 401.511759] mlx5_device_disable_sriov+0xab/0xb0 [mlx5_core]
[ 401.511794] mlx5_core_sriov_configure+0xaf/0xd0 [mlx5_core]
[ 401.511805] sriov_numvfs_store+0xf8/0x130
[ 401.511817] kernfs_fop_write+0x122/0x1b0
[ 401.511826] vfs_write+0xdb/0x1d0
[ 401.511835] ksys_write+0x65/0xe0
[ 401.511847] do_syscall_64+0x5c/0xb0
[ 401.511857] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 401.511862] RIP: 0033:0x7fad892d30f8
[ 401.511868] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 96 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 60 c3 0f 1f 80 00 00 00 00 48 83
ec 28 48 89
[ 401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
[ 401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
[ 401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
[ 401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
[ 401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740
To fix the described incorrect RCU usage, convert block_ing_cb_list from
RCU list to regular list and protect it with flow_indr_block_ing_cb_lock
mutex in flow_block_ing_cmd().
Fixes: 1150ab0f1b33 ("flow_offload: support get multi-subsystem block")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
net/core/flow_offload.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 64c3d4d72b9c..cf52d9c422fa 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -391,6 +391,8 @@ static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
kfree(indr_block_cb);
}
+static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
+
static void flow_block_ing_cmd(struct net_device *dev,
flow_indr_block_bind_cb_t *cb,
void *cb_priv,
@@ -398,11 +400,11 @@ static void flow_block_ing_cmd(struct net_device *dev,
{
struct flow_indr_block_ing_entry *entry;
- rcu_read_lock();
- list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
+ mutex_lock(&flow_indr_block_ing_cb_lock);
+ list_for_each_entry(entry, &block_ing_cb_list, list) {
entry->cb(dev, cb, cb_priv, command);
}
- rcu_read_unlock();
+ mutex_unlock(&flow_indr_block_ing_cb_lock);
}
int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
@@ -497,11 +499,10 @@ void flow_indr_block_call(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(flow_indr_block_call);
-static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry)
{
mutex_lock(&flow_indr_block_ing_cb_lock);
- list_add_tail_rcu(&entry->list, &block_ing_cb_list);
+ list_add_tail(&entry->list, &block_ing_cb_list);
mutex_unlock(&flow_indr_block_ing_cb_lock);
}
EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
@@ -509,7 +510,7 @@ EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry)
{
mutex_lock(&flow_indr_block_ing_cb_lock);
- list_del_rcu(&entry->list);
+ list_del(&entry->list);
mutex_unlock(&flow_indr_block_ing_cb_lock);
}
EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb);
--
2.21.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox