* [PATCH net-next 05/12] net: stmmac: Add a counter for Split Header packets
From: Jose Abreu @ 2019-08-09 18:36 UTC (permalink / raw)
To: netdev; +Cc: Joao Pinto, Jose Abreu
In-Reply-To: <cover.1565375521.git.joabreu@synopsys.com>
Add a counter that increments each time a packet with split header is
received.
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
drivers/net/ethernet/stmicro/stmmac/common.h | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++
3 files changed, 4 insertions(+)
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 527f961579f4..1303ec81fd3d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -75,6 +75,7 @@ struct stmmac_extra_stats {
unsigned long rx_missed_cntr;
unsigned long rx_overflow_cntr;
unsigned long rx_vlan;
+ unsigned long rx_split_hdr_pkt_n;
/* Tx/Rx IRQ error info */
unsigned long tx_undeflow_irq;
unsigned long tx_process_stopped_irq;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 2423160ab582..eb784fdb6d32 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -65,6 +65,7 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
STMMAC_STAT(rx_missed_cntr),
STMMAC_STAT(rx_overflow_cntr),
STMMAC_STAT(rx_vlan),
+ STMMAC_STAT(rx_split_hdr_pkt_n),
/* Tx/Rx IRQ error info */
STMMAC_STAT(tx_undeflow_irq),
STMMAC_STAT(tx_process_stopped_irq),
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 61f67cd34f21..e74c1d822aaa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3498,6 +3498,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
if (!(status & rx_not_ls))
sec_len = sec_len - hlen;
len = hlen;
+
+ priv->xstats.rx_split_hdr_pkt_n++;
}
skb = netdev_alloc_skb_ip_align(priv->dev, len);
--
2.7.4
^ permalink raw reply related
* [PATCH net-next 07/12] net: stmmac: Add ethtool register dump for XGMAC cores
From: Jose Abreu @ 2019-08-09 18:36 UTC (permalink / raw)
To: netdev; +Cc: Joao Pinto, Jose Abreu
In-Reply-To: <cover.1565375521.git.joabreu@synopsys.com>
Add the ethtool interface to dump the register map in XGMAC cores.
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 2 ++
.../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 11 +++++++++-
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 10 ++++++++-
.../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 24 ++++++++++++++++------
4 files changed, 39 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index dbac63972faf..7fed3d2d4a95 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -244,6 +244,7 @@
#define XGMAC_RXOVFIS BIT(16)
#define XGMAC_ABPSIS BIT(1)
#define XGMAC_TXUNFIS BIT(0)
+#define XGMAC_MAC_REGSIZE (XGMAC_MTL_QINT_STATUS(15) / 4)
/* DMA Registers */
#define XGMAC_DMA_MODE 0x00003000
@@ -321,6 +322,7 @@
#define XGMAC_TBU BIT(2)
#define XGMAC_TPS BIT(1)
#define XGMAC_TI BIT(0)
+#define XGMAC_REGSIZE ((0x0000317c + (0x80 * 15)) / 4)
/* Descriptors */
#define XGMAC_TDES2_IOC BIT(31)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 3708cdb16ff7..2093c996a090 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -239,6 +239,15 @@ static void dwxgmac2_config_cbs(struct mac_device_info *hw,
writel(value, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(queue));
}
+static void dwxgmac2_dump_regs(struct mac_device_info *hw, u32 *reg_space)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ int i;
+
+ for (i = 0; i < XGMAC_MAC_REGSIZE; i++)
+ reg_space[i] = readl(ioaddr + i * 4);
+}
+
static int dwxgmac2_host_irq_status(struct mac_device_info *hw,
struct stmmac_extra_stats *x)
{
@@ -1086,7 +1095,7 @@ const struct stmmac_ops dwxgmac210_ops = {
.set_mtl_tx_queue_weight = dwxgmac2_set_mtl_tx_queue_weight,
.map_mtl_to_dma = dwxgmac2_map_mtl_to_dma,
.config_cbs = dwxgmac2_config_cbs,
- .dump_regs = NULL,
+ .dump_regs = dwxgmac2_dump_regs,
.host_irq_status = dwxgmac2_host_irq_status,
.host_mtl_irq_status = dwxgmac2_host_mtl_irq_status,
.flow_ctrl = dwxgmac2_flow_ctrl,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 0f3de4895cf7..42c13d144203 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -128,6 +128,14 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
writel(XGMAC_RDPS, ioaddr + XGMAC_RX_EDMA_CTRL);
}
+static void dwxgmac2_dma_dump_regs(void __iomem *ioaddr, u32 *reg_space)
+{
+ int i;
+
+ for (i = (XGMAC_DMA_MODE / 4); i < XGMAC_REGSIZE; i++)
+ reg_space[i] = readl(ioaddr + i * 4);
+}
+
static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode,
u32 channel, int fifosz, u8 qmode)
{
@@ -496,7 +504,7 @@ const struct stmmac_dma_ops dwxgmac210_dma_ops = {
.init_rx_chan = dwxgmac2_dma_init_rx_chan,
.init_tx_chan = dwxgmac2_dma_init_tx_chan,
.axi = dwxgmac2_dma_axi,
- .dump_regs = NULL,
+ .dump_regs = dwxgmac2_dma_dump_regs,
.dma_rx_mode = dwxgmac2_dma_rx_mode,
.dma_tx_mode = dwxgmac2_dma_tx_mode,
.enable_dma_irq = dwxgmac2_enable_dma_irq,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index eb784fdb6d32..8828a5481f61 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -18,10 +18,12 @@
#include "stmmac.h"
#include "dwmac_dma.h"
+#include "dwxgmac2.h"
#define REG_SPACE_SIZE 0x1060
#define MAC100_ETHTOOL_NAME "st_mac100"
#define GMAC_ETHTOOL_NAME "st_gmac"
+#define XGMAC_ETHTOOL_NAME "st_xgmac"
#define ETHTOOL_DMA_OFFSET 55
@@ -260,6 +262,8 @@ static void stmmac_ethtool_getdrvinfo(struct net_device *dev,
if (priv->plat->has_gmac || priv->plat->has_gmac4)
strlcpy(info->driver, GMAC_ETHTOOL_NAME, sizeof(info->driver));
+ else if (priv->plat->has_xgmac)
+ strlcpy(info->driver, XGMAC_ETHTOOL_NAME, sizeof(info->driver));
else
strlcpy(info->driver, MAC100_ETHTOOL_NAME,
sizeof(info->driver));
@@ -405,23 +409,31 @@ static int stmmac_check_if_running(struct net_device *dev)
static int stmmac_ethtool_get_regs_len(struct net_device *dev)
{
+ struct stmmac_priv *priv = netdev_priv(dev);
+
+ if (priv->plat->has_xgmac)
+ return XGMAC_REGSIZE * 4;
return REG_SPACE_SIZE;
}
static void stmmac_ethtool_gregs(struct net_device *dev,
struct ethtool_regs *regs, void *space)
{
- u32 *reg_space = (u32 *) space;
-
struct stmmac_priv *priv = netdev_priv(dev);
+ int size = stmmac_ethtool_get_regs_len(dev);
+ u32 *reg_space = (u32 *) space;
- memset(reg_space, 0x0, REG_SPACE_SIZE);
+ memset(reg_space, 0x0, size);
stmmac_dump_mac_regs(priv, priv->hw, reg_space);
stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space);
- /* Copy DMA registers to where ethtool expects them */
- memcpy(®_space[ETHTOOL_DMA_OFFSET], ®_space[DMA_BUS_MODE / 4],
- NUM_DWMAC1000_DMA_REGS * 4);
+
+ if (!priv->plat->has_xgmac) {
+ /* Copy DMA registers to where ethtool expects them */
+ memcpy(®_space[ETHTOOL_DMA_OFFSET],
+ ®_space[DMA_BUS_MODE / 4],
+ NUM_DWMAC1000_DMA_REGS * 4);
+ }
}
static int stmmac_nway_reset(struct net_device *dev)
--
2.7.4
^ permalink raw reply related
* [PATCH net-next 12/12] net: stmmac: selftests: Add selftest for VLAN TX Offload
From: Jose Abreu @ 2019-08-09 18:36 UTC (permalink / raw)
To: netdev; +Cc: Joao Pinto, Jose Abreu
In-Reply-To: <cover.1565375521.git.joabreu@synopsys.com>
Add 2 new selftests for VLAN Insertion offloading. Tests are for inner
and outer VLAN offloading.
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
.../net/ethernet/stmicro/stmmac/stmmac_selftests.c | 96 +++++++++++++++++++++-
1 file changed, 94 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index acfab86431b1..ecc8602c6799 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -296,7 +296,9 @@ static int __stmmac_test_loopback(struct stmmac_priv *priv,
tpriv->pt.dev = priv->dev;
tpriv->pt.af_packet_priv = tpriv;
tpriv->packet = attr;
- dev_add_pack(&tpriv->pt);
+
+ if (!attr->dont_wait)
+ dev_add_pack(&tpriv->pt);
skb = stmmac_test_get_udp_skb(priv, attr);
if (!skb) {
@@ -319,7 +321,8 @@ static int __stmmac_test_loopback(struct stmmac_priv *priv,
ret = !tpriv->ok;
cleanup:
- dev_remove_pack(&tpriv->pt);
+ if (!attr->dont_wait)
+ dev_remove_pack(&tpriv->pt);
kfree(tpriv);
return ret;
}
@@ -731,6 +734,9 @@ static int stmmac_test_vlan_validate(struct sk_buff *skb,
struct ethhdr *ehdr;
struct udphdr *uhdr;
struct iphdr *ihdr;
+ u16 proto;
+
+ proto = tpriv->double_vlan ? ETH_P_8021AD : ETH_P_8021Q;
skb = skb_unshare(skb, GFP_ATOMIC);
if (!skb)
@@ -740,6 +746,12 @@ static int stmmac_test_vlan_validate(struct sk_buff *skb,
goto out;
if (skb_headlen(skb) < (STMMAC_TEST_PKT_SIZE - ETH_HLEN))
goto out;
+ if (tpriv->vlan_id) {
+ if (skb->vlan_proto != htons(proto))
+ goto out;
+ if (skb->vlan_tci != tpriv->vlan_id)
+ goto out;
+ }
ehdr = (struct ethhdr *)skb_mac_header(skb);
if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->dst))
@@ -1084,6 +1096,78 @@ static int stmmac_test_reg_sar(struct stmmac_priv *priv)
return ret;
}
+static int stmmac_test_vlanoff_common(struct stmmac_priv *priv, bool svlan)
+{
+ struct stmmac_packet_attrs attr = { };
+ struct stmmac_test_priv *tpriv;
+ struct sk_buff *skb = NULL;
+ int ret = 0;
+ u16 proto;
+
+ if (!priv->dma_cap.vlins)
+ return -EOPNOTSUPP;
+
+ tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+ if (!tpriv)
+ return -ENOMEM;
+
+ proto = svlan ? ETH_P_8021AD : ETH_P_8021Q;
+
+ tpriv->ok = false;
+ tpriv->double_vlan = svlan;
+ init_completion(&tpriv->comp);
+
+ tpriv->pt.type = svlan ? htons(ETH_P_8021Q) : htons(ETH_P_IP);
+ tpriv->pt.func = stmmac_test_vlan_validate;
+ tpriv->pt.dev = priv->dev;
+ tpriv->pt.af_packet_priv = tpriv;
+ tpriv->packet = &attr;
+ tpriv->vlan_id = 0x123;
+ dev_add_pack(&tpriv->pt);
+
+ ret = vlan_vid_add(priv->dev, htons(proto), tpriv->vlan_id);
+ if (ret)
+ goto cleanup;
+
+ attr.dst = priv->dev->dev_addr;
+
+ skb = stmmac_test_get_udp_skb(priv, &attr);
+ if (!skb) {
+ ret = -ENOMEM;
+ goto vlan_del;
+ }
+
+ __vlan_hwaccel_put_tag(skb, htons(proto), tpriv->vlan_id);
+ skb->protocol = htons(proto);
+
+ skb_set_queue_mapping(skb, 0);
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ goto vlan_del;
+
+ wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+ ret = tpriv->ok ? 0 : -ETIMEDOUT;
+
+vlan_del:
+ vlan_vid_del(priv->dev, htons(proto), tpriv->vlan_id);
+cleanup:
+ dev_remove_pack(&tpriv->pt);
+ kfree(tpriv);
+ return ret;
+}
+
+static int stmmac_test_vlanoff(struct stmmac_priv *priv)
+{
+ return stmmac_test_vlanoff_common(priv, false);
+}
+
+static int stmmac_test_svlanoff(struct stmmac_priv *priv)
+{
+ if (!priv->dma_cap.dvlan)
+ return -EOPNOTSUPP;
+ return stmmac_test_vlanoff_common(priv, true);
+}
+
#define STMMAC_LOOPBACK_NONE 0
#define STMMAC_LOOPBACK_MAC 1
#define STMMAC_LOOPBACK_PHY 2
@@ -1161,6 +1245,14 @@ static const struct stmmac_test {
.name = "SA Replacement (reg)",
.lb = STMMAC_LOOPBACK_PHY,
.fn = stmmac_test_reg_sar,
+ }, {
+ .name = "VLAN TX Insertion ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_vlanoff,
+ }, {
+ .name = "SVLAN TX Insertion ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_svlanoff,
},
};
--
2.7.4
^ permalink raw reply related
* Re: [PATCH v2 2/2] tcp: Update TCP_BASE_MSS comment
From: Josh Hunt @ 2019-08-09 18:38 UTC (permalink / raw)
To: netdev, davem; +Cc: Eric Dumazet, edumazet, ncardwell
In-Reply-To: <c1c2febd-742d-4e13-af9f-a7d7ec936ed9@gmail.com>
On 8/7/19 11:13 PM, Eric Dumazet wrote:
>
>
> On 8/8/19 1:52 AM, Josh Hunt wrote:
>> TCP_BASE_MSS is used as the default initial MSS value when MTU probing is
>> enabled. Update the comment to reflect this.
>>
>> Suggested-by: Neal Cardwell <ncardwell@google.com>
>> Signed-off-by: Josh Hunt <johunt@akamai.com>
>> ---
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
>
Dave
I forgot to tag these at net-next. Do I need to resubmit a v3 with
net-next in the subject?
Thanks
Josh
^ permalink raw reply
* Re: [Potential Spoof] Re: [PATCH net-next] net/ncsi: allow to customize BMC MAC Address offset
From: Vijay Khemka @ 2019-08-09 18:38 UTC (permalink / raw)
To: Tao Ren, Andrew Lunn
Cc: Jakub Kicinski, netdev@vger.kernel.org, openbmc@lists.ozlabs.org,
linux-kernel@vger.kernel.org, Samuel Mendoza-Jonas,
David S . Miller, William Kennington
On 8/8/19, 3:27 PM, "openbmc on behalf of Tao Ren" <openbmc-bounces+vijaykhemka=fb.com@lists.ozlabs.org on behalf of taoren@fb.com> wrote:
On 8/8/19 2:16 PM, Andrew Lunn wrote:
> On Thu, Aug 08, 2019 at 07:02:54PM +0000, Tao Ren wrote:
>> Hi Andrew,
>>
>> On 8/8/19 6:32 AM, Andrew Lunn wrote:
>>>> Let me prepare patch v2 using device tree. I'm not sure if standard
>>>> "mac-address" fits this situation because all we need is an offset
>>>> (integer) and BMC MAC is calculated by adding the offset to NIC's
>>>> MAC address. Anyways, let me work out v2 patch we can discuss more
>>>> then.
>>>
>>> Hi Tao
>>>
>>> I don't know BMC terminology. By NICs MAC address, you are referring
>>> to the hosts MAC address? The MAC address the big CPU is using for its
>>> interface? Where does this NIC get its MAC address from? If the BMCs
>>> bootloader has access to it, it can set the mac-address property in
>>> the device tree.
>>
>> Sorry for the confusion and let me clarify more:
>>
>
>> The NIC here refers to the Network controller which provide network
>> connectivity for both BMC (via NC-SI) and Host (for example, via
>> PCIe).
>>
>
>> On Facebook Yamp BMC, BMC sends NCSI_OEM_GET_MAC command (as an
>> ethernet packet) to the Network Controller while bringing up eth0,
>> and the (Broadcom) Network Controller replies with the Base MAC
>> Address reserved for the platform. As for Yamp, Base-MAC and
>> Base-MAC+1 are used by Host (big CPU) and Base-MAC+2 are assigned to
>> BMC. In my opinion, Base MAC and MAC address assignments are
>> controlled by Network Controller, which is transparent to both BMC
>> and Host.
>
> Hi Tao
>
> I've not done any work in the BMC field, so thanks for explaining
> this.
>
> In a typical embedded system, each network interface is assigned a MAC
> address by the vendor. But here, things are different. The BMC SoC
> network interface has not been assigned a MAC address, it needs to ask
> the network controller for its MAC address, and then do some magical
> transformation on the answer to derive a MAC address for
> itself. Correct?
Yes. It's correct.
> It seems like a better design would of been, the BMC sends a
> NCSI_OEM_GET_BMC_MAC and the answer it gets back is the MAC address
> the BMC should use. No magic involved. But i guess it is too late to
> do that now.
Some NCSI Network Controllers support such OEM command (Get Provisioned BMC MAC Address), but unfortunately it's not supported on Yamp.
>> I'm not sure if I understand your suggestion correctly: do you mean
>> we should move the logic (GET_MAC from Network Controller, adding
>> offset and configuring BMC MAC) from kernel to boot loader?
>
> In general, the kernel is generic. It probably boots on any ARM system
> which is has the needed modules for. The bootloader is often much more
> specific. It might not be fully platform specific, but it will be at
> least specific to the general family of BMC SoCs. If you consider the
> combination of the BMC bootloader and the device tree blob, you have
> something specific to the platform. This magical transformation of
> adding 2 seems to be very platform specific. So having this magic in
> the bootloader+DT seems like the best place to put it.
I understand your concern now. Thank you for the explanation.
> However, how you pass the resulting MAC address to the kernel should
> be as generic as possible. The DT "mac-address" property is very
> generic, many MAC drivers understand it. Using it also allows for
> vendors which actually assign a MAC address to the BMC to pass it to
> the BMC, avoiding all this NCSI_OEM_GET_MAC handshake. Having an API
> which just passing '2' is not generic at all.
After giving it more thought, I'm thinking about adding ncsi dt node with following structure (mac/ncsi similar to mac/mdio/phy):
&mac0 {
/* MAC properties... */
use-ncsi;
ncsi {
/* ncsi level properties if any */
Tao, I like this idea but keep this only hardware specific.
package@0 {
/* package level properties if any */
channel@0 {
/* channel level properties if any */
bmc-mac-offset = <2>;
Every NIC vendor doesn't need this offset so it is specific to BCM card only as you see this
increment has been done only for BCM card so you may want to pass bcm specific only.
};
channel@1 {
/* channel #1 properties */
};
};
/* package #1 properties start here.. */
};
};
The reasons behind this are:
1) mac driver doesn't need to parse "mac-offset" stuff: these ncsi-network-controller specific settings should be parsed in ncsi stack.
2) get_bmc_mac_address command is a channel specific command, and technically people can configure different offset/formula for different channels.
Any concerns or suggestions?
Thanks,
Tao
^ permalink raw reply
* [PATCH net-next v2 0/4] net: phy: realtek: add support for integrated 2.5Gbps PHY in RTL8125
From: Heiner Kallweit @ 2019-08-09 18:41 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, David Miller; +Cc: netdev@vger.kernel.org
This series adds support for the integrated 2.5Gbps PHY in RTL8125.
First three patches add necessary functionality to phylib.
Changes in v2:
- added patch 1
- changed patch 4 to use a fake PHY ID that is injected by the
network driver. This allows to use a dedicated PHY driver.
Heiner Kallweit (4):
net: phy: simplify genphy_config_advert by using the
linkmode_adv_to_xxx_t functions
net: phy: prepare phylib to deal with PHY's extending Clause 22
net: phy: add phy_modify_paged_changed
net: phy: realtek: add support for the 2.5Gbps PHY in RTL8125
drivers/net/phy/phy-core.c | 29 ++++++++++++++---
drivers/net/phy/phy_device.c | 49 +++++++++++-----------------
drivers/net/phy/realtek.c | 62 ++++++++++++++++++++++++++++++++++++
include/linux/phy.h | 10 +++++-
4 files changed, 113 insertions(+), 37 deletions(-)
--
2.22.0
^ permalink raw reply
* RE: Clause 73 and USXGMII
From: Jose Abreu @ 2019-08-09 18:42 UTC (permalink / raw)
To: Russell King - ARM Linux admin, Jose Abreu
Cc: netdev@vger.kernel.org, Andrew Lunn, Florian Fainelli,
Heiner Kallweit
In-Reply-To: <20190808120903.GF5193@shell.armlinux.org.uk>
From: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Date: Aug/08/2019, 13:09:03 (UTC+00:00)
> On Thu, Aug 08, 2019 at 11:45:29AM +0000, Jose Abreu wrote:
> > From: Russell King - ARM Linux admin <linux@armlinux.org.uk>
> > Date: Aug/08/2019, 10:23:13 (UTC+00:00)
> >
> > > On Thu, Aug 08, 2019 at 09:02:57AM +0000, Jose Abreu wrote:
> > > > From: Russell King - ARM Linux admin <linux@armlinux.org.uk>
> > > > Date: Aug/08/2019, 09:26:26 (UTC+00:00)
> > > >
> > > > > Hi,
> > > > >
> > > > > Have you tried enabling debug mode in phylink (add #define DEBUG at the
> > > > > top of the file) ?
> > > >
> > > > Yes:
> > > >
> > > > [ With > 2.5G modes removed ]
> > > > # dmesg | grep -i phy
> > > > libphy: stmmac: probed
> > > > stmmaceth 0000:04:00.0 enp4s0: PHY [stmmac-1:00] driver [Synopsys 10G]
> > > > stmmaceth 0000:04:00.0 enp4s0: phy: setting supported
> > > > 00,00000000,0002e040 advertising 00,00000000,0002e040
> > > > stmmaceth 0000:04:00.0 enp4s0: configuring for phy/usxgmii link mode
> > > > stmmaceth 0000:04:00.0 enp4s0: phylink_mac_config:
> > > > mode=phy/usxgmii/Unknown/Unknown adv=00,00000000,0002e040 pause=10
> > > > link=0 an=1
> > > > stmmaceth 0000:04:00.0 enp4s0: phy link down usxgmii/Unknown/Unknown
> > >
> > > This shows that the PHY isn't reporting that the link came up. Did
> > > the PHY negotiate link? If so, why isn't it reporting that the link
> > > came up? Maybe something is mis-programming the capability bits in
> > > the PHY? Maybe disabling the 10G speeds disables everything faster
> > > than 1G?
> >
> > Autoneg was started but never finishes and disabling 10G modes is
> > causing autoneg to fail.
> >
> > >
> > > > [ Without any limit ]
> > > > # dmesg | grep -i phy
> > > > libphy: stmmac: probed
> > > > stmmaceth 0000:04:00.0 enp4s0: PHY [stmmac-1:00] driver [Synopsys 10G]
> > > > stmmaceth 0000:04:00.0 enp4s0: phy: setting supported
> > > > 00,00000000,000ee040 advertising 00,00000000,000ee040
> > > > stmmaceth 0000:04:00.0 enp4s0: configuring for phy/usxgmii link mode
> > > > stmmaceth 0000:04:00.0 enp4s0: phylink_mac_config:
> > > > mode=phy/usxgmii/Unknown/Unknown adv=00,00000000,000ee040 pause=10
> > > > link=0 an=1
> > > > stmmaceth 0000:04:00.0 enp4s0: phy link down usxgmii/Unknown/Unknown
> > > > stmmaceth 0000:04:00.0 enp4s0: phy link up usxgmii/2.5Gbps/Full
> > > > stmmaceth 0000:04:00.0 enp4s0: phylink_mac_config:
> > > > mode=phy/usxgmii/2.5Gbps/Full adv=00,00000000,00000000 pause=0f link=1
> > > > an=0
> > > >
> > > > I'm thinking on whether this can be related with USXGMII. As link is
> > > > operating in 10G but I configure USXGMII for 2.5G maybe autoneg outcome
> > > > should always be 10G ?
> > >
> > > As I understand USXGMII (which isn't very well, because the spec isn't
> > > available) I believe that it operates in a similar way to SGMII where
> > > data is replicated the appropriate number of times to achieve the link
> > > speed. So, the USXGMII link always operates at a bit rate equivalent
> > > to 10G, but data is replicated twice for 5G, four times for 2.5G, ten
> > > times for 1G, etc.
> > >
> > > I notice that you don't say that you support any copper speeds, which
> > > brings up the question about what the PHY's media is...
> >
> > I just added the speeds that XPCS supports within Clause 73
> > specification:
> > Technology Ability field. Indicates the supported technologies:
> > A0: When this bit is set to 1, the 1000BASE-KX technology is supported
> > A1: When this bit is set to 1, the 10GBASE-KX4 technology is supported
> > A2: When this bit is set to 1, the 10GBASE-KR technology is supported
> > A11: When this bit is set to 1, the 2.5GBASE-KX technology is supported
> > A12: When this bit is set to 1, the 5GBASE-KR technology is supported
> >
> > And, within USXGMII, XPCS supports the following:
> > Single Port: 10G-SXGMII, 5G-SXGMII, 2.5G-SXGMII
> > Dual Port: 10G-DXGMII, 5G-DXGMII
> > Quad Port: 10G-XGMII
> >
> > My HW is currently fixed for USXGMII at 2.5G.
>
> So what do you actually have?
>
> +-----+ +----------+
> | STM | USXGMII | Synopsis | ????????
> | MAC | <----------> | PHY | <----------> ????
> | | link | |
> +-----+ +----------+ (media side)
>
> Does the above refer to what the STM MAC and Synopsis PHY support for
> the USXGMII link? What about the media side?
This is my setup:
XGMAC <-> XPCS <-> Xilinx SERDES <-> SFP
I'm not sure about the media side. I use a passive SFP cable if that
helps ...
>
> If you just tell phylink what the USXGMII part is capable of, there's
> no way for the media side to do anything unless it also supports the
> capabilities that the USXGMII link supports.
>
> phylink doesn't do any kind of translation of capabilities of the
> MAC-PHY link to media capabilities; this is why the documentation for
> the validate callback has this note:
>
> * Note that the PHY may be able to transform from one connection
> * technology to another, so, eg, don't clear 1000BaseX just
> * because the MAC is unable to BaseX mode. This is more about
> * clearing unsupported speeds and duplex settings.
>
> To put it another way - if the link between the MAC and PHY supports
> 10G speed, the MAC should indicate that _all_ 10G ethtool link modes
> that support 10G speed are set in the supported mask. If the link
> supports 1G speeds, then all 1G ethtool link modes that can be
> supported irrespective of technology should be set. This is because
> the capabilities of the overall setup is the logical union of the
> capabilities of each device in the setup.
>
> So, if, say, the USXGMII link can support 2.5Gbps, and the PHY
> supports copper media at 2.5Gbps via the NBASE-T specifications,
> then the system as a whole can support
> ETHTOOL_LINK_MODE_2500baseT_Full_BIT. If the MAC decides to clear
> that bit, then the system can't support 2.5Gbps even if the PHY
> does.
>
> Maybe what we need to do with phylink is move away from exposing
> ethtool link modes to the MAC validate callback, and instead
> devise a way for the MAC to indicate merely the speeds and duplexes
> it supports without any of the technology stuff getting in the way.
>
> --
> RMK's Patch system: https://urldefense.proofpoint.com/v2/url?u=https-3A__www.armlinux.org.uk_developer_patches_&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=WXDOl4mhp4Xwg4de0KTKH9RNulfBLbkA6MQGZ1G9_FI&s=DbL6asfMKlMoCCeLhWoeBLrUQ0FSIrLkJCKoVbKVEQg&e=
> FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
> According to speedtest.net: 11.9Mbps down 500kbps up
---
Thanks,
Jose Miguel Abreu
^ permalink raw reply
* Re: [PATCH v2 2/2] tcp: Update TCP_BASE_MSS comment
From: David Miller @ 2019-08-09 18:43 UTC (permalink / raw)
To: johunt; +Cc: netdev, eric.dumazet, edumazet, ncardwell
In-Reply-To: <3353da97-e015-7303-6e10-7e3e99a50a8f@akamai.com>
From: Josh Hunt <johunt@akamai.com>
Date: Fri, 9 Aug 2019 11:38:05 -0700
> I forgot to tag these at net-next. Do I need to resubmit a v3 with
> net-next in the subject?
No need.
^ permalink raw reply
* Re: [PATCH v2 2/2] tcp: Update TCP_BASE_MSS comment
From: Josh Hunt @ 2019-08-09 18:43 UTC (permalink / raw)
To: David Miller; +Cc: netdev, eric.dumazet, edumazet, ncardwell
In-Reply-To: <20190809.114314.1715215925183719227.davem@davemloft.net>
On 8/9/19 11:43 AM, David Miller wrote:
> From: Josh Hunt <johunt@akamai.com>
> Date: Fri, 9 Aug 2019 11:38:05 -0700
>
>> I forgot to tag these at net-next. Do I need to resubmit a v3 with
>> net-next in the subject?
>
> No need.
>
Thanks
^ permalink raw reply
* [PATCH net-next v2 1/4] net: phy: simplify genphy_config_advert by using the linkmode_adv_to_xxx_t functions
From: Heiner Kallweit @ 2019-08-09 18:43 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, David Miller; +Cc: netdev@vger.kernel.org
In-Reply-To: <755b2bc9-22cb-f529-4188-0f4b6e48efbd@gmail.com>
Using linkmode_adv_to_mii_adv_t and linkmode_adv_to_mii_ctrl1000_t
allows to simplify the code. In addition avoiding the conversion to
the legacy u32 advertisement format allows to remove the warning.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Suggested-by: Andrew Lunn <andrew@lunn.ch>
---
Changes in v2:
- added to series
---
drivers/net/phy/phy_device.c | 22 ++++++----------------
1 file changed, 6 insertions(+), 16 deletions(-)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 7ddd91df9..a70a98dc9 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1564,24 +1564,20 @@ EXPORT_SYMBOL(phy_reset_after_clk_enable);
*/
static int genphy_config_advert(struct phy_device *phydev)
{
- u32 advertise;
- int bmsr, adv;
- int err, changed = 0;
+ int err, bmsr, changed = 0;
+ u32 adv;
/* Only allow advertising what this PHY supports */
linkmode_and(phydev->advertising, phydev->advertising,
phydev->supported);
- if (!ethtool_convert_link_mode_to_legacy_u32(&advertise,
- phydev->advertising))
- phydev_warn(phydev, "PHY advertising (%*pb) more modes than genphy supports, some modes not advertised.\n",
- __ETHTOOL_LINK_MODE_MASK_NBITS,
- phydev->advertising);
+
+ adv = linkmode_adv_to_mii_adv_t(phydev->advertising);
/* Setup standard advertisement */
err = phy_modify_changed(phydev, MII_ADVERTISE,
ADVERTISE_ALL | ADVERTISE_100BASE4 |
ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM,
- ethtool_adv_to_mii_adv_t(advertise));
+ adv);
if (err < 0)
return err;
if (err > 0)
@@ -1598,13 +1594,7 @@ static int genphy_config_advert(struct phy_device *phydev)
if (!(bmsr & BMSR_ESTATEN))
return changed;
- /* Configure gigabit if it's supported */
- adv = 0;
- if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
- phydev->supported) ||
- linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
- phydev->supported))
- adv = ethtool_adv_to_mii_ctrl1000_t(advertise);
+ adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
err = phy_modify_changed(phydev, MII_CTRL1000,
ADVERTISE_1000FULL | ADVERTISE_1000HALF,
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 3/4] net: phy: add phy_modify_paged_changed
From: Heiner Kallweit @ 2019-08-09 18:44 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, David Miller; +Cc: netdev@vger.kernel.org
In-Reply-To: <755b2bc9-22cb-f529-4188-0f4b6e48efbd@gmail.com>
Add helper function phy_modify_paged_changed, behavios is the same
as for phy_modify_changed.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
drivers/net/phy/phy-core.c | 29 ++++++++++++++++++++++++-----
include/linux/phy.h | 2 ++
2 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 16667fbac..9ae3abb2d 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -783,24 +783,43 @@ int phy_write_paged(struct phy_device *phydev, int page, u32 regnum, u16 val)
EXPORT_SYMBOL(phy_write_paged);
/**
- * phy_modify_paged() - Convenience function for modifying a paged register
+ * phy_modify_paged_changed() - Function for modifying a paged register
* @phydev: a pointer to a &struct phy_device
* @page: the page for the phy
* @regnum: register number
* @mask: bit mask of bits to clear
* @set: bit mask of bits to set
*
- * Same rules as for phy_read() and phy_write().
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
*/
-int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
- u16 mask, u16 set)
+int phy_modify_paged_changed(struct phy_device *phydev, int page, u32 regnum,
+ u16 mask, u16 set)
{
int ret = 0, oldpage;
oldpage = phy_select_page(phydev, page);
if (oldpage >= 0)
- ret = __phy_modify(phydev, regnum, mask, set);
+ ret = __phy_modify_changed(phydev, regnum, mask, set);
return phy_restore_page(phydev, oldpage, ret);
}
+EXPORT_SYMBOL(phy_modify_paged_changed);
+
+/**
+ * phy_modify_paged() - Convenience function for modifying a paged register
+ * @phydev: a pointer to a &struct phy_device
+ * @page: the page for the phy
+ * @regnum: register number
+ * @mask: bit mask of bits to clear
+ * @set: bit mask of bits to set
+ *
+ * Same rules as for phy_read() and phy_write().
+ */
+int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
+ u16 mask, u16 set)
+{
+ int ret = phy_modify_paged_changed(phydev, page, regnum, mask, set);
+
+ return ret < 0 ? ret : 0;
+}
EXPORT_SYMBOL(phy_modify_paged);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7117825ee..781f4810c 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -984,6 +984,8 @@ int phy_select_page(struct phy_device *phydev, int page);
int phy_restore_page(struct phy_device *phydev, int oldpage, int ret);
int phy_read_paged(struct phy_device *phydev, int page, u32 regnum);
int phy_write_paged(struct phy_device *phydev, int page, u32 regnum, u16 val);
+int phy_modify_paged_changed(struct phy_device *phydev, int page, u32 regnum,
+ u16 mask, u16 set);
int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
u16 mask, u16 set);
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 2/4] net: phy: prepare phylib to deal with PHY's extending Clause 22
From: Heiner Kallweit @ 2019-08-09 18:43 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, David Miller; +Cc: netdev@vger.kernel.org
In-Reply-To: <755b2bc9-22cb-f529-4188-0f4b6e48efbd@gmail.com>
The integrated PHY in 2.5Gbps chip RTL8125 is the first (known to me)
PHY that uses standard Clause 22 for all modes up to 1Gbps and adds
2.5Gbps control using vendor-specific registers. To use phylib for
the standard part little extensions are needed:
- Move most of genphy_config_aneg to a new function
__genphy_config_aneg that takes a parameter whether restarting
auto-negotiation is needed (depending on whether content of
vendor-specific advertisement register changed).
- Don't clear phydev->lp_advertising in genphy_read_status so that
we can set non-C22 mode flags before.
Basically both changes mimic the behavior of the equivalent Clause 45
functions.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
drivers/net/phy/phy_device.c | 27 ++++++++++++---------------
include/linux/phy.h | 8 +++++++-
2 files changed, 19 insertions(+), 16 deletions(-)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index a70a98dc9..b039632de 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1671,18 +1671,20 @@ int genphy_restart_aneg(struct phy_device *phydev)
EXPORT_SYMBOL(genphy_restart_aneg);
/**
- * genphy_config_aneg - restart auto-negotiation or write BMCR
+ * __genphy_config_aneg - restart auto-negotiation or write BMCR
* @phydev: target phy_device struct
+ * @changed: whether autoneg is requested
*
* Description: If auto-negotiation is enabled, we configure the
* advertising, and then restart auto-negotiation. If it is not
* enabled, then we write the BMCR.
*/
-int genphy_config_aneg(struct phy_device *phydev)
+int __genphy_config_aneg(struct phy_device *phydev, bool changed)
{
- int err, changed;
+ int err;
- changed = genphy_config_eee_advert(phydev);
+ if (genphy_config_eee_advert(phydev))
+ changed = true;
if (AUTONEG_ENABLE != phydev->autoneg)
return genphy_setup_forced(phydev);
@@ -1690,10 +1692,10 @@ int genphy_config_aneg(struct phy_device *phydev)
err = genphy_config_advert(phydev);
if (err < 0) /* error */
return err;
+ else if (err)
+ changed = true;
- changed |= err;
-
- if (changed == 0) {
+ if (!changed) {
/* Advertisement hasn't changed, but maybe aneg was never on to
* begin with? Or maybe phy was isolated?
*/
@@ -1703,18 +1705,15 @@ int genphy_config_aneg(struct phy_device *phydev)
return ctl;
if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
- changed = 1; /* do restart aneg */
+ changed = true; /* do restart aneg */
}
/* Only restart aneg if we are advertising something different
* than we were before.
*/
- if (changed > 0)
- return genphy_restart_aneg(phydev);
-
- return 0;
+ return changed ? genphy_restart_aneg(phydev) : 0;
}
-EXPORT_SYMBOL(genphy_config_aneg);
+EXPORT_SYMBOL(__genphy_config_aneg);
/**
* genphy_aneg_done - return auto-negotiation status
@@ -1801,8 +1800,6 @@ int genphy_read_status(struct phy_device *phydev)
phydev->pause = 0;
phydev->asym_pause = 0;
- linkmode_zero(phydev->lp_advertising);
-
if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
if (phydev->is_gigabit_capable) {
lpagb = phy_read(phydev, MII_STAT1000);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 462b90b73..7117825ee 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1069,7 +1069,7 @@ int genphy_read_abilities(struct phy_device *phydev);
int genphy_setup_forced(struct phy_device *phydev);
int genphy_restart_aneg(struct phy_device *phydev);
int genphy_config_eee_advert(struct phy_device *phydev);
-int genphy_config_aneg(struct phy_device *phydev);
+int __genphy_config_aneg(struct phy_device *phydev, bool changed);
int genphy_aneg_done(struct phy_device *phydev);
int genphy_update_link(struct phy_device *phydev);
int genphy_read_status(struct phy_device *phydev);
@@ -1077,6 +1077,12 @@ int genphy_suspend(struct phy_device *phydev);
int genphy_resume(struct phy_device *phydev);
int genphy_loopback(struct phy_device *phydev, bool enable);
int genphy_soft_reset(struct phy_device *phydev);
+
+static inline int genphy_config_aneg(struct phy_device *phydev)
+{
+ return __genphy_config_aneg(phydev, false);
+}
+
static inline int genphy_no_soft_reset(struct phy_device *phydev)
{
return 0;
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 4/4] net: phy: realtek: add support for the 2.5Gbps PHY in RTL8125
From: Heiner Kallweit @ 2019-08-09 18:45 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, David Miller; +Cc: netdev@vger.kernel.org
In-Reply-To: <755b2bc9-22cb-f529-4188-0f4b6e48efbd@gmail.com>
This adds support for the integrated 2.5Gbps PHY in Realtek RTL8125.
Advertisement of 2.5Gbps mode is done via a vendor-specific register.
Same applies to reading NBase-T link partner advertisement.
Unfortunately this 2.5Gbps PHY shares the PHY ID with the integrated
1Gbps PHY's in other Realtek network chips and so far no method is
known to differentiate them. As a workaround use a dedicated fake PHY ID
that is set by the network driver by intercepting the MDIO PHY ID read.
v2:
- Create dedicated PHY driver and use a fake PHY ID that is injected by
the network driver. Suggested by Andrew Lunn.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
drivers/net/phy/realtek.c | 62 +++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index a669945eb..5b466e80d 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -39,6 +39,11 @@
#define RTL8366RB_POWER_SAVE 0x15
#define RTL8366RB_POWER_SAVE_ON BIT(12)
+#define RTL_ADV_2500FULL BIT(7)
+#define RTL_LPADV_10000FULL BIT(11)
+#define RTL_LPADV_5000FULL BIT(6)
+#define RTL_LPADV_2500FULL BIT(5)
+
MODULE_DESCRIPTION("Realtek PHY driver");
MODULE_AUTHOR("Johnson Leung");
MODULE_LICENSE("GPL");
@@ -256,6 +261,53 @@ static int rtl8366rb_config_init(struct phy_device *phydev)
return ret;
}
+static int rtl8125_get_features(struct phy_device *phydev)
+{
+ linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ phydev->supported);
+
+ return genphy_read_abilities(phydev);
+}
+
+static int rtl8125_config_aneg(struct phy_device *phydev)
+{
+ int ret = 0;
+
+ if (phydev->autoneg == AUTONEG_ENABLE) {
+ u16 adv2500 = 0;
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ phydev->advertising))
+ adv2500 = RTL_ADV_2500FULL;
+
+ ret = phy_modify_paged_changed(phydev, 0xa5d, 0x12,
+ RTL_ADV_2500FULL, adv2500);
+ if (ret < 0)
+ return ret;
+ }
+
+ return __genphy_config_aneg(phydev, ret);
+}
+
+static int rtl8125_read_status(struct phy_device *phydev)
+{
+ if (phydev->autoneg == AUTONEG_ENABLE) {
+ int lpadv = phy_read_paged(phydev, 0xa5d, 0x13);
+
+ if (lpadv < 0)
+ return lpadv;
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ phydev->lp_advertising, lpadv & RTL_LPADV_10000FULL);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+ phydev->lp_advertising, lpadv & RTL_LPADV_5000FULL);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ phydev->lp_advertising, lpadv & RTL_LPADV_2500FULL);
+ }
+
+ return genphy_read_status(phydev);
+}
+
static struct phy_driver realtek_drvs[] = {
{
PHY_ID_MATCH_EXACT(0x00008201),
@@ -332,6 +384,16 @@ static struct phy_driver realtek_drvs[] = {
.resume = genphy_resume,
.read_page = rtl821x_read_page,
.write_page = rtl821x_write_page,
+ }, {
+ PHY_ID_MATCH_EXACT(0x001cca50),
+ .name = "RTL8125 2.5Gbps internal",
+ .get_features = rtl8125_get_features,
+ .config_aneg = rtl8125_config_aneg,
+ .read_status = rtl8125_read_status,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .read_page = rtl821x_read_page,
+ .write_page = rtl821x_write_page,
}, {
PHY_ID_MATCH_EXACT(0x001cc961),
.name = "RTL8366RB Gigabit Ethernet",
--
2.22.0
^ permalink raw reply related
* Re: [PATCH bpf-next v5 0/6] xdp: Add devmap_hash map type
From: Toke Høiland-Jørgensen @ 2019-08-09 18:45 UTC (permalink / raw)
To: Jesper Dangaard Brouer, Y Song
Cc: Alexei Starovoitov, Daniel Borkmann, Alexei Starovoitov,
Network Development, David Miller, Jakub Kicinski,
Björn Töpel, Yonghong Song, brouer
In-Reply-To: <20190808220516.1adeca9a@carbon>
Jesper Dangaard Brouer <brouer@redhat.com> writes:
> On Thu, 8 Aug 2019 12:57:05 -0700
> Y Song <ys114321@gmail.com> wrote:
>
>> On Thu, Aug 8, 2019 at 12:43 PM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>> >
>> > Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:
>> >
>> > > On Fri, Jul 26, 2019 at 9:06 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>> > >>
>> > >> This series adds a new map type, devmap_hash, that works like the existing
>> > >> devmap type, but using a hash-based indexing scheme. This is useful for the use
>> > >> case where a devmap is indexed by ifindex (for instance for use with the routing
>> > >> table lookup helper). For this use case, the regular devmap needs to be sized
>> > >> after the maximum ifindex number, not the number of devices in it. A hash-based
>> > >> indexing scheme makes it possible to size the map after the number of devices it
>> > >> should contain instead.
>> > >>
>> > >> This was previously part of my patch series that also turned the regular
>> > >> bpf_redirect() helper into a map-based one; for this series I just pulled out
>> > >> the patches that introduced the new map type.
>> > >>
>> > >> Changelog:
>> > >>
>> > >> v5:
>> > >>
>> > >> - Dynamically set the number of hash buckets by rounding up max_entries to the
>> > >> nearest power of two (mirroring the regular hashmap), as suggested by Jesper.
>> > >
>> > > fyi I'm waiting for Jesper to review this new version.
>> >
>> > Ping Jesper? :)
>>
>> Toke, the patch set has been merged to net-next.
>> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=d3406913561c322323ec2898cc58f55e79786be7
>>
>
> Yes, and I did review this... :-)
Oops, my bad; seems I accidentally muted this thread and didn't see
Jesper's review and Alexei's message about merging it. Sorry about
that...
-Toke
^ permalink raw reply
* Re: [PATCH 3/3] tipc: fix issue of calling smp_processor_id() in preemptible
From: Jakub Kicinski @ 2019-08-09 18:49 UTC (permalink / raw)
To: Ying Xue; +Cc: davem, netdev, jon.maloy, hdanton, tipc-discussion,
syzkaller-bugs
In-Reply-To: <1565335017-21302-4-git-send-email-ying.xue@windriver.com>
On Fri, 9 Aug 2019 15:16:57 +0800, Ying Xue wrote:
> Fixes: e9c1a793210f ("tipc: add dst_cache support for udp media")
> syzbot+1a68504d96cd17b33a05@syzkaller.appspotmail.com
^
Reported-by: missing here?
> Signed-off-by: Hillf Danton <hdanton@sina.com>
> Signed-off-by: Ying Xue <ying.xue@windriver.com>
^ permalink raw reply
* Re: [net 01/12] net/mlx5e: Use flow keys dissector to parse packets for ARFS
From: Saeed Mahameed @ 2019-08-09 18:49 UTC (permalink / raw)
To: jakub.kicinski@netronome.com
Cc: davem@davemloft.net, netdev@vger.kernel.org, Tariq Toukan,
Maxim Mikityanskiy
In-Reply-To: <20190808181514.4cd68a37@cakuba.netronome.com>
On Thu, 2019-08-08 at 18:15 -0700, Jakub Kicinski wrote:
> On Thu, 8 Aug 2019 20:22:00 +0000, Saeed Mahameed wrote:
> > From: Maxim Mikityanskiy <maximmi@mellanox.com>
> >
> > The current ARFS code relies on certain fields to be set in the SKB
> > (e.g. transport_header) and extracts IP addresses and ports by
> > custom
> > code that parses the packet. The necessary SKB fields, however, are
> > not
> > always set at that point, which leads to an out-of-bounds access.
> > Use
> > skb_flow_dissect_flow_keys() to get the necessary information
> > reliably,
> > fix the out-of-bounds access and reuse the code.
>
> The whole series LGTM, FWIW.
>
> I'd be curious to hear which path does not have the skb fully
> set up, could you elaborate? (I'm certainly no aRFC expert this
> is pure curiosity).
In our regression we found two use cases that might lead aRFS using un-
initialized values.
1) GRO Disabled, Usually GRO fills the necessary fields.
2) Raw socket type of tests.
And i am sure there are many other use cases. So drivers must use
skb_flow_dissect_flow_keys() for aRFS parsing and eliminate all
uncertainties.
^ permalink raw reply
* Re: [PATCH net-next] gso: enable udp gso for virtual devices
From: Josh Hunt @ 2019-08-09 18:58 UTC (permalink / raw)
To: Willem de Bruijn, Jason Baron
Cc: Alexander Duyck, David Miller, Netdev, Willem de Bruijn,
Paolo Abeni
In-Reply-To: <CAF=yD-+zYGzTTYC-oYr392qugWiYpbgykMh1p8UrrgZ2ciR=aw@mail.gmail.com>
On 6/26/19 4:41 PM, Willem de Bruijn wrote:
> On Wed, Jun 26, 2019 at 3:17 PM Jason Baron <jbaron@akamai.com> wrote:
>>
>>
>>
>> On 6/14/19 4:53 PM, Jason Baron wrote:
>>>
>>>
>>> On 6/13/19 5:20 PM, Willem de Bruijn wrote:
>>>>>>> @@ -237,6 +237,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
>>>>>>> NETIF_F_GSO_GRE_CSUM | \
>>>>>>> NETIF_F_GSO_IPXIP4 | \
>>>>>>> NETIF_F_GSO_IPXIP6 | \
>>>>>>> + NETIF_F_GSO_UDP_L4 | \
>>>>>>> NETIF_F_GSO_UDP_TUNNEL | \
>>>>>>> NETIF_F_GSO_UDP_TUNNEL_CSUM)
>>>>>>
>>>>>> Are you adding this to NETIF_F_GSO_ENCAP_ALL? Wouldn't it make more
>>>>>> sense to add it to NETIF_F_GSO_SOFTWARE?
>>>>>>
>>>>>
>>>>> Yes, I'm adding to NETIF_F_GSO_ENCAP_ALL (not very clear from the
>>>>> context). I will fix the commit log.
>>>>>
>>>>> In: 83aa025 udp: add gso support to virtual devices, the support was
>>>>> also added to NETIF_F_GSO_ENCAP_ALL (although subsequently reverted due
>>>>> to UDP GRO not being in place), so I wonder what the reason was for that?
>>>>
>>>> That was probably just a bad choice on my part.
>>>>
>>>> It worked in practice, but if NETIF_F_GSO_SOFTWARE works the same
>>>> without unexpected side effects, then I agree that it is the better choice.
>>>>
>>>> That choice does appear to change behavior when sending over tunnel
>>>> devices. Might it send tunneled GSO packets over loopback?
>>>>
>>>>
>>>
>>> I set up a test case using fou tunneling through a bridge device using
>>> the udpgso_bench_tx test where packets are not received correctly if
>>> NETIF_F_GSO_UDP_L4 is added to NETIF_F_GSO_SOFTWARE. If I have it added
>>> to NETIF_F_GSO_ENCAP_ALL, it does work correctly. So there are more
>>> fixes required to include it in NETIF_F_GSO_SOFTWARE.
>>>
>>> The use-case I have only requires it to be in NETIF_F_GSO_ENCAP_ALL, but
>>> if it needs to go in NETIF_F_GSO_SOFTWARE, I can look at what's required
>>> more next week.
>>>
>>
>> Hi,
>>
>> I haven't had a chance to investigate what goes wrong with including
>> NETIF_F_GSO_UDP_L4 in NETIF_F_GSO_SOFTWARE - but I was just wondering if
>> people are ok with NETIF_F_GSO_UDP_L4 being added to
>> NETIF_F_GSO_ENCAP_ALL and not NETIF_F_GSO_SOFTWARE (ie the original
>> patch as posted)?
>>
>> As I mentioned that is sufficient for my use-case, and its how Willem
>> originally proposed this.
>
> Indeed, based on the previous discussion this sounds fine to me.
>
Willem
Are you OK to ACK this? If not, is there something else you'd rather see
here?
Thanks
Josh
^ permalink raw reply
* Re: [net 01/12] net/mlx5e: Use flow keys dissector to parse packets for ARFS
From: Jakub Kicinski @ 2019-08-09 19:01 UTC (permalink / raw)
To: Saeed Mahameed
Cc: davem@davemloft.net, netdev@vger.kernel.org, Tariq Toukan,
Maxim Mikityanskiy
In-Reply-To: <02ebed305b6bb50f272c7f3decfa204dc72311f0.camel@mellanox.com>
On Fri, 9 Aug 2019 18:49:50 +0000, Saeed Mahameed wrote:
> On Thu, 2019-08-08 at 18:15 -0700, Jakub Kicinski wrote:
> > On Thu, 8 Aug 2019 20:22:00 +0000, Saeed Mahameed wrote:
> > > From: Maxim Mikityanskiy <maximmi@mellanox.com>
> > >
> > > The current ARFS code relies on certain fields to be set in the SKB
> > > (e.g. transport_header) and extracts IP addresses and ports by
> > > custom
> > > code that parses the packet. The necessary SKB fields, however, are
> > > not
> > > always set at that point, which leads to an out-of-bounds access.
> > > Use
> > > skb_flow_dissect_flow_keys() to get the necessary information
> > > reliably,
> > > fix the out-of-bounds access and reuse the code.
> >
> > The whole series LGTM, FWIW.
> >
> > I'd be curious to hear which path does not have the skb fully
> > set up, could you elaborate? (I'm certainly no aRFC expert this
> > is pure curiosity).
>
> In our regression we found two use cases that might lead aRFS using un-
> initialized values.
> 1) GRO Disabled, Usually GRO fills the necessary fields.
> 2) Raw socket type of tests.
>
> And i am sure there are many other use cases. So drivers must use
> skb_flow_dissect_flow_keys() for aRFS parsing and eliminate all
> uncertainties.
Looking at the code now it makes perfect sense. We could probably
refactor to call the dissector in the core at some point.
Thanks for the explanation!
^ permalink raw reply
* Re: [PATCH] dpaa2-ethsw: move the DPAA2 Ethernet Switch driver out of staging
From: Andrew Lunn @ 2019-08-09 19:04 UTC (permalink / raw)
To: Ioana Ciornei
Cc: davem, gregkh, linux-kernel, netdev, f.fainelli,
ruxandra.radulescu
In-Reply-To: <1565366213-20063-1-git-send-email-ioana.ciornei@nxp.com>
Hi Ioana
> +static int
> +ethsw_get_link_ksettings(struct net_device *netdev,
> + struct ethtool_link_ksettings *link_ksettings)
> +{
> + struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + struct dpsw_link_state state = {0};
> + int err = 0;
> +
> + err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
> + port_priv->ethsw_data->dpsw_handle,
> + port_priv->idx,
> + &state);
> + if (err) {
> + netdev_err(netdev, "ERROR %d getting link state", err);
> + goto out;
> + }
> +
> + /* At the moment, we have no way of interrogating the DPMAC
> + * from the DPSW side or there may not exist a DPMAC at all.
What use is a switch port without a MAC?
> + * Report only autoneg state, duplexity and speed.
> + */
> + if (state.options & DPSW_LINK_OPT_AUTONEG)
> + link_ksettings->base.autoneg = AUTONEG_ENABLE;
> + if (!(state.options & DPSW_LINK_OPT_HALF_DUPLEX))
> + link_ksettings->base.duplex = DUPLEX_FULL;
> + link_ksettings->base.speed = state.rate;
> +
> +out:
> + return err;
> +}
> +
> +static int
> +ethsw_set_link_ksettings(struct net_device *netdev,
> + const struct ethtool_link_ksettings *link_ksettings)
> +{
> + struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + struct dpsw_link_cfg cfg = {0};
> + int err = 0;
> +
> + netdev_dbg(netdev, "Setting link parameters...");
> +
> + /* Due to a temporary MC limitation, the DPSW port must be down
> + * in order to be able to change link settings. Taking steps to let
> + * the user know that.
> + */
> + if (netif_running(netdev)) {
> + netdev_info(netdev, "Sorry, interface must be brought down first.\n");
> + return -EACCES;
> + }
This is quite common. The Marvell switches require the port is
disabled while reconfiguring the port. So we just disable it,
reconfigure it, and enable it again.
Why are you making the user do this?
> +
> + cfg.rate = link_ksettings->base.speed;
> + if (link_ksettings->base.autoneg == AUTONEG_ENABLE)
> + cfg.options |= DPSW_LINK_OPT_AUTONEG;
> + else
> + cfg.options &= ~DPSW_LINK_OPT_AUTONEG;
> + if (link_ksettings->base.duplex == DUPLEX_HALF)
> + cfg.options |= DPSW_LINK_OPT_HALF_DUPLEX;
> + else
> + cfg.options &= ~DPSW_LINK_OPT_HALF_DUPLEX;
> +
> + err = dpsw_if_set_link_cfg(port_priv->ethsw_data->mc_io, 0,
> + port_priv->ethsw_data->dpsw_handle,
> + port_priv->idx,
> + &cfg);
> + if (err)
> + /* ethtool will be loud enough if we return an error; no point
> + * in putting our own error message on the console by default
> + */
> + netdev_dbg(netdev, "ERROR %d setting link cfg", err);
Why even bother with a dbg message?
> +static void ethsw_ethtool_get_stats(struct net_device *netdev,
> + struct ethtool_stats *stats,
> + u64 *data)
> +{
> + struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + int i, err;
> +
> + memset(data, 0,
> + sizeof(u64) * ETHSW_NUM_COUNTERS);
Is this really needed? It seems like the core should be doing this?
> +static int ethsw_add_vlan(struct ethsw_core *ethsw, u16 vid)
> +{
> + int err;
> +
> + struct dpsw_vlan_cfg vcfg = {
> + .fdb_id = 0,
> + };
> +
> + if (ethsw->vlans[vid]) {
> + dev_err(ethsw->dev, "VLAN already configured\n");
> + return -EEXIST;
> + }
Can this happen? It seems like the core should be preventing this.
> +
> + err = dpsw_vlan_add(ethsw->mc_io, 0,
> + ethsw->dpsw_handle, vid, &vcfg);
> + if (err) {
> + dev_err(ethsw->dev, "dpsw_vlan_add err %d\n", err);
> + return err;
> + }
> + ethsw->vlans[vid] = ETHSW_VLAN_MEMBER;
> +
> + return 0;
> +}
> +
> +static int ethsw_port_set_pvid(struct ethsw_port_priv *port_priv, u16 pvid)
> +{
> + struct ethsw_core *ethsw = port_priv->ethsw_data;
> + struct net_device *netdev = port_priv->netdev;
> + struct dpsw_tci_cfg tci_cfg = { 0 };
> + bool is_oper;
> + int err, ret;
> +
> + err = dpsw_if_get_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
> + port_priv->idx, &tci_cfg);
> + if (err) {
> + netdev_err(netdev, "dpsw_if_get_tci err %d\n", err);
> + return err;
> + }
> +
> + tci_cfg.vlan_id = pvid;
> +
> + /* Interface needs to be down to change PVID */
> + is_oper = netif_oper_up(netdev);
> + if (is_oper) {
> + err = dpsw_if_disable(ethsw->mc_io, 0,
> + ethsw->dpsw_handle,
> + port_priv->idx);
> + if (err) {
> + netdev_err(netdev, "dpsw_if_disable err %d\n", err);
> + return err;
> + }
> + }
Is this not inconsistent with ethsw_set_link_ksettings()?
> +
> + err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
> + port_priv->idx, &tci_cfg);
> + if (err) {
> + netdev_err(netdev, "dpsw_if_set_tci err %d\n", err);
> + goto set_tci_error;
> + }
> +
> + /* Delete previous PVID info and mark the new one */
> + port_priv->vlans[port_priv->pvid] &= ~ETHSW_VLAN_PVID;
> + port_priv->vlans[pvid] |= ETHSW_VLAN_PVID;
> + port_priv->pvid = pvid;
> +
> +set_tci_error:
> + if (is_oper) {
> + ret = dpsw_if_enable(ethsw->mc_io, 0,
> + ethsw->dpsw_handle,
> + port_priv->idx);
> + if (ret) {
> + netdev_err(netdev, "dpsw_if_enable err %d\n", ret);
> + return ret;
> + }
> + }
> +
> + return err;
> +}
> +
> +static int ethsw_set_learning(struct ethsw_core *ethsw, u8 flag)
> +{
Seems like a bool would be better than u8 for flag. An call it enable?
> + enum dpsw_fdb_learning_mode learn_mode;
> + int err;
> +
> + if (flag)
> + learn_mode = DPSW_FDB_LEARNING_MODE_HW;
> + else
> + learn_mode = DPSW_FDB_LEARNING_MODE_DIS;
> +
> + err = dpsw_fdb_set_learning_mode(ethsw->mc_io, 0, ethsw->dpsw_handle, 0,
> + learn_mode);
> + if (err) {
> + dev_err(ethsw->dev, "dpsw_fdb_set_learning_mode err %d\n", err);
> + return err;
> + }
> + ethsw->learning = !!flag;
> +
> + return 0;
> +}
> +
> +static int ethsw_port_set_flood(struct ethsw_port_priv *port_priv, u8 flag)
> +{
Another bool?
> +static int port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
> + struct net_device *dev, const unsigned char *addr,
> + u16 vid, u16 flags,
> + struct netlink_ext_ack *extack)
> +{
> + if (is_unicast_ether_addr(addr))
> + return ethsw_port_fdb_add_uc(netdev_priv(dev),
> + addr);
> + else
> + return ethsw_port_fdb_add_mc(netdev_priv(dev),
> + addr);
Do you need to do anything special with link local MAC addresses?
Often they are considered as UC addresses.
> +static int port_carrier_state_sync(struct net_device *netdev)
> +{
> + struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + struct dpsw_link_state state;
> + int err;
> +
> + err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
> + port_priv->ethsw_data->dpsw_handle,
> + port_priv->idx, &state);
> + if (err) {
> + netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err);
> + return err;
> + }
> +
> + WARN_ONCE(state.up > 1, "Garbage read into link_state");
> +
> + if (state.up != port_priv->link_state) {
> + if (state.up)
> + netif_carrier_on(netdev);
> + else
> + netif_carrier_off(netdev);
> + port_priv->link_state = state.up;
> + }
> + return 0;
> +}
> +
> +static int port_open(struct net_device *netdev)
> +{
> + struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + int err;
> +
> + /* No need to allow Tx as control interface is disabled */
> + netif_tx_stop_all_queues(netdev);
> +
> + err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
> + port_priv->ethsw_data->dpsw_handle,
> + port_priv->idx);
> + if (err) {
> + netdev_err(netdev, "dpsw_if_enable err %d\n", err);
> + return err;
> + }
> +
> + /* sync carrier state */
> + err = port_carrier_state_sync(netdev);
> + if (err) {
> + netdev_err(netdev,`<
> + "port_carrier_state_sync err %d\n", err);
port_carrier_state_sync() already does a netdev_err(). There are a lot
of netdev_err() in this code. I wonder how many are really needed? And
how often you get a cascade of error message like this?
I think many of them can be downgraded to netdev_dbg(), or removed.
> + goto err_carrier_sync;
> + }
> +
> + return 0;
> +
> +err_carrier_sync:
> + dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
> + port_priv->ethsw_data->dpsw_handle,
> + port_priv->idx);
> + return err;
> +}
> +
> +static int port_stop(struct net_device *netdev)
> +{
> + struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + int err;
> +
> + err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
> + port_priv->ethsw_data->dpsw_handle,
> + port_priv->idx);
> + if (err) {
> + netdev_err(netdev, "dpsw_if_disable err %d\n", err);
> + return err;
> + }
> +
> + return 0;
> +}
> +
> +static netdev_tx_t port_dropframe(struct sk_buff *skb,
> + struct net_device *netdev)
> +{
> + /* we don't support I/O for now, drop the frame */
> + dev_kfree_skb_any(skb);
> +
Ah. Does this also mean it cannot receive?
That makes some of this code pointless and untested.
I'm not sure we would be willing to move this out of staging until it
can transmit and receive. The whole idea is that switch ports are just
linux interfaces. Some actions can be accelerated using hardware, and
what cannot be accelerated the network stack does. However, if you
cannot receive and transmit, you break that whole model. The network
stack is mostly pointless.
> +static void ethsw_links_state_update(struct ethsw_core *ethsw)
> +{
> + int i;
> +
> + for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
> + port_carrier_state_sync(ethsw->ports[i]->netdev);
> +}
> +
> +static irqreturn_t ethsw_irq0_handler_thread(int irq_num, void *arg)
> +{
> + struct device *dev = (struct device *)arg;
> + struct ethsw_core *ethsw = dev_get_drvdata(dev);
> +
> + /* Mask the events and the if_id reserved bits to be cleared on read */
> + u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
> + int err;
> +
> + err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
> + DPSW_IRQ_INDEX_IF, &status);
> + if (err) {
> + dev_err(dev, "Can't get irq status (err %d)", err);
> +
> + err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
> + DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
> + if (err)
> + dev_err(dev, "Can't clear irq status (err %d)", err);
> + goto out;
> + }
> +
> + if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
> + ethsw_links_state_update(ethsw);
So there are no per-port events? You have no idea which port went
up/down, you have to poll them all?
> +
> +out:
> + return IRQ_HANDLED;
> +}
> +
> +static int port_lookup_address(struct net_device *netdev, int is_uc,
> + const unsigned char *addr)
> +{
> + struct netdev_hw_addr_list *list = (is_uc) ? &netdev->uc : &netdev->mc;
> + struct netdev_hw_addr *ha;
> +
> + netif_addr_lock_bh(netdev);
> + list_for_each_entry(ha, &list->list, list) {
> + if (ether_addr_equal(ha->addr, addr)) {
> + netif_addr_unlock_bh(netdev);
> + return 1;
> + }
> + }
> + netif_addr_unlock_bh(netdev);
> + return 0;
I know i have shot myself in the foot a few times with this structure
of returning in the middle of a loop while holding a lock, forgetting
to unlock, and then later deadlocking. I always do something like:
ret = 0;
netif_addr_lock_bh(netdev);
list_for_each_entry(ha, &list->list, list) {
if (ether_addr_equal(ha->addr, addr)) {
ret = 1;
break;
}
}
netif_addr_unlock_bh(netdev);
return ret;
}
Also, this function should probably return a bool, not int.
> +}
> +
> +static int port_mdb_add(struct net_device *netdev,
> + const struct switchdev_obj_port_mdb *mdb,
> + struct switchdev_trans *trans)
> +{
> + struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + int err;
> +
> + if (switchdev_trans_ph_prepare(trans))
> + return 0;
> +
> + /* Check if address is already set on this port */
> + if (port_lookup_address(netdev, 0, mdb->addr))
> + return -EEXIST;
You are looking at core data structures to determine if the address is
already on the port. Is it possible for the core to ask you to add
this address, if the core has the information needed to determine
itself if the port already has the address.
This seems to be a general theme in this code. You don't trust the
core. If you have real examples of the core doing the wrong thing,
please point them out.
> +/* For the moment, only flood setting needs to be updated */
> +static int port_bridge_join(struct net_device *netdev,
> + struct net_device *upper_dev)
> +{
> + struct ethsw_port_priv *port_priv = netdev_priv(netdev);
> + struct ethsw_core *ethsw = port_priv->ethsw_data;
> + int i, err;
> +
> + for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
> + if (ethsw->ports[i]->bridge_dev &&
> + (ethsw->ports[i]->bridge_dev != upper_dev)) {
> + netdev_err(netdev,
> + "Another switch port is connected to %s\n",
> + ethsw->ports[i]->bridge_dev->name);
> + return -EINVAL;
> + }
Am i reading this correct? You only support a single bridge? The
error message is not very informative. Also, i think you should be
returning EOPNOTSUPP, indicating the offload is not possible. Linux
will then do it in software. If it could actually receive/transmit the
frames....
> +static int ethsw_open(struct ethsw_core *ethsw)
> +{
> + struct ethsw_port_priv *port_priv = NULL;
> + int i, err;
> +
> + err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
> + if (err) {
> + dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
> + return err;
> + }
> +
> + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
> + port_priv = ethsw->ports[i];
> + err = dev_open(port_priv->netdev, NULL);
> + if (err) {
> + netdev_err(port_priv->netdev, "dev_open err %d\n", err);
> + return err;
> + }
> + }
Why is this needed? When the user configures the interface up, won't
the core call dev_open()?
> +
> + return 0;
> +}
> +
> +static int ethsw_stop(struct ethsw_core *ethsw)
> +{
> + struct ethsw_port_priv *port_priv = NULL;
> + int i, err;
> +
> + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
> + port_priv = ethsw->ports[i];
> + dev_close(port_priv->netdev);
> + }
> +
> + err = dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
> + if (err) {
> + dev_err(ethsw->dev, "dpsw_disable err %d\n", err);
> + return err;
> + }
> +
> + return 0;
> +}
> +
> +static int ethsw_init(struct fsl_mc_device *sw_dev)
> +{
> + stp_cfg.vlan_id = DEFAULT_VLAN_ID;
> + stp_cfg.state = DPSW_STP_STATE_FORWARDING;
> +
> + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
> + err = dpsw_if_set_stp(ethsw->mc_io, 0, ethsw->dpsw_handle, i,
> + &stp_cfg);
Maybe you should actually configure the STP state to blocked? You can
move it to forwarding when the interface is opened.
> +static int ethsw_port_init(struct ethsw_port_priv *port_priv, u16 port)
> +{
> + const char def_mcast[ETH_ALEN] = {0x01, 0x00, 0x5e, 0x00, 0x00, 0x01};
There should be some explanation about what the MAC address is, and
why it needs adding.
> +static int ethsw_probe_port(struct ethsw_core *ethsw, u16 port_idx)
> +{
> + struct ethsw_port_priv *port_priv;
> + struct device *dev = ethsw->dev;
> + struct net_device *port_netdev;
> + int err;
> +
> + port_netdev = alloc_etherdev(sizeof(struct ethsw_port_priv));
> + if (!port_netdev) {
> + dev_err(dev, "alloc_etherdev error\n");
> + return -ENOMEM;
> + }
> +
> + port_priv = netdev_priv(port_netdev);
> + port_priv->netdev = port_netdev;
> + port_priv->ethsw_data = ethsw;
> +
> + port_priv->idx = port_idx;
> + port_priv->stp_state = BR_STATE_FORWARDING;
> +
> + /* Flooding is implicitly enabled */
> + port_priv->flood = true;
> +
> + SET_NETDEV_DEV(port_netdev, dev);
> + port_netdev->netdev_ops = ðsw_port_ops;
> + port_netdev->ethtool_ops = ðsw_port_ethtool_ops;
> +
> + /* Set MTU limits */
> + port_netdev->min_mtu = ETH_MIN_MTU;
> + port_netdev->max_mtu = ETHSW_MAX_FRAME_LENGTH;
> +
> + err = register_netdev(port_netdev);
> + if (err < 0) {
> + dev_err(dev, "register_netdev error %d\n", err);
> + goto err_register_netdev;
> + }
At this point, the interface if live.
> +
> + ethsw->ports[port_idx] = port_priv;
> +
> + err = ethsw_port_init(port_priv, port_idx);
> + if (err)
> + goto err_ethsw_port_init;
What would happen if the interface was asked to do something before
these two happen? You should only call register_netdev() when you
really are ready to go.
> +static int ethsw_probe(struct fsl_mc_device *sw_dev)
> +{
> +
> + /* Switch starts up enabled */
> + rtnl_lock();
> + err = ethsw_open(ethsw);
> + rtnl_unlock();
What exactly do you mean by that?
Andrew
^ permalink raw reply
* Re: [PATCH net-next v2 1/4] net: phy: simplify genphy_config_advert by using the linkmode_adv_to_xxx_t functions
From: Andrew Lunn @ 2019-08-09 19:07 UTC (permalink / raw)
To: Heiner Kallweit; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <6ba42ade-5874-9bc9-4f4d-b80f79c0deca@gmail.com>
On Fri, Aug 09, 2019 at 08:43:04PM +0200, Heiner Kallweit wrote:
> Using linkmode_adv_to_mii_adv_t and linkmode_adv_to_mii_ctrl1000_t
> allows to simplify the code. In addition avoiding the conversion to
> the legacy u32 advertisement format allows to remove the warning.
>
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> Suggested-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Andrew
^ permalink raw reply
* Re: [PATCH net-next v2 3/4] net: phy: add phy_modify_paged_changed
From: Andrew Lunn @ 2019-08-09 19:09 UTC (permalink / raw)
To: Heiner Kallweit; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <741a9493-e9a1-be4e-a2e9-e15294362005@gmail.com>
On Fri, Aug 09, 2019 at 08:44:22PM +0200, Heiner Kallweit wrote:
> Add helper function phy_modify_paged_changed, behavios is the same
> as for phy_modify_changed.
>
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Andrew
^ permalink raw reply
* Re: [PATCH net-next] gso: enable udp gso for virtual devices
From: Willem de Bruijn @ 2019-08-09 19:13 UTC (permalink / raw)
To: Josh Hunt
Cc: Willem de Bruijn, Jason Baron, Alexander Duyck, David Miller,
Netdev, Paolo Abeni
In-Reply-To: <314b4ae8-ef99-e7a4-cb95-87b7ea74427f@akamai.com>
On Fri, Aug 9, 2019 at 2:58 PM Josh Hunt <johunt@akamai.com> wrote:
>
> On 6/26/19 4:41 PM, Willem de Bruijn wrote:
> > On Wed, Jun 26, 2019 at 3:17 PM Jason Baron <jbaron@akamai.com> wrote:
> >>
> >>
> >>
> >> On 6/14/19 4:53 PM, Jason Baron wrote:
> >>>
> >>>
> >>> On 6/13/19 5:20 PM, Willem de Bruijn wrote:
> >>>>>>> @@ -237,6 +237,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
> >>>>>>> NETIF_F_GSO_GRE_CSUM | \
> >>>>>>> NETIF_F_GSO_IPXIP4 | \
> >>>>>>> NETIF_F_GSO_IPXIP6 | \
> >>>>>>> + NETIF_F_GSO_UDP_L4 | \
> >>>>>>> NETIF_F_GSO_UDP_TUNNEL | \
> >>>>>>> NETIF_F_GSO_UDP_TUNNEL_CSUM)
> >>>>>>
> >>>>>> Are you adding this to NETIF_F_GSO_ENCAP_ALL? Wouldn't it make more
> >>>>>> sense to add it to NETIF_F_GSO_SOFTWARE?
> >>>>>>
> >>>>>
> >>>>> Yes, I'm adding to NETIF_F_GSO_ENCAP_ALL (not very clear from the
> >>>>> context). I will fix the commit log.
> >>>>>
> >>>>> In: 83aa025 udp: add gso support to virtual devices, the support was
> >>>>> also added to NETIF_F_GSO_ENCAP_ALL (although subsequently reverted due
> >>>>> to UDP GRO not being in place), so I wonder what the reason was for that?
> >>>>
> >>>> That was probably just a bad choice on my part.
> >>>>
> >>>> It worked in practice, but if NETIF_F_GSO_SOFTWARE works the same
> >>>> without unexpected side effects, then I agree that it is the better choice.
> >>>>
> >>>> That choice does appear to change behavior when sending over tunnel
> >>>> devices. Might it send tunneled GSO packets over loopback?
> >>>>
> >>>>
> >>>
> >>> I set up a test case using fou tunneling through a bridge device using
> >>> the udpgso_bench_tx test where packets are not received correctly if
> >>> NETIF_F_GSO_UDP_L4 is added to NETIF_F_GSO_SOFTWARE. If I have it added
> >>> to NETIF_F_GSO_ENCAP_ALL, it does work correctly. So there are more
> >>> fixes required to include it in NETIF_F_GSO_SOFTWARE.
> >>>
> >>> The use-case I have only requires it to be in NETIF_F_GSO_ENCAP_ALL, but
> >>> if it needs to go in NETIF_F_GSO_SOFTWARE, I can look at what's required
> >>> more next week.
> >>>
> >>
> >> Hi,
> >>
> >> I haven't had a chance to investigate what goes wrong with including
> >> NETIF_F_GSO_UDP_L4 in NETIF_F_GSO_SOFTWARE - but I was just wondering if
> >> people are ok with NETIF_F_GSO_UDP_L4 being added to
> >> NETIF_F_GSO_ENCAP_ALL and not NETIF_F_GSO_SOFTWARE (ie the original
> >> patch as posted)?
> >>
> >> As I mentioned that is sufficient for my use-case, and its how Willem
> >> originally proposed this.
> >
> > Indeed, based on the previous discussion this sounds fine to me.
> >
>
> Willem
>
> Are you OK to ACK this? If not, is there something else you'd rather see
> here?
Sure. Unless Alex still has objections, feel free to resubmit with my Acked-by.
^ permalink raw reply
* Re: [PATCH net-next v2 4/4] net: phy: realtek: add support for the 2.5Gbps PHY in RTL8125
From: Andrew Lunn @ 2019-08-09 19:18 UTC (permalink / raw)
To: Heiner Kallweit; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <49454e5b-465d-540e-cc01-07717a773e33@gmail.com>
> + }, {
> + PHY_ID_MATCH_EXACT(0x001cca50),
Hi Heiner
With the Marvell driver, i looked at the range of IDs the PHYs where
using. The switch, being MDIO based, also has ID values. The PHY range
and the switch range are well separated, and it seems unlikely Marvell
would reuse a switch ID in a PHY which was not compatible with the
PHY.
Could you explain why you picked this value for the PHY? What makes
you think it is not in use by another Realtek PHY?
Thanks
Andrew
^ permalink raw reply
* Re: Clause 73 and USXGMII
From: Russell King - ARM Linux admin @ 2019-08-09 19:25 UTC (permalink / raw)
To: Jose Abreu
Cc: netdev@vger.kernel.org, Andrew Lunn, Florian Fainelli,
Heiner Kallweit
In-Reply-To: <BN8PR12MB32664B2DE18A30311A19E5B8D3D60@BN8PR12MB3266.namprd12.prod.outlook.com>
On Fri, Aug 09, 2019 at 06:42:39PM +0000, Jose Abreu wrote:
> From: Russell King - ARM Linux admin <linux@armlinux.org.uk>
> Date: Aug/08/2019, 13:09:03 (UTC+00:00)
>
> > On Thu, Aug 08, 2019 at 11:45:29AM +0000, Jose Abreu wrote:
> > > From: Russell King - ARM Linux admin <linux@armlinux.org.uk>
> > > Date: Aug/08/2019, 10:23:13 (UTC+00:00)
> > >
> > > > On Thu, Aug 08, 2019 at 09:02:57AM +0000, Jose Abreu wrote:
> > > > > From: Russell King - ARM Linux admin <linux@armlinux.org.uk>
> > > > > Date: Aug/08/2019, 09:26:26 (UTC+00:00)
> > > > >
> > > > > > Hi,
> > > > > >
> > > > > > Have you tried enabling debug mode in phylink (add #define DEBUG at the
> > > > > > top of the file) ?
> > > > >
> > > > > Yes:
> > > > >
> > > > > [ With > 2.5G modes removed ]
> > > > > # dmesg | grep -i phy
> > > > > libphy: stmmac: probed
> > > > > stmmaceth 0000:04:00.0 enp4s0: PHY [stmmac-1:00] driver [Synopsys 10G]
> > > > > stmmaceth 0000:04:00.0 enp4s0: phy: setting supported
> > > > > 00,00000000,0002e040 advertising 00,00000000,0002e040
> > > > > stmmaceth 0000:04:00.0 enp4s0: configuring for phy/usxgmii link mode
> > > > > stmmaceth 0000:04:00.0 enp4s0: phylink_mac_config:
> > > > > mode=phy/usxgmii/Unknown/Unknown adv=00,00000000,0002e040 pause=10
> > > > > link=0 an=1
> > > > > stmmaceth 0000:04:00.0 enp4s0: phy link down usxgmii/Unknown/Unknown
> > > >
> > > > This shows that the PHY isn't reporting that the link came up. Did
> > > > the PHY negotiate link? If so, why isn't it reporting that the link
> > > > came up? Maybe something is mis-programming the capability bits in
> > > > the PHY? Maybe disabling the 10G speeds disables everything faster
> > > > than 1G?
> > >
> > > Autoneg was started but never finishes and disabling 10G modes is
> > > causing autoneg to fail.
> > >
> > > >
> > > > > [ Without any limit ]
> > > > > # dmesg | grep -i phy
> > > > > libphy: stmmac: probed
> > > > > stmmaceth 0000:04:00.0 enp4s0: PHY [stmmac-1:00] driver [Synopsys 10G]
> > > > > stmmaceth 0000:04:00.0 enp4s0: phy: setting supported
> > > > > 00,00000000,000ee040 advertising 00,00000000,000ee040
> > > > > stmmaceth 0000:04:00.0 enp4s0: configuring for phy/usxgmii link mode
> > > > > stmmaceth 0000:04:00.0 enp4s0: phylink_mac_config:
> > > > > mode=phy/usxgmii/Unknown/Unknown adv=00,00000000,000ee040 pause=10
> > > > > link=0 an=1
> > > > > stmmaceth 0000:04:00.0 enp4s0: phy link down usxgmii/Unknown/Unknown
> > > > > stmmaceth 0000:04:00.0 enp4s0: phy link up usxgmii/2.5Gbps/Full
> > > > > stmmaceth 0000:04:00.0 enp4s0: phylink_mac_config:
> > > > > mode=phy/usxgmii/2.5Gbps/Full adv=00,00000000,00000000 pause=0f link=1
> > > > > an=0
> > > > >
> > > > > I'm thinking on whether this can be related with USXGMII. As link is
> > > > > operating in 10G but I configure USXGMII for 2.5G maybe autoneg outcome
> > > > > should always be 10G ?
> > > >
> > > > As I understand USXGMII (which isn't very well, because the spec isn't
> > > > available) I believe that it operates in a similar way to SGMII where
> > > > data is replicated the appropriate number of times to achieve the link
> > > > speed. So, the USXGMII link always operates at a bit rate equivalent
> > > > to 10G, but data is replicated twice for 5G, four times for 2.5G, ten
> > > > times for 1G, etc.
> > > >
> > > > I notice that you don't say that you support any copper speeds, which
> > > > brings up the question about what the PHY's media is...
> > >
> > > I just added the speeds that XPCS supports within Clause 73
> > > specification:
> > > Technology Ability field. Indicates the supported technologies:
> > > A0: When this bit is set to 1, the 1000BASE-KX technology is supported
> > > A1: When this bit is set to 1, the 10GBASE-KX4 technology is supported
> > > A2: When this bit is set to 1, the 10GBASE-KR technology is supported
> > > A11: When this bit is set to 1, the 2.5GBASE-KX technology is supported
> > > A12: When this bit is set to 1, the 5GBASE-KR technology is supported
> > >
> > > And, within USXGMII, XPCS supports the following:
> > > Single Port: 10G-SXGMII, 5G-SXGMII, 2.5G-SXGMII
> > > Dual Port: 10G-DXGMII, 5G-DXGMII
> > > Quad Port: 10G-XGMII
> > >
> > > My HW is currently fixed for USXGMII at 2.5G.
> >
> > So what do you actually have?
> >
> > +-----+ +----------+
> > | STM | USXGMII | Synopsis | ????????
> > | MAC | <----------> | PHY | <----------> ????
> > | | link | |
> > +-----+ +----------+ (media side)
> >
> > Does the above refer to what the STM MAC and Synopsis PHY support for
> > the USXGMII link? What about the media side?
>
> This is my setup:
>
> XGMAC <-> XPCS <-> Xilinx SERDES <-> SFP
>
> I'm not sure about the media side. I use a passive SFP cable if that
> helps ...
And at the other end of the passive SFP cable? I guess some kind of
standard networking device.
If the SFP is running at 10G, the protocol to the SFP is expected to be
10GBASE-R - which is fixed at 10G speed. Unless the Xilinx serdes
(which you previously stated was a PHY) is capable of speed conversion
between 2.5G to the XPCS over USXGMII and 10G to the SFP, then I don't
see how you can expect this setup to work - at least not in a standard
environment.
In any case, I don't think it is phylink causing your problems. Without
knowing more about the "phy", what the Xilinx serdes is capable of, and
how that is being programmed, I don't see how I can provide further
help.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
^ permalink raw reply
* Re: [PATCH net-next v2 4/4] net: phy: realtek: add support for the 2.5Gbps PHY in RTL8125
From: Heiner Kallweit @ 2019-08-09 19:31 UTC (permalink / raw)
To: Andrew Lunn; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <20190809191822.GZ27917@lunn.ch>
On 09.08.2019 21:18, Andrew Lunn wrote:
>> + }, {
>> + PHY_ID_MATCH_EXACT(0x001cca50),
>
> Hi Heiner
>
Hi Andrew,
> With the Marvell driver, i looked at the range of IDs the PHYs where
> using. The switch, being MDIO based, also has ID values. The PHY range
> and the switch range are well separated, and it seems unlikely Marvell
> would reuse a switch ID in a PHY which was not compatible with the
> PHY.
>
> Could you explain why you picked this value for the PHY? What makes
> you think it is not in use by another Realtek PHY?
>
0x001cc800 being the Realtek OUI, I've seen only PHY's with ID
0x001cc8XX and 0x001cc9XX so far. Realtek doesn't seem to have such
a clear separation between PHY and switch PHY ID's.
Example:
0x001cc961 (RTL8366, switch)
0x001cc916 (RTL8211F, PHY)
Last digit of the model is used as model number.
I did the same and used 5 as model number (from RTL8125).
Revision number is set to 0 because RTL8125 is brand-new.
I chose a PHY ID in 0x001ccaXX range because it isn't used by
Realtek AFAIK.
> Thanks
> Andrew
>
Heiner
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox