Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v1 1/3] net: phy: Allow mdio buses to auto-probe c45 devices
From: Calvin Johnson @ 2020-06-17 17:15 UTC (permalink / raw)
  To: Jeremy Linton, Russell King - ARM Linux admin, Jon,
	Cristi Sovaiala, Ioana Ciornei, Andrew Lunn, Andy Shevchenko,
	Florian Fainelli, Madalin Bucur
  Cc: netdev, linux.cj, Calvin Johnson
In-Reply-To: <20200617171536.12014-1-calvin.johnson@oss.nxp.com>

From: Jeremy Linton <jeremy.linton@arm.com>

The mdiobus_scan logic is currently hardcoded to only
work with c22 devices. This works fairly well in most
cases, but its possible a c45 device doesn't respond
despite being a standard phy. If the parent hardware
is capable, it makes sense to scan for c22 devices before
falling back to c45.

As we want this to reflect the capabilities of the STA,
lets add a field to the mii_bus structure to represent
the capability. That way devices can opt into the extended
scanning. Existing users should continue to default to c22
only scanning as long as they are zero'ing the structure
before use.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
---

 drivers/net/phy/mdio_bus.c | 17 +++++++++++++++--
 include/linux/phy.h        |  7 +++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 6ceee82b2839..e6c179b89907 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -739,10 +739,23 @@ EXPORT_SYMBOL(mdiobus_free);
  */
 struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
 {
-	struct phy_device *phydev;
+	struct phy_device *phydev = ERR_PTR(-ENODEV);
 	int err;
 
-	phydev = get_phy_device(bus, addr, false);
+	switch (bus->probe_capabilities) {
+	case MDIOBUS_C22:
+		phydev = get_phy_device(bus, addr, false);
+		break;
+	case MDIOBUS_C45:
+		phydev = get_phy_device(bus, addr, true);
+		break;
+	case MDIOBUS_C22_C45:
+		phydev = get_phy_device(bus, addr, false);
+		if (IS_ERR(phydev))
+			phydev = get_phy_device(bus, addr, true);
+		break;
+	}
+
 	if (IS_ERR(phydev))
 		return phydev;
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 9248dd2ce4ca..50e5312b2304 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -298,6 +298,13 @@ struct mii_bus {
 	/* RESET GPIO descriptor pointer */
 	struct gpio_desc *reset_gpiod;
 
+	/* bus capabilities, used for probing */
+	enum {
+		MDIOBUS_C22 = 0,
+		MDIOBUS_C45,
+		MDIOBUS_C22_C45,
+	} probe_capabilities;
+
 	/* protect access to the shared element */
 	struct mutex shared_lock;
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH v1 2/3] net/fsl: acpize xgmac_mdio
From: Calvin Johnson @ 2020-06-17 17:15 UTC (permalink / raw)
  To: Jeremy Linton, Russell King - ARM Linux admin, Jon,
	Cristi Sovaiala, Ioana Ciornei, Andrew Lunn, Andy Shevchenko,
	Florian Fainelli, Madalin Bucur
  Cc: netdev, linux.cj, Calvin Johnson
In-Reply-To: <20200617171536.12014-1-calvin.johnson@oss.nxp.com>

From: Jeremy Linton <jeremy.linton@arm.com>

Add ACPI support for xgmac MDIO bus registration while maintaining
the existing DT support.

The function mdiobus_register() inside of_mdiobus_register(), brings
up all the PHYs on the mdio bus and attach them to the bus.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
---

 drivers/net/ethernet/freescale/xgmac_mdio.c | 27 +++++++++++++--------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index c82c85ef5fb3..fb7f8caff643 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -245,14 +245,14 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct mii_bus *bus;
-	struct resource res;
+	struct resource *res;
 	struct mdio_fsl_priv *priv;
 	int ret;
 
-	ret = of_address_to_resource(np, 0, &res);
-	if (ret) {
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
 		dev_err(&pdev->dev, "could not obtain address\n");
-		return ret;
+		return -EINVAL;
 	}
 
 	bus = mdiobus_alloc_size(sizeof(struct mdio_fsl_priv));
@@ -263,21 +263,21 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
 	bus->read = xgmac_mdio_read;
 	bus->write = xgmac_mdio_write;
 	bus->parent = &pdev->dev;
-	snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res.start);
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res->start);
 
 	/* Set the PHY base address */
 	priv = bus->priv;
-	priv->mdio_base = of_iomap(np, 0);
+	priv->mdio_base = ioremap(res->start, resource_size(res));
 	if (!priv->mdio_base) {
 		ret = -ENOMEM;
 		goto err_ioremap;
 	}
 
-	priv->is_little_endian = of_property_read_bool(pdev->dev.of_node,
-						       "little-endian");
+	priv->is_little_endian = device_property_read_bool(&pdev->dev,
+							   "little-endian");
 
-	priv->has_a011043 = of_property_read_bool(pdev->dev.of_node,
-						  "fsl,erratum-a011043");
+	priv->has_a011043 = device_property_read_bool(&pdev->dev,
+						      "fsl,erratum-a011043");
 
 	ret = of_mdiobus_register(bus, np);
 	if (ret) {
@@ -320,10 +320,17 @@ static const struct of_device_id xgmac_mdio_match[] = {
 };
 MODULE_DEVICE_TABLE(of, xgmac_mdio_match);
 
+static const struct acpi_device_id xgmac_acpi_match[] = {
+	{ "NXP0006", (kernel_ulong_t)NULL },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, xgmac_acpi_match);
+
 static struct platform_driver xgmac_mdio_driver = {
 	.driver = {
 		.name = "fsl-fman_xmdio",
 		.of_match_table = xgmac_mdio_match,
+		.acpi_match_table = xgmac_acpi_match,
 	},
 	.probe = xgmac_mdio_probe,
 	.remove = xgmac_mdio_remove,
-- 
2.17.1


^ permalink raw reply related

* [PATCH v1 3/3] net/fsl: enable extended scanning in xgmac_mdio
From: Calvin Johnson @ 2020-06-17 17:15 UTC (permalink / raw)
  To: Jeremy Linton, Russell King - ARM Linux admin, Jon,
	Cristi Sovaiala, Ioana Ciornei, Andrew Lunn, Andy Shevchenko,
	Florian Fainelli, Madalin Bucur
  Cc: netdev, linux.cj, Calvin Johnson
In-Reply-To: <20200617171536.12014-1-calvin.johnson@oss.nxp.com>

From: Jeremy Linton <jeremy.linton@arm.com>

Since we know the xgmac hardware always has a c45
complaint bus, lets try scanning for c22 capable
phys first. If we fail to find any, then it with
fall back to c45 automatically.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>

---

 drivers/net/ethernet/freescale/xgmac_mdio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index fb7f8caff643..5732ca13b821 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -263,6 +263,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
 	bus->read = xgmac_mdio_read;
 	bus->write = xgmac_mdio_write;
 	bus->parent = &pdev->dev;
+	bus->probe_capabilities = MDIOBUS_C22_C45;
 	snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res->start);
 
 	/* Set the PHY base address */
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next] net: tso: double TSO_HEADER_SIZE value
From: Eric Dumazet @ 2020-06-17 17:19 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Antoine Tenart

Transport header size could be 60 bytes, and network header
size can also be 60 bytes. Add the Ethernet header and we
are above 128 bytes.

Since drivers using net/core/tso.c usually allocates
one DMA coherent piece of memory per RX queue, this patch
might cause issues if a driver was using too many slots.

For 1024 slots, we would need 256 KB of physically
contiguous memory instead of 128 KB.

Alternative fix would be to add checks in the fast path,
but this involves more work in all drivers using net/core/tso.c.

Fixes: f9cbe9a556af ("net: define the TSO header size in net/tso.h")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Antoine Tenart <antoine.tenart@bootlin.com>
---
Note: probably needs to stay in net-next for one release cycle.

 include/net/tso.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/tso.h b/include/net/tso.h
index 7e166a5703497fadf4662acc474f827b2754da78..c33dd00c161f7a6aa65f586b0ceede46af2e8730 100644
--- a/include/net/tso.h
+++ b/include/net/tso.h
@@ -4,7 +4,7 @@

 #include <net/ip.h>

-#define TSO_HEADER_SIZE		128
+#define TSO_HEADER_SIZE		256

 struct tso_t {
 	int next_frag_idx;
-- 
2.27.0.290.gba653c62da-goog

^ permalink raw reply related

* Re: [PATCH net-next] net: tso: double TSO_HEADER_SIZE value
From: Eric Dumazet @ 2020-06-17 17:20 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Antoine Tenart
In-Reply-To: <20200617171912.224416-1-edumazet@google.com>

On Wed, Jun 17, 2020 at 10:19 AM Eric Dumazet <edumazet@google.com> wrote:
>
> Transport header size could be 60 bytes, and network header
> size can also be 60 bytes. Add the Ethernet header and we
> are above 128 bytes.
>
> Since drivers using net/core/tso.c usually allocates
> one DMA coherent piece of memory per RX queue, this patch
                                                  typo here : TX queue

>
> might cause issues if a driver was using too many slots.
>
> For 1024 slots, we would need 256 KB of physically
> contiguous memory instead of 128 KB.
>
> Alternative fix would be to add checks in the fast path,
> but this involves more work in all drivers using net/core/tso.c.
>
> Fixes: f9cbe9a556af ("net: define the TSO header size in net/tso.h")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Antoine Tenart <antoine.tenart@bootlin.com>
> ---
> Note: probably needs to stay in net-next for one release cycle.
>
>  include/net/tso.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/net/tso.h b/include/net/tso.h
> index 7e166a5703497fadf4662acc474f827b2754da78..c33dd00c161f7a6aa65f586b0ceede46af2e8730 100644
> --- a/include/net/tso.h
> +++ b/include/net/tso.h
> @@ -4,7 +4,7 @@
>
>  #include <net/ip.h>
>
> -#define TSO_HEADER_SIZE                128
> +#define TSO_HEADER_SIZE                256
>
>  struct tso_t {
>         int next_frag_idx;
> --
> 2.27.0.290.gba653c62da-goog
>

^ permalink raw reply

* Re: qmi_wwan not using netif_carrier_*()
From: Andrew Lunn @ 2020-06-17 17:24 UTC (permalink / raw)
  To: Dan Williams; +Cc: Tanjeff-Nicolai Moos, netdev
In-Reply-To: <1f5ecd2a1138dd39893b8d2b9e608067468de891.camel@redhat.com>

On Wed, Jun 17, 2020 at 11:59:33AM -0500, Dan Williams wrote:
> On Wed, 2020-06-17 at 18:48 +0200, Andrew Lunn wrote:
> > On Wed, Jun 17, 2020 at 03:21:53PM +0200, Tanjeff-Nicolai Moos wrote:
> > > Hi netdevs,
> > > 
> > > Kernel version:
> > > 
> > >   I'm working with kernel 4.14.137 (OpenWRT project). But I looked
> > > at
> > >   the source of kernel 5.7 and found the same situation.
> > > 
> > > Problem:
> > > 
> > >   I'm using the qmi_wwan driver for a Sierra Wireless EM7455 LTE
> > >   modem. This driver does not use
> > >   netif_carrier_on()/netif_carrier_off() to update its link status.
> > >   This confuses ledtrig_netdev which uses netif_carrier_ok() to
> > > obtain
> > >   the link status.
> > > 
> > > My solution:
> > > 
> > >   As a solution (or workaround?) I would try:
> > > 
> > >   1) In drivers/net/usb/qmi_wwan.c, lines 904/913: Add the flag
> > >      FLAG_LINK_INTR.
> > > 
> > >   2) In drivers/net/usb/usbnet.c, functions usbnet_open() and
> > >      usbnet_stop(): Add a call to netif_carrier_*(),
> > >      but only if FLAG_LINK_INTR is set.
> > > 
> > > Question:
> > > 
> > >   Is this the intended way to use FLAG_LINK_INTR and
> > > netif_carrier_*()?
> > >   Or is there another recommended way to obtain the link status of
> > >   network devices (I could change ledtrig_netdev)?
> > 
> > Hi Tanjeff
> > 
> > With Ethernet, having a carrier means there is a link partner, the
> > layer 2 of the OSI 7 layer stack model is working. If the interface
> > is
> > not open()ed, it clearly should not have carrier. However, just
> > because it is open, does not mean it has carrier. The cable could be
> > unplugged, etc.
> > 
> > This is an LTE modem. What does carrier mean here? I don't know if it
> > is well defined, but i would guess it is connected to a base station
> > which is offering service. I'm assuming you are interested in data
> > here, not wanting to make a 911/999/112/$EMERGENCY_SERVICE call which
> > in theory all base stations should accept.
> > 
> > Is there a way to get this state information from the hardware? That
> > would be the correct way to set the carrier.
> 
> There isn't. All the setup that would result in IFF_LOWER_UP (eg
> ability to pass packets to the cellular network) happens over channels
> *other* than the ethernet one. eg CDC-WDM, CDC-ACM, CDC-MBIM, AT
> commands, QMI commands, MBIM commands, etc.
> 
> Something in userspace handles the actual IP-level connection setup and
> once that's done, only then do you really have IFF_LOWER_UP. One way to
> solve this could be to require userspace connection managers to manage
> the carrier state of the device, which is possible for some drivers
> already IIRC.

So Tanjeff, what is you real use case here? I assume you want to
control an LED so it is on when the LTE modem is connected? Could you
export the LED to user space and have a dhclient-enter/exit script change
the state of the LED?

    Andrew

^ permalink raw reply

* Re: [PATCH v1 2/3] net/fsl: acpize xgmac_mdio
From: Andy Shevchenko @ 2020-06-17 17:24 UTC (permalink / raw)
  To: Calvin Johnson
  Cc: Jeremy Linton, Russell King - ARM Linux admin, Jon,
	Cristi Sovaiala, Ioana Ciornei, Andrew Lunn, Florian Fainelli,
	Madalin Bucur, netdev, linux.cj
In-Reply-To: <20200617171536.12014-3-calvin.johnson@oss.nxp.com>

On Wed, Jun 17, 2020 at 8:16 PM Calvin Johnson
<calvin.johnson@oss.nxp.com> wrote:
>
> From: Jeremy Linton <jeremy.linton@arm.com>
>
> Add ACPI support for xgmac MDIO bus registration while maintaining
> the existing DT support.
>
> The function mdiobus_register() inside of_mdiobus_register(), brings
> up all the PHYs on the mdio bus and attach them to the bus.

...

> -       snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res.start);
> +       snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res->start);

Since you are here, better to change the specifier to the dedicated
one, i.e. "%pa".
But I don't remember if it adds 0x to it...

...

> +static const struct acpi_device_id xgmac_acpi_match[] = {
> +       { "NXP0006", (kernel_ulong_t)NULL },

       { "NXP0006" },

is enough. And I suppose this is the official ID.

> +       { },

No need to have comma in terminator line.

> +};

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH AUTOSEL 5.7 264/274] vxlan: Avoid infinite loop when suppressing NS messages with invalid options
From: Ido Schimmel @ 2020-06-17 17:29 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Ido Schimmel, linux-kernel, stable, Nikolay Aleksandrov,
	David S . Miller, netdev
In-Reply-To: <20200617162823.GR1931@sasha-vm>

On Wed, Jun 17, 2020 at 12:28:23PM -0400, Sasha Levin wrote:
> On Tue, Jun 09, 2020 at 09:55:48AM +0300, Ido Schimmel wrote:
> > On Mon, Jun 08, 2020 at 07:05:57PM -0400, Sasha Levin wrote:
> > > From: Ido Schimmel <idosch@mellanox.com>
> > > 
> > > [ Upstream commit 8066e6b449e050675df48e7c4b16c29f00507ff0 ]
> > 
> > Hi,
> > 
> > In the same patch set I also included a similar fix for the bridge
> > module:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=53fc685243bd6fb90d90305cea54598b78d3cbfc
> > 
> > But I don't see it in the patch sets you sent.
> > 
> > Don't see it here as well:
> > https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git/tree/queue-5.7
> > https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/log/?h=linux-5.7.y
> > 
> > Did it get lost or it's just pending somewhere else?
> 
> AUTOSEL ignores net/ patches that are maintained by David Miller.
> 
> I can pick it up manually.

No need. Dave queued both patches to stable and they are already in
5.7.y:

https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.7.y&id=f3f4183f6d36df54f5a867653c30852ec6b5ab9d
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.7.y&id=ecf0b3c5a6684fcf27073365d576164390bc000e

> 
> -- 
> Thanks,
> Sasha

^ permalink raw reply

* Re: [PATCH] net: macb: undo operations in case of failure
From: Nicolas Ferre @ 2020-06-17 17:31 UTC (permalink / raw)
  To: Jakub Kicinski, Claudiu Beznea
  Cc: davem, linux, antoine.tenart, netdev, linux-kernel
In-Reply-To: <20200617092935.4e3616c1@kicinski-fedora-PC1C0HJN>

On 17/06/2020 at 18:29, Jakub Kicinski wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> On Wed, 17 Jun 2020 16:23:55 +0300 Claudiu Beznea wrote:
>> Undo previously done operation in case macb_phylink_connect()
>> fails. Since macb_reset_hw() is the 1st undo operation the
>> napi_exit label was renamed to reset_hw.
>>
>> Fixes: b2b041417299 ("net: macb: convert to phylink")
>> Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
> 
> Fixes tag: Fixes: b2b041417299 ("net: macb: convert to phylink")
> Has these problem(s):
>          - Target SHA1 does not exist

It must be:
Fixes: 7897b071ac3b ("net: macb: convert to phylink")


-- 
Nicolas Ferre

^ permalink raw reply

* Re: [PATCH v1 2/3] net/fsl: acpize xgmac_mdio
From: Andrew Lunn @ 2020-06-17 17:34 UTC (permalink / raw)
  To: Calvin Johnson
  Cc: Jeremy Linton, Russell King - ARM Linux admin, Jon,
	Cristi Sovaiala, Ioana Ciornei, Andy Shevchenko, Florian Fainelli,
	Madalin Bucur, netdev, linux.cj
In-Reply-To: <20200617171536.12014-3-calvin.johnson@oss.nxp.com>

On Wed, Jun 17, 2020 at 10:45:34PM +0530, Calvin Johnson wrote:
> From: Jeremy Linton <jeremy.linton@arm.com>

> +static const struct acpi_device_id xgmac_acpi_match[] = {
> +	{ "NXP0006", (kernel_ulong_t)NULL },

Hi Jeremy

What exactly does NXP0006 represent? An XGMAC MDIO bus master? Some
NXP MDIO bus master? An XGMAC Ethernet controller which has an NXP
MDIO bus master? A cluster of Ethernet controllers?

Is this documented somewhere? In the DT world we have a clear
documentation for all the compatible strings. Is there anything
similar in the ACPI world for these magic numbers?

Thanks
     Andrew

^ permalink raw reply

* [PATCH bpf-next] bpf: sk_storage: Prefer to get a free cache_idx
From: Martin KaFai Lau @ 2020-06-17 17:42 UTC (permalink / raw)
  To: bpf; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, netdev

The cache_idx is currently picked by RR.  There is chance that
the same cache_idx will be picked by multiple sk_storage_maps while
other cache_idx is still unused.  e.g. It could happen when the
sk_storage_map is recreated during the restart of the user
space process.

This patch tracks the usage count for each cache_idx.  There is
16 of them now (defined in BPF_SK_STORAGE_CACHE_SIZE).
It will try to pick the free cache_idx.  If none was found,
it would pick one with the minimal usage count.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 net/core/bpf_sk_storage.c | 41 +++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d2c4d16dadba..1dae4b543243 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -11,8 +11,6 @@
 #include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
 
-static atomic_t cache_idx;
-
 #define SK_STORAGE_CREATE_FLAG_MASK					\
 	(BPF_F_NO_PREALLOC | BPF_F_CLONE)
 
@@ -81,6 +79,9 @@ struct bpf_sk_storage_elem {
 #define SDATA(_SELEM) (&(_SELEM)->sdata)
 #define BPF_SK_STORAGE_CACHE_SIZE	16
 
+static DEFINE_SPINLOCK(cache_idx_lock);
+static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE];
+
 struct bpf_sk_storage {
 	struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
 	struct hlist_head list;	/* List of bpf_sk_storage_elem */
@@ -512,6 +513,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
 	return 0;
 }
 
+static u16 cache_idx_get(void)
+{
+	u64 min_usage = U64_MAX;
+	u16 i, res = 0;
+
+	spin_lock(&cache_idx_lock);
+
+	for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) {
+		if (cache_idx_usage_counts[i] < min_usage) {
+			min_usage = cache_idx_usage_counts[i];
+			res = i;
+
+			/* Found a free cache_idx */
+			if (!min_usage)
+				break;
+		}
+	}
+	cache_idx_usage_counts[res]++;
+
+	spin_unlock(&cache_idx_lock);
+
+	return res;
+}
+
+static void cache_idx_free(u16 idx)
+{
+	spin_lock(&cache_idx_lock);
+	cache_idx_usage_counts[idx]--;
+	spin_unlock(&cache_idx_lock);
+}
+
 /* Called by __sk_destruct() & bpf_sk_storage_clone() */
 void bpf_sk_storage_free(struct sock *sk)
 {
@@ -560,6 +592,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_sk_storage_map *)map;
 
+	cache_idx_free(smap->cache_idx);
+
 	/* Note that this map might be concurrently cloned from
 	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
 	 * RCU read section to finish before proceeding. New RCU
@@ -673,8 +707,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 	}
 
 	smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
-	smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
-		BPF_SK_STORAGE_CACHE_SIZE;
+	smap->cache_idx = cache_idx_get();
 
 	return &smap->map;
 }
-- 
2.24.1


^ permalink raw reply related

* Re: [Patch net] net: change addr_list_lock back to static key
From: Cong Wang @ 2020-06-17 17:42 UTC (permalink / raw)
  To: Taehee Yoo; +Cc: Netdev, syzbot+f3a0e80c34b3fc28ac5e, Dmitry Vyukov
In-Reply-To: <CAMArcTV6YQPC6uo2NHER41H++XnCG+LW7ZXDgvF_snYmMZON3Q@mail.gmail.com>

On Tue, Jun 16, 2020 at 8:03 AM Taehee Yoo <ap420073@gmail.com> wrote:
>
> I agree with that.
> And, do you have any plan to replace netif_addr_lock_bh() with
> netif_addr_lock_nested()?
> (Of course, it needs BH handling code)
> I'm not sure but I think it would be needed.

Yeah, I agree it's needed. I have a patch now and will send it
out after some testing.

Thanks.

^ permalink raw reply

* Re: [PATCH v1 1/3] net: phy: Allow mdio buses to auto-probe c45 devices
From: Russell King - ARM Linux admin @ 2020-06-17 17:44 UTC (permalink / raw)
  To: Calvin Johnson
  Cc: Jeremy Linton, Jon, Cristi Sovaiala, Ioana Ciornei, Andrew Lunn,
	Andy Shevchenko, Florian Fainelli, Madalin Bucur, netdev,
	linux.cj
In-Reply-To: <20200617171536.12014-2-calvin.johnson@oss.nxp.com>

On Wed, Jun 17, 2020 at 10:45:33PM +0530, Calvin Johnson wrote:
> From: Jeremy Linton <jeremy.linton@arm.com>
> 
> The mdiobus_scan logic is currently hardcoded to only
> work with c22 devices. This works fairly well in most
> cases, but its possible a c45 device doesn't respond
> despite being a standard phy. If the parent hardware
> is capable, it makes sense to scan for c22 devices before
> falling back to c45.
> 
> As we want this to reflect the capabilities of the STA,
> lets add a field to the mii_bus structure to represent
> the capability. That way devices can opt into the extended
> scanning. Existing users should continue to default to c22
> only scanning as long as they are zero'ing the structure
> before use.
> 
> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
> Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>

I know that we've hashed this out quite a bit already, but I would like
to point out that include/linux/mdio.h contains:

 * struct mdio_if_info - Ethernet controller MDIO interface
 * @mode_support: MDIO modes supported.  If %MDIO_SUPPORTS_C22 is set then
 *      MII register access will be passed through with @devad =
 *      %MDIO_DEVAD_NONE.  If %MDIO_EMULATE_C22 is set then access to
 *      commonly used clause 22 registers will be translated into
 *      clause 45 registers.

#define MDIO_SUPPORTS_C22               1
#define MDIO_SUPPORTS_C45               2
#define MDIO_EMULATE_C22                4

While this structure is not applicable to phylib or mii_bus, it may be
worth considering that there already exist definitions for identifying
the properties of the underlying bus.

> ---
> 
>  drivers/net/phy/mdio_bus.c | 17 +++++++++++++++--
>  include/linux/phy.h        |  7 +++++++
>  2 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
> index 6ceee82b2839..e6c179b89907 100644
> --- a/drivers/net/phy/mdio_bus.c
> +++ b/drivers/net/phy/mdio_bus.c
> @@ -739,10 +739,23 @@ EXPORT_SYMBOL(mdiobus_free);
>   */
>  struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
>  {
> -	struct phy_device *phydev;
> +	struct phy_device *phydev = ERR_PTR(-ENODEV);
>  	int err;
>  
> -	phydev = get_phy_device(bus, addr, false);
> +	switch (bus->probe_capabilities) {
> +	case MDIOBUS_C22:
> +		phydev = get_phy_device(bus, addr, false);
> +		break;
> +	case MDIOBUS_C45:
> +		phydev = get_phy_device(bus, addr, true);
> +		break;
> +	case MDIOBUS_C22_C45:
> +		phydev = get_phy_device(bus, addr, false);
> +		if (IS_ERR(phydev))
> +			phydev = get_phy_device(bus, addr, true);
> +		break;
> +	}
> +
>  	if (IS_ERR(phydev))
>  		return phydev;
>  
> diff --git a/include/linux/phy.h b/include/linux/phy.h
> index 9248dd2ce4ca..50e5312b2304 100644
> --- a/include/linux/phy.h
> +++ b/include/linux/phy.h
> @@ -298,6 +298,13 @@ struct mii_bus {
>  	/* RESET GPIO descriptor pointer */
>  	struct gpio_desc *reset_gpiod;
>  
> +	/* bus capabilities, used for probing */
> +	enum {
> +		MDIOBUS_C22 = 0,
> +		MDIOBUS_C45,
> +		MDIOBUS_C22_C45,
> +	} probe_capabilities;

I think it would be better to reserve "0" to mean that no capabilities
have been declared.  We hae the situation where we have mii_bus that
exist which do support C45, but as they stand, probe_capabilities will
be zero, and with your definitions above, that means MDIOBUS_C22.

It seems this could lock in some potential issues later down the line
if we want to use this elsewhere.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [PATCH bpf v5 1/3] bpf: don't return EINVAL from {get,set}sockopt when optlen > PAGE_SIZE
From: sdf @ 2020-06-17 17:45 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: netdev, bpf, davem, ast, daniel, David Laight
In-Reply-To: <20200617170909.koev3t5fmngla3c4@ast-mbp.dhcp.thefacebook.com>

On 06/17, Alexei Starovoitov wrote:
> On Tue, Jun 16, 2020 at 06:04:14PM -0700, Stanislav Fomichev wrote:
> > Attaching to these hooks can break iptables because its optval is
> > usually quite big, or at least bigger than the current PAGE_SIZE limit.
> > David also mentioned some SCTP options can be big (around 256k).
> >
> > For such optvals we expose only the first PAGE_SIZE bytes to
> > the BPF program. BPF program has two options:
> > 1. Set ctx->optlen to 0 to indicate that the BPF's optval
> >    should be ignored and the kernel should use original userspace
> >    value.
> > 2. Set ctx->optlen to something that's smaller than the PAGE_SIZE.
> >
> > v5:
> > * use ctx->optlen == 0 with trimmed buffer (Alexei Starovoitov)
> > * update the docs accordingly
> >
> > v4:
> > * use temporary buffer to avoid optval == optval_end == NULL;
> >   this removes the corner case in the verifier that might assume
> >   non-zero PTR_TO_PACKET/PTR_TO_PACKET_END.
> >
> > v3:
> > * don't increase the limit, bypass the argument
> >
> > v2:
> > * proper comments formatting (Jakub Kicinski)
> >
> > Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks")
> > Cc: David Laight <David.Laight@ACULAB.COM>
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  kernel/bpf/cgroup.c | 53 ++++++++++++++++++++++++++++-----------------
> >  1 file changed, 33 insertions(+), 20 deletions(-)
> >
> > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> > index 4d76f16524cc..ac53102e244a 100644
> > --- a/kernel/bpf/cgroup.c
> > +++ b/kernel/bpf/cgroup.c
> > @@ -1276,16 +1276,23 @@ static bool  
> __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
> >
> >  static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int  
> max_optlen)
> >  {
> > -	if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
> > +	if (unlikely(max_optlen < 0))
> >  		return -EINVAL;
> >
> > +	if (unlikely(max_optlen > PAGE_SIZE)) {
> > +		/* We don't expose optvals that are greater than PAGE_SIZE
> > +		 * to the BPF program.
> > +		 */
> > +		max_optlen = PAGE_SIZE;
> > +	}
> > +
> >  	ctx->optval = kzalloc(max_optlen, GFP_USER);
> >  	if (!ctx->optval)
> >  		return -ENOMEM;
> >
> >  	ctx->optval_end = ctx->optval + max_optlen;
> >
> > -	return 0;
> > +	return max_optlen;
> >  }
> >
> >  static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
> > @@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct  
> sock *sk, int *level,
> >  	 */
> >  	max_optlen = max_t(int, 16, *optlen);
> >
> > -	ret = sockopt_alloc_buf(&ctx, max_optlen);
> > -	if (ret)
> > -		return ret;
> > +	max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
> > +	if (max_optlen < 0)
> > +		return max_optlen;
> >
> >  	ctx.optlen = *optlen;
> >
> > -	if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
> > +	if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) !=  
> 0) {
> >  		ret = -EFAULT;
> >  		goto out;
> >  	}
> > @@ -1353,8 +1360,14 @@ int __cgroup_bpf_run_filter_setsockopt(struct  
> sock *sk, int *level,
> >  		/* export any potential modifications */
> >  		*level = ctx.level;
> >  		*optname = ctx.optname;
> > -		*optlen = ctx.optlen;
> > -		*kernel_optval = ctx.optval;
> > +
> > +		/* optlen == 0 from BPF indicates that we should
> > +		 * use original userspace data.
> > +		 */
> > +		if (ctx.optlen != 0) {
> > +			*optlen = ctx.optlen;

> I think it should be:
> *optlen = min(ctx.optlen, max_optlen);
We do have the following (existing) check above:
	} else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
		/* optlen is out of bounds */
		ret = -EFAULT;
	} else {

So we shouldn't need any min here? Or am I missing something?

> Otherwise when bpf prog doesn't adjust ctx.oplen the kernel will see
> 4k only in kernel_optval whereas optlen will be > 4k.
> I suspect iptables sockopt should have crashed at this point.
> How did you test it?
The selftests that I've attached in the series. The test is passing
two pages and for IP_TOS we bypass the value via optlen=0 and
for IP_FREEBIND we trim the buffer to 1 byte. I think this should
cover this check here.

One thing I didn't really test is getsockopt when the kernel
returns really large buffer (iptables). Right now, the test
gets 4 bytes (trimmed) from the kernel. I think that's the only
place that I didn't properly test. I wonder whether I should
do a real iptables-like setsockopt/getsockopt :-/

^ permalink raw reply

* Re: [PATCH v1 2/3] net/fsl: acpize xgmac_mdio
From: Russell King - ARM Linux admin @ 2020-06-17 17:49 UTC (permalink / raw)
  To: Calvin Johnson
  Cc: Jeremy Linton, Jon, Cristi Sovaiala, Ioana Ciornei, Andrew Lunn,
	Andy Shevchenko, Florian Fainelli, Madalin Bucur, netdev,
	linux.cj
In-Reply-To: <20200617171536.12014-3-calvin.johnson@oss.nxp.com>

On Wed, Jun 17, 2020 at 10:45:34PM +0530, Calvin Johnson wrote:
> From: Jeremy Linton <jeremy.linton@arm.com>
> 
> Add ACPI support for xgmac MDIO bus registration while maintaining
> the existing DT support.
> 
> The function mdiobus_register() inside of_mdiobus_register(), brings
> up all the PHYs on the mdio bus and attach them to the bus.
> 
> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
> Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
> ---
> 
>  drivers/net/ethernet/freescale/xgmac_mdio.c | 27 +++++++++++++--------
>  1 file changed, 17 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
> index c82c85ef5fb3..fb7f8caff643 100644
> --- a/drivers/net/ethernet/freescale/xgmac_mdio.c
> +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
> @@ -245,14 +245,14 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
>  {
>  	struct device_node *np = pdev->dev.of_node;
>  	struct mii_bus *bus;
> -	struct resource res;
> +	struct resource *res;
>  	struct mdio_fsl_priv *priv;
>  	int ret;
>  
> -	ret = of_address_to_resource(np, 0, &res);
> -	if (ret) {
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!res) {
>  		dev_err(&pdev->dev, "could not obtain address\n");
> -		return ret;
> +		return -EINVAL;
>  	}

I think, as you're completely rewriting the resource handling, it would
be a good idea to switch over to using devm_* stuff here.

	void __iomem *regs;

	regs = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(regs)) {
		dev_err(&pdev->dev, "could not map resource: %pe\n",
			regs);
		return PTR_ERR(regs);
	}

This also gets rid of the error checking on priv->mdio_base below, and
of course the unmap/resource handling for priv->mdio_base in the error
paths and device removal paths.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [PATCH net v1] net: phy: smsc: fix printing too many logs
From: Dejin Zheng @ 2020-06-17 17:50 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: f.fainelli, hkallweit1, linux, davem, kuba, netdev, linux-kernel,
	Kevin Groeneveld
In-Reply-To: <20200617161925.GE205574@lunn.ch>

On Wed, Jun 17, 2020 at 06:19:25PM +0200, Andrew Lunn wrote:
> On Wed, Jun 17, 2020 at 11:33:40PM +0800, Dejin Zheng wrote:
> > Commit 7ae7ad2f11ef47 ("net: phy: smsc: use phy_read_poll_timeout()
> > to simplify the code") will print a lot of logs as follows when Ethernet
> > cable is not connected:
> > 
> > [    4.473105] SMSC LAN8710/LAN8720 2188000.ethernet-1:00: lan87xx_read_status failed: -110
> > 
> > So fix it by read_poll_timeout().
> 
> Do you have a more detailed explanation of what is going on here?
> 
> After a lot of thought, i think i can see how this happens. But the
> commit message should really spell out why this is the correct fix.
>
Hi Andrew:

Kevin report a bug for me in link[1], I check the Commit 7ae7ad2f11ef47
("net: phy: smsc: use phy_read_poll_timeout() to simplify the code") and
found it change the original behavior in smsc driver. It does not has
any error message whether it is timeout or phy_read fails, but this Commit
will changed it and will print some error messages by
phy_read_poll_timeout() when it is timeout or phy_read fails. so use the
read_poll_timeout() to replace phy_read_poll_timeout() to fix this
issue. the read_poll_timeout() does not print any log when it goes
wrong.

the original codes is that:

	/* Wait max 640 ms to detect energy */
	for (i = 0; i < 64; i++) {
	        /* Sleep to allow link test pulses to be sent */
	        msleep(10);
	        rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
	        if (rc < 0)
	                return rc;
	        if (rc & MII_LAN83C185_ENERGYON)
	                break;
	}

Commit 7ae7ad2f11ef47 modify it as this:

	phy_read_poll_timeout(phydev, MII_LAN83C185_CTRL_STATUS,
	                      rc & MII_LAN83C185_ENERGYON, 10000,
	                      640000, true);
	if (rc < 0)
	        return rc;

the phy_read_poll_timeout() will print a error log by phydev_err()
when timeout or rc < 0. read_poll_timeout() just return timeout
error and does not print any error log.

#define phy_read_poll_timeout(phydev, regnum, val, cond, sleep_us, \
                                timeout_us, sleep_before_read) \
({ \
        int __ret = read_poll_timeout(phy_read, val, (cond) || val < 0, \
                sleep_us, timeout_us, sleep_before_read, phydev, regnum); \
        if (val <  0) \
                __ret = val; \
        if (__ret) \
                phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \
        __ret; \
})

So use read_poll_timeout Use read_poll_timeout() to be consistent with the
original code.

link[1]: https://lkml.org/lkml/2020/6/1/1408

> Thanks
> 	Andrew

^ permalink raw reply

* Re: mlxsw: spectrum: Adjust headroom buffers for 8x ports
From: Ido Schimmel @ 2020-06-17 17:52 UTC (permalink / raw)
  To: Colin Ian King
  Cc: Jiri Pirko, David S. Miller, Jakub Kicinski, Petr Machata, netdev,
	linux-kernel@vger.kernel.org
In-Reply-To: <bae3b4f6-3e9b-bdde-72b0-b8f1e7575fd4@canonical.com>

On Wed, Jun 17, 2020 at 06:15:35PM +0100, Colin Ian King wrote:
> Hi
> 
> Static analysis with Coverity has detected an issue that relies on the
> machine endianness to work. The commit in question is:
> 
> commit 60833d54d56c21e7538296eb2e00e104768fd047
> Author: Ido Schimmel <idosch@mellanox.com>
> Date:   Tue Jun 16 10:14:58 2020 +0300
> 
>     mlxsw: spectrum: Adjust headroom buffers for 8x ports
> 
> in line:
>     mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, (u16 *) &buffsize);
> 
> 
> The cast of the u32 buffsize to (u16 *) to scale buffsize in the call to
> to mlxsw_sp_port_headroom_8x_adjust() will behave differently on big
> endian architectures to that of little endian architectures.  I'm not
> sure if this is intentional or not.

Not intentional. The hardware interface is quite weird. The buffer size
can be configured via two registers. One takes size as 24 bits and the
second as 16 bits. Either way, the max size is much lower than 2^16-1.

> One solution is to either make buffsize a u16, but I am concerned this
> may be incorrect as the buffsize is assigned from the call
> mlxsw_sp_span_buffsize_get() and this returns a u32 so we may have
> overflow issues. Probably better to make
> mlxsw_sp_port_headroom_8x_adjust handle u32 integers and to return the
> adjusted value rather than modifying it by pass-by-reference.

Thanks for the report, will fix.

> 
> Colin
> 

^ permalink raw reply

* Re: [PATCH v1 2/3] net/fsl: acpize xgmac_mdio
From: Andy Shevchenko @ 2020-06-17 17:54 UTC (permalink / raw)
  To: Russell King - ARM Linux admin
  Cc: Calvin Johnson, Jeremy Linton, Jon, Cristi Sovaiala,
	Ioana Ciornei, Andrew Lunn, Florian Fainelli, Madalin Bucur,
	netdev, linux.cj
In-Reply-To: <20200617174930.GU1551@shell.armlinux.org.uk>

On Wed, Jun 17, 2020 at 8:49 PM Russell King - ARM Linux admin
<linux@armlinux.org.uk> wrote:
> On Wed, Jun 17, 2020 at 10:45:34PM +0530, Calvin Johnson wrote:

...

> > -     ret = of_address_to_resource(np, 0, &res);
> > -     if (ret) {
> > +     res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +     if (!res) {
> >               dev_err(&pdev->dev, "could not obtain address\n");
> > -             return ret;
> > +             return -EINVAL;
> >       }
>
> I think, as you're completely rewriting the resource handling, it would
> be a good idea to switch over to using devm_* stuff here.
>
>         void __iomem *regs;
>
>         regs = devm_platform_ioremap_resource(pdev, 0);
>         if (IS_ERR(regs))
{
>                 dev_err(&pdev->dev, "could not map resource: %pe\n",
>                         regs);

And just in case, this message is dup. The API has few of them
depending on the error conditions.

>                 return PTR_ERR(regs);
>         }


-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH bpf v5 1/3] bpf: don't return EINVAL from {get,set}sockopt when optlen > PAGE_SIZE
From: Alexei Starovoitov @ 2020-06-17 17:59 UTC (permalink / raw)
  To: sdf; +Cc: netdev, bpf, davem, ast, daniel, David Laight
In-Reply-To: <20200617174508.GA246265@google.com>

On Wed, Jun 17, 2020 at 10:45:08AM -0700, sdf@google.com wrote:
> On 06/17, Alexei Starovoitov wrote:
> > On Tue, Jun 16, 2020 at 06:04:14PM -0700, Stanislav Fomichev wrote:
> > > Attaching to these hooks can break iptables because its optval is
> > > usually quite big, or at least bigger than the current PAGE_SIZE limit.
> > > David also mentioned some SCTP options can be big (around 256k).
> > >
> > > For such optvals we expose only the first PAGE_SIZE bytes to
> > > the BPF program. BPF program has two options:
> > > 1. Set ctx->optlen to 0 to indicate that the BPF's optval
> > >    should be ignored and the kernel should use original userspace
> > >    value.
> > > 2. Set ctx->optlen to something that's smaller than the PAGE_SIZE.
> > >
> > > v5:
> > > * use ctx->optlen == 0 with trimmed buffer (Alexei Starovoitov)
> > > * update the docs accordingly
> > >
> > > v4:
> > > * use temporary buffer to avoid optval == optval_end == NULL;
> > >   this removes the corner case in the verifier that might assume
> > >   non-zero PTR_TO_PACKET/PTR_TO_PACKET_END.
> > >
> > > v3:
> > > * don't increase the limit, bypass the argument
> > >
> > > v2:
> > > * proper comments formatting (Jakub Kicinski)
> > >
> > > Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks")
> > > Cc: David Laight <David.Laight@ACULAB.COM>
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > >  kernel/bpf/cgroup.c | 53 ++++++++++++++++++++++++++++-----------------
> > >  1 file changed, 33 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> > > index 4d76f16524cc..ac53102e244a 100644
> > > --- a/kernel/bpf/cgroup.c
> > > +++ b/kernel/bpf/cgroup.c
> > > @@ -1276,16 +1276,23 @@ static bool
> > __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
> > >
> > >  static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int
> > max_optlen)
> > >  {
> > > -	if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
> > > +	if (unlikely(max_optlen < 0))
> > >  		return -EINVAL;
> > >
> > > +	if (unlikely(max_optlen > PAGE_SIZE)) {
> > > +		/* We don't expose optvals that are greater than PAGE_SIZE
> > > +		 * to the BPF program.
> > > +		 */
> > > +		max_optlen = PAGE_SIZE;
> > > +	}
> > > +
> > >  	ctx->optval = kzalloc(max_optlen, GFP_USER);
> > >  	if (!ctx->optval)
> > >  		return -ENOMEM;
> > >
> > >  	ctx->optval_end = ctx->optval + max_optlen;
> > >
> > > -	return 0;
> > > +	return max_optlen;
> > >  }
> > >
> > >  static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
> > > @@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct
> > sock *sk, int *level,
> > >  	 */
> > >  	max_optlen = max_t(int, 16, *optlen);
> > >
> > > -	ret = sockopt_alloc_buf(&ctx, max_optlen);
> > > -	if (ret)
> > > -		return ret;
> > > +	max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
> > > +	if (max_optlen < 0)
> > > +		return max_optlen;
> > >
> > >  	ctx.optlen = *optlen;
> > >
> > > -	if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
> > > +	if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) !=
> > 0) {
> > >  		ret = -EFAULT;
> > >  		goto out;
> > >  	}
> > > @@ -1353,8 +1360,14 @@ int __cgroup_bpf_run_filter_setsockopt(struct
> > sock *sk, int *level,
> > >  		/* export any potential modifications */
> > >  		*level = ctx.level;
> > >  		*optname = ctx.optname;
> > > -		*optlen = ctx.optlen;
> > > -		*kernel_optval = ctx.optval;
> > > +
> > > +		/* optlen == 0 from BPF indicates that we should
> > > +		 * use original userspace data.
> > > +		 */
> > > +		if (ctx.optlen != 0) {
> > > +			*optlen = ctx.optlen;
> 
> > I think it should be:
> > *optlen = min(ctx.optlen, max_optlen);
> We do have the following (existing) check above:
> 	} else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
> 		/* optlen is out of bounds */
> 		ret = -EFAULT;
> 	} else {
> 
> So we shouldn't need any min here? Or am I missing something?

ahh. you're right.
Applied to bpf tree.

> > Otherwise when bpf prog doesn't adjust ctx.oplen the kernel will see
> > 4k only in kernel_optval whereas optlen will be > 4k.
> > I suspect iptables sockopt should have crashed at this point.
> > How did you test it?
> The selftests that I've attached in the series. The test is passing
> two pages and for IP_TOS we bypass the value via optlen=0 and
> for IP_FREEBIND we trim the buffer to 1 byte. I think this should
> cover this check here.
> 
> One thing I didn't really test is getsockopt when the kernel
> returns really large buffer (iptables). Right now, the test
> gets 4 bytes (trimmed) from the kernel. I think that's the only
> place that I didn't properly test. I wonder whether I should
> do a real iptables-like setsockopt/getsockopt :-/

would be nice :)

^ permalink raw reply

* Re: qmi_wwan not using netif_carrier_*()
From: Dan Williams @ 2020-06-17 18:01 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: Tanjeff-Nicolai Moos, netdev
In-Reply-To: <20200617172434.GH205574@lunn.ch>

On Wed, 2020-06-17 at 19:24 +0200, Andrew Lunn wrote:
> On Wed, Jun 17, 2020 at 11:59:33AM -0500, Dan Williams wrote:
> > On Wed, 2020-06-17 at 18:48 +0200, Andrew Lunn wrote:
> > > On Wed, Jun 17, 2020 at 03:21:53PM +0200, Tanjeff-Nicolai Moos
> > > wrote:
> > > > Hi netdevs,
> > > > 
> > > > Kernel version:
> > > > 
> > > >   I'm working with kernel 4.14.137 (OpenWRT project). But I
> > > > looked
> > > > at
> > > >   the source of kernel 5.7 and found the same situation.
> > > > 
> > > > Problem:
> > > > 
> > > >   I'm using the qmi_wwan driver for a Sierra Wireless EM7455
> > > > LTE
> > > >   modem. This driver does not use
> > > >   netif_carrier_on()/netif_carrier_off() to update its link
> > > > status.
> > > >   This confuses ledtrig_netdev which uses netif_carrier_ok() to
> > > > obtain
> > > >   the link status.
> > > > 
> > > > My solution:
> > > > 
> > > >   As a solution (or workaround?) I would try:
> > > > 
> > > >   1) In drivers/net/usb/qmi_wwan.c, lines 904/913: Add the flag
> > > >      FLAG_LINK_INTR.
> > > > 
> > > >   2) In drivers/net/usb/usbnet.c, functions usbnet_open() and
> > > >      usbnet_stop(): Add a call to netif_carrier_*(),
> > > >      but only if FLAG_LINK_INTR is set.
> > > > 
> > > > Question:
> > > > 
> > > >   Is this the intended way to use FLAG_LINK_INTR and
> > > > netif_carrier_*()?
> > > >   Or is there another recommended way to obtain the link status
> > > > of
> > > >   network devices (I could change ledtrig_netdev)?
> > > 
> > > Hi Tanjeff
> > > 
> > > With Ethernet, having a carrier means there is a link partner,
> > > the
> > > layer 2 of the OSI 7 layer stack model is working. If the
> > > interface
> > > is
> > > not open()ed, it clearly should not have carrier. However, just
> > > because it is open, does not mean it has carrier. The cable could
> > > be
> > > unplugged, etc.
> > > 
> > > This is an LTE modem. What does carrier mean here? I don't know
> > > if it
> > > is well defined, but i would guess it is connected to a base
> > > station
> > > which is offering service. I'm assuming you are interested in
> > > data
> > > here, not wanting to make a 911/999/112/$EMERGENCY_SERVICE call
> > > which
> > > in theory all base stations should accept.
> > > 
> > > Is there a way to get this state information from the hardware?
> > > That
> > > would be the correct way to set the carrier.
> > 
> > There isn't. All the setup that would result in IFF_LOWER_UP (eg
> > ability to pass packets to the cellular network) happens over
> > channels
> > *other* than the ethernet one. eg CDC-WDM, CDC-ACM, CDC-MBIM, AT
> > commands, QMI commands, MBIM commands, etc.
> > 
> > Something in userspace handles the actual IP-level connection setup
> > and
> > once that's done, only then do you really have IFF_LOWER_UP. One
> > way to
> > solve this could be to require userspace connection managers to
> > manage
> > the carrier state of the device, which is possible for some drivers
> > already IIRC.
> 
> So Tanjeff, what is you real use case here? I assume you want to
> control an LED so it is on when the LTE modem is connected? Could you
> export the LED to user space and have a dhclient-enter/exit script
> change
> the state of the LED?

Most of these devices do not use DHCP either, but get the IP through
the control channel (QMI, MBIM, AT commands, etc).  I'd just watch for
an IP address on the interface instead.

Dan


^ permalink raw reply

* Re: [MPTCP] [PATCH net 1/2] mptcp: cache msk on MP_JOIN init_req
From: Mat Martineau @ 2020-06-17 18:13 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: netdev, David S. Miller, Jakub Kicinski, mptcp
In-Reply-To: <3213f22a1fa85217032ba3de482f2209c63f2870.1592388398.git.pabeni@redhat.com>

On Wed, 17 Jun 2020, Paolo Abeni wrote:

> The msk ownership is transferred to the child socket at
> 3rd ack time, so that we avoid more lookups later. If the
> request does not reach the 3rd ack, the MSK reference is
> dropped at request sock release time.
>
> As a side effect, fallback is now tracked by a NULL msk
> reference instead of zeroed 'mp_join' field. This will
> simplify the next patch.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/protocol.h |  1 +
> net/mptcp/subflow.c  | 39 +++++++++++++++++----------------------
> 2 files changed, 18 insertions(+), 22 deletions(-)
>

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>

--
Mat Martineau
Intel

^ permalink raw reply

* Re: [MPTCP] [PATCH net 2/2] mptcp: drop MP_JOIN request sock on syn cookies
From: Mat Martineau @ 2020-06-17 18:14 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: netdev, David S. Miller, Jakub Kicinski, mptcp
In-Reply-To: <3aff0acec0b89a55fdae02194036f34be56b545c.1592388398.git.pabeni@redhat.com>

On Wed, 17 Jun 2020, Paolo Abeni wrote:

> Currently any MPTCP socket using syn cookies will fallback to
> TCP at 3rd ack time. In case of MP_JOIN requests, the RFC mandate
> closing the child and sockets, but the existing error paths
> do not handle the syncookie scenario correctly.
>
> Address the issue always forcing the child shutdown in case of
> MP_JOIN fallback.
>
> Fixes: ae2dd7164943 ("mptcp: handle tcp fallback when using syn cookies")
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/subflow.c | 18 ++++++++++--------
> 1 file changed, 10 insertions(+), 8 deletions(-)

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>

--
Mat Martineau
Intel

^ permalink raw reply

* [PATCH net-next v7 0/6] RGMII Internal delay common property
From: Dan Murphy @ 2020-06-17 18:20 UTC (permalink / raw)
  To: andrew, f.fainelli, hkallweit1, davem, robh
  Cc: netdev, linux-kernel, devicetree, Dan Murphy

Hello

The RGMII internal delay is a common setting found in most RGMII capable PHY
devices.  It was found that many vendor specific device tree properties exist
to do the same function. This creates a common property to be used for PHY's
that have internal delays for the Rx and Tx paths.

If the internal delay is tunable then the caller needs to pass the internal
delay array and the return will be the index in the array that was found in
the firmware node.

If the internal delay is fixed then the caller only needs to indicate which
delay to return and if the value is defined then the value in the firmware is
returned.

This series contains examples of both a configurable delay and a fixed delay.

Dan Murphy (6):
  dt-bindings: net: Add tx and rx internal delays
  net: phy: Add a helper to return the index for of the internal delay
  dt-bindings: net: Add RGMII internal delay for DP83869
  net: dp83869: Add RGMII internal delay configuration
  dt-bindings: net: dp83822: Add TI dp83822 phy
  net: phy: DP83822: Add ability to advertise Fiber connection

 .../devicetree/bindings/net/ethernet-phy.yaml |  11 ++
 .../devicetree/bindings/net/ti,dp83822.yaml   |  51 +++++++++
 .../devicetree/bindings/net/ti,dp83869.yaml   |  16 ++-
 drivers/net/phy/dp83822.c                     | 108 ++++++++++++++++--
 drivers/net/phy/dp83869.c                     |  53 ++++++++-
 drivers/net/phy/phy_device.c                  |  68 +++++++++++
 include/linux/phy.h                           |   4 +
 7 files changed, 299 insertions(+), 12 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/ti,dp83822.yaml

-- 
2.26.2

^ permalink raw reply

* [PATCH net-next v7 3/6] dt-bindings: net: Add RGMII internal delay for DP83869
From: Dan Murphy @ 2020-06-17 18:20 UTC (permalink / raw)
  To: andrew, f.fainelli, hkallweit1, davem, robh
  Cc: netdev, linux-kernel, devicetree, Dan Murphy
In-Reply-To: <20200617182019.6790-1-dmurphy@ti.com>

Add the internal delay values into the header and update the binding
with the internal delay properties.

Signed-off-by: Dan Murphy <dmurphy@ti.com>
---
 .../devicetree/bindings/net/ti,dp83869.yaml      | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/ti,dp83869.yaml b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
index 5b69ef03bbf7..71e90a3e4652 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83869.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
@@ -8,7 +8,7 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 title: TI DP83869 ethernet PHY
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: "ethernet-phy.yaml#"
 
 maintainers:
   - Dan Murphy <dmurphy@ti.com>
@@ -64,6 +64,18 @@ properties:
        Operational mode for the PHY.  If this is not set then the operational
        mode is set by the straps. see dt-bindings/net/ti-dp83869.h for values
 
+  rx-internal-delay-ps:
+    description: Delay is in pico seconds
+    enum: [ 250, 500, 750, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750, 3000,
+            3250, 3500, 3750, 4000 ]
+    default: 2000
+
+  tx-internal-delay-ps:
+    description: Delay is in pico seconds
+    enum: [ 250, 500, 750, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750, 3000,
+            3250, 3500, 3750, 4000 ]
+    default: 2000
+
 required:
   - reg
 
@@ -80,5 +92,7 @@ examples:
         ti,op-mode = <DP83869_RGMII_COPPER_ETHERNET>;
         ti,max-output-impedance = "true";
         ti,clk-output-sel = <DP83869_CLK_O_SEL_CHN_A_RCLK>;
+        rx-internal-delay-ps = <2000>;
+        tx-internal-delay-ps = <2000>;
       };
     };
-- 
2.26.2


^ permalink raw reply related

* [PATCH net-next v7 4/6] net: dp83869: Add RGMII internal delay configuration
From: Dan Murphy @ 2020-06-17 18:20 UTC (permalink / raw)
  To: andrew, f.fainelli, hkallweit1, davem, robh
  Cc: netdev, linux-kernel, devicetree, Dan Murphy
In-Reply-To: <20200617182019.6790-1-dmurphy@ti.com>

Add RGMII internal delay configuration for Rx and Tx.

Signed-off-by: Dan Murphy <dmurphy@ti.com>
---
 drivers/net/phy/dp83869.c | 53 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
index 53ed3abc26c9..2c651bcc440d 100644
--- a/drivers/net/phy/dp83869.c
+++ b/drivers/net/phy/dp83869.c
@@ -64,6 +64,10 @@
 #define DP83869_RGMII_TX_CLK_DELAY_EN		BIT(1)
 #define DP83869_RGMII_RX_CLK_DELAY_EN		BIT(0)
 
+/* RGMIIDCTL */
+#define DP83869_RGMII_CLK_DELAY_SHIFT		4
+#define DP83869_CLK_DELAY_DEF			7
+
 /* STRAP_STS1 bits */
 #define DP83869_STRAP_OP_MODE_MASK		GENMASK(2, 0)
 #define DP83869_STRAP_STS1_RESERVED		BIT(11)
@@ -78,9 +82,6 @@
 #define DP83869_PHYCR_FIFO_DEPTH_MASK	GENMASK(15, 12)
 #define DP83869_PHYCR_RESERVED_MASK	BIT(11)
 
-/* RGMIIDCTL bits */
-#define DP83869_RGMII_TX_CLK_DELAY_SHIFT	4
-
 /* IO_MUX_CFG bits */
 #define DP83869_IO_MUX_CFG_IO_IMPEDANCE_CTRL	0x1f
 
@@ -99,6 +100,10 @@
 #define DP83869_OP_MODE_MII			BIT(5)
 #define DP83869_SGMII_RGMII_BRIDGE		BIT(6)
 
+static const int dp83869_internal_delay[] = {250, 500, 750, 1000, 1250, 1500,
+					     1750, 2000, 2250, 2500, 2750, 3000,
+					     3250, 3500, 3750, 4000};
+
 enum {
 	DP83869_PORT_MIRRORING_KEEP,
 	DP83869_PORT_MIRRORING_EN,
@@ -108,6 +113,8 @@ enum {
 struct dp83869_private {
 	int tx_fifo_depth;
 	int rx_fifo_depth;
+	s32 rx_int_delay;
+	s32 tx_int_delay;
 	int io_impedance;
 	int port_mirroring;
 	bool rxctrl_strap_quirk;
@@ -182,6 +189,7 @@ static int dp83869_of_init(struct phy_device *phydev)
 	struct dp83869_private *dp83869 = phydev->priv;
 	struct device *dev = &phydev->mdio.dev;
 	struct device_node *of_node = dev->of_node;
+	int delay_size = ARRAY_SIZE(dp83869_internal_delay);
 	int ret;
 
 	if (!of_node)
@@ -235,6 +243,20 @@ static int dp83869_of_init(struct phy_device *phydev)
 				 &dp83869->tx_fifo_depth))
 		dp83869->tx_fifo_depth = DP83869_PHYCR_FIFO_DEPTH_4_B_NIB;
 
+	dp83869->rx_int_delay = phy_get_internal_delay(phydev, dev,
+						     &dp83869_internal_delay[0],
+						      delay_size, true);
+	if (dp83869->rx_int_delay < 0)
+		dp83869->rx_int_delay =
+				dp83869_internal_delay[DP83869_CLK_DELAY_DEF];
+
+	dp83869->tx_int_delay = phy_get_internal_delay(phydev, dev,
+						     &dp83869_internal_delay[0],
+						      delay_size, false);
+	if (dp83869->tx_int_delay < 0)
+		dp83869->tx_int_delay =
+				dp83869_internal_delay[DP83869_CLK_DELAY_DEF];
+
 	return ret;
 }
 #else
@@ -397,6 +419,31 @@ static int dp83869_config_init(struct phy_device *phydev)
 				     dp83869->clk_output_sel <<
 				     DP83869_IO_MUX_CFG_CLK_O_SEL_SHIFT);
 
+	if (phy_interface_is_rgmii(phydev)) {
+		ret = phy_write_mmd(phydev, DP83869_DEVADDR, DP83869_RGMIIDCTL,
+				    dp83869->rx_int_delay |
+			dp83869->tx_int_delay << DP83869_RGMII_CLK_DELAY_SHIFT);
+		if (ret)
+			return ret;
+
+		val = phy_read_mmd(phydev, DP83869_DEVADDR, DP83869_RGMIICTL);
+		val &= ~(DP83869_RGMII_TX_CLK_DELAY_EN |
+			 DP83869_RGMII_RX_CLK_DELAY_EN);
+
+		if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
+			val |= (DP83869_RGMII_TX_CLK_DELAY_EN |
+				DP83869_RGMII_RX_CLK_DELAY_EN);
+
+		if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
+			val |= DP83869_RGMII_TX_CLK_DELAY_EN;
+
+		if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
+			val |= DP83869_RGMII_RX_CLK_DELAY_EN;
+
+		ret = phy_write_mmd(phydev, DP83869_DEVADDR, DP83869_RGMIICTL,
+				    val);
+	}
+
 	return ret;
 }
 
-- 
2.26.2


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox