Linux ACPI

Linux ACPI
 help / color / mirror / Atom feed

* [PATCH v6 1/4] PCI: Remove pci_get_legacy_ide_irq and asm-generic/pci.h
From: Stafford Horne @ 2022-07-22 21:49 UTC (permalink / raw)
  To: LKML
  Cc: Arnd Bergmann, Stafford Horne, Geert Uytterhoeven, Pierre Morel,
	Rafael J . Wysocki, Christoph Hellwig, Richard Henderson,
	Ivan Kokshaysky, Matt Turner, Russell King, Catalin Marinas,
	Will Deacon, Guo Ren, Thomas Bogendoerfer, James E.J. Bottomley,
	Helge Deller, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
	Christian Borntraeger, Sven Schnelle, Yoshinori Sato, Rich Felker,
	David S. Miller, Richard Weinberger, Anton Ivanov, Johannes Berg,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Chris Zankel, Max Filippov, Bjorn Helgaas,
	Sergio Paracuellos, Greg Kroah-Hartman, Tiezhu Yang, Nick Child,
	Niklas Schnelle, Matthew Rosato, Kees Cook, Gustavo A. R. Silva,
	linux-alpha, linux-arm-kernel, linux-csky, linux-ia64, linux-m68k,
	linux-mips, linux-parisc, linuxppc-dev, linux-riscv, linux-s390,
	linux-sh, sparclinux, linux-um, linux-acpi, linux-pci, linux-arch
In-Reply-To: <20220722214944.831438-1-shorne@gmail.com>

The definition of the pci header function pci_get_legacy_ide_irq is only
used in platforms that support PNP.  So many of the architecutres where
it is defined do not use it.  This also means we can remove
asm-generic/pci.h as all it provides is a definition of
pci_get_legacy_ide_irq.

Where referenced, replace the usage of pci_get_legacy_ide_irq with the
libata.h macros ATA_PRIMARY_IRQ and ATA_SECONDARY_IRQ which provide the
same functionality.  This allows removing pci_get_legacy_ide_irq from
headers where it is no longer used.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Pierre Morel <pmorel@linux.ibm.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Co-developed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/alpha/include/asm/pci.h   |  6 ------
 arch/arm/include/asm/pci.h     |  5 -----
 arch/arm64/include/asm/pci.h   |  6 ------
 arch/csky/include/asm/pci.h    |  6 ------
 arch/ia64/include/asm/pci.h    |  6 ------
 arch/m68k/include/asm/pci.h    |  2 --
 arch/mips/include/asm/pci.h    |  6 ------
 arch/parisc/include/asm/pci.h  |  5 -----
 arch/powerpc/include/asm/pci.h |  1 -
 arch/riscv/include/asm/pci.h   |  6 ------
 arch/s390/include/asm/pci.h    |  1 -
 arch/sh/include/asm/pci.h      |  6 ------
 arch/sparc/include/asm/pci.h   |  9 ---------
 arch/um/include/asm/pci.h      |  8 --------
 arch/x86/include/asm/pci.h     |  3 ---
 arch/xtensa/include/asm/pci.h  |  3 ---
 drivers/pnp/resource.c         |  5 +++--
 include/asm-generic/pci.h      | 17 -----------------
 18 files changed, 3 insertions(+), 98 deletions(-)
 delete mode 100644 include/asm-generic/pci.h

diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h
index cf6bc1e64d66..6312656279d7 100644
--- a/arch/alpha/include/asm/pci.h
+++ b/arch/alpha/include/asm/pci.h
@@ -56,12 +56,6 @@ struct pci_controller {
 
 /* IOMMU controls.  */
 
-/* TODO: integrate with include/asm-generic/pci.h ? */
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	return channel ? 15 : 14;
-}
-
 #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
 
 static inline int pci_proc_domain(struct pci_bus *bus)
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index 68e6f25784a4..5916b88d4c94 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -22,11 +22,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 #define HAVE_PCI_MMAP
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE
 
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	return channel ? 15 : 14;
-}
-
 extern void pcibios_report_status(unsigned int status_mask, int warn);
 
 #endif /* __KERNEL__ */
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index b33ca260e3c9..0aebc3488c32 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -23,12 +23,6 @@
 extern int isa_dma_bridge_buggy;
 
 #ifdef CONFIG_PCI
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	/* no legacy IRQ on arm64 */
-	return -ENODEV;
-}
-
 static inline int pci_proc_domain(struct pci_bus *bus)
 {
 	return 1;
diff --git a/arch/csky/include/asm/pci.h b/arch/csky/include/asm/pci.h
index ebc765b1f78b..0535f1aaae38 100644
--- a/arch/csky/include/asm/pci.h
+++ b/arch/csky/include/asm/pci.h
@@ -18,12 +18,6 @@
 extern int isa_dma_bridge_buggy;
 
 #ifdef CONFIG_PCI
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	/* no legacy IRQ on csky */
-	return -ENODEV;
-}
-
 static inline int pci_proc_domain(struct pci_bus *bus)
 {
 	/* always show the domain in /proc */
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 8c163d1d0189..fa8f545c24c9 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -63,10 +63,4 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 	return (pci_domain_nr(bus) != 0);
 }
 
-#define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	return channel ? isa_irq_to_vector(15) : isa_irq_to_vector(14);
-}
-
 #endif /* _ASM_IA64_PCI_H */
diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h
index 5a4bc223743b..ccdfa0dc8413 100644
--- a/arch/m68k/include/asm/pci.h
+++ b/arch/m68k/include/asm/pci.h
@@ -2,8 +2,6 @@
 #ifndef _ASM_M68K_PCI_H
 #define _ASM_M68K_PCI_H
 
-#include <asm-generic/pci.h>
-
 #define	pcibios_assign_all_busses()	1
 
 #define	PCIBIOS_MIN_IO		0x00000100
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 9ffc8192adae..3fd6e22c108b 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -139,10 +139,4 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 /* Do platform specific device initialization at pci_enable_device() time */
 extern int pcibios_plat_dev_init(struct pci_dev *dev);
 
-/* Chances are this interrupt is wired PC-style ...  */
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	return channel ? 15 : 14;
-}
-
 #endif /* _ASM_PCI_H */
diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h
index f14465b84de4..127ed5021ae3 100644
--- a/arch/parisc/include/asm/pci.h
+++ b/arch/parisc/include/asm/pci.h
@@ -162,11 +162,6 @@ extern void pcibios_init_bridge(struct pci_dev *);
 #define PCIBIOS_MIN_IO          0x10
 #define PCIBIOS_MIN_MEM         0x1000 /* NBPG - but pci/setup-res.c dies */
 
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	return channel ? 15 : 14;
-}
-
 #define HAVE_PCI_MMAP
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE
 
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 915d6ee4b40a..f9da506751bb 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -39,7 +39,6 @@
 #define pcibios_assign_all_busses() \
 	(pci_has_flag(PCI_REASSIGN_ALL_BUS))
 
-#define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 {
 	if (ppc_md.pci_get_legacy_ide_irq)
diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h
index 7fd52a30e605..a7b8f0d0df7f 100644
--- a/arch/riscv/include/asm/pci.h
+++ b/arch/riscv/include/asm/pci.h
@@ -23,12 +23,6 @@
 extern int isa_dma_bridge_buggy;
 
 #ifdef CONFIG_PCI
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	/* no legacy IRQ on risc-v */
-	return -ENODEV;
-}
-
 static inline int pci_proc_domain(struct pci_bus *bus)
 {
 	/* always show the domain in /proc */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index fdb9745ee998..5889ddcbc374 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -6,7 +6,6 @@
 #include <linux/mutex.h>
 #include <linux/iommu.h>
 #include <linux/pci_hotplug.h>
-#include <asm-generic/pci.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_debug.h>
 #include <asm/sclp.h>
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index ad22e88c6657..54c30126ea17 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -88,10 +88,4 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 	return hose->need_domain_info;
 }
 
-/* Chances are this interrupt is wired PC-style ...  */
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	return channel ? 15 : 14;
-}
-
 #endif /* __ASM_SH_PCI_H */
diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h
index 4deddf430e5d..0c58f65bd172 100644
--- a/arch/sparc/include/asm/pci.h
+++ b/arch/sparc/include/asm/pci.h
@@ -40,13 +40,4 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 #define get_pci_unmapped_area get_fb_unmapped_area
 #endif /* CONFIG_SPARC64 */
 
-#if defined(CONFIG_SPARC64) || defined(CONFIG_LEON_PCI)
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	return PCI_IRQ_NONE;
-}
-#else
-#include <asm-generic/pci.h>
-#endif
-
 #endif /* ___ASM_SPARC_PCI_H */
diff --git a/arch/um/include/asm/pci.h b/arch/um/include/asm/pci.h
index da13fd5519ef..26b96c02ef61 100644
--- a/arch/um/include/asm/pci.h
+++ b/arch/um/include/asm/pci.h
@@ -11,14 +11,6 @@
 
 extern int isa_dma_bridge_buggy;
 
-#ifdef CONFIG_PCI
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	/* no legacy IRQs */
-	return -ENODEV;
-}
-#endif
-
 #ifdef CONFIG_PCI_DOMAINS
 static inline int pci_proc_domain(struct pci_bus *bus)
 {
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index f3fd5928bcbb..736793d65bcb 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -105,9 +105,6 @@ static inline void early_quirks(void) { }
 
 extern void pci_iommu_alloc(void);
 
-/* generic pci stuff */
-#include <asm-generic/pci.h>
-
 #ifdef CONFIG_NUMA
 /* Returns the node based on pci bus */
 static inline int __pcibus_to_node(const struct pci_bus *bus)
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index 8e2b48a268db..b56de9635b6c 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -43,7 +43,4 @@
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE	1
 #define arch_can_pci_mmap_io()		1
 
-/* Generic PCI */
-#include <asm-generic/pci.h>
-
 #endif	/* _XTENSA_PCI_H */
diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c
index 2fa0f7d55259..8f7695624c8c 100644
--- a/drivers/pnp/resource.c
+++ b/drivers/pnp/resource.c
@@ -17,6 +17,7 @@
 #include <asm/dma.h>
 #include <asm/irq.h>
 #include <linux/pci.h>
+#include <linux/libata.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
 
@@ -322,8 +323,8 @@ static int pci_dev_uses_irq(struct pnp_dev *pnp, struct pci_dev *pci,
 		 * treat the compatibility IRQs as busy.
 		 */
 		if ((progif & 0x5) != 0x5)
-			if (pci_get_legacy_ide_irq(pci, 0) == irq ||
-			    pci_get_legacy_ide_irq(pci, 1) == irq) {
+			if (ATA_PRIMARY_IRQ(pci) == irq ||
+			    ATA_SECONDARY_IRQ(pci) == irq) {
 				pnp_dbg(&pnp->dev, "  legacy IDE device %s "
 					"using irq %d\n", pci_name(pci), irq);
 				return 1;
diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h
deleted file mode 100644
index 6bb3cd3d695a..000000000000
--- a/include/asm-generic/pci.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/asm-generic/pci.h
- *
- *  Copyright (C) 2003 Russell King
- */
-#ifndef _ASM_GENERIC_PCI_H
-#define _ASM_GENERIC_PCI_H
-
-#ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	return channel ? 15 : 14;
-}
-#endif /* HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ */
-
-#endif /* _ASM_GENERIC_PCI_H */
-- 
2.36.1


^ permalink raw reply related

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Russell King (Oracle) @ 2022-07-22 21:20 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <20220722165600.lldukpdflv7cjp4j@skbuf>

On Fri, Jul 22, 2022 at 07:56:00PM +0300, Vladimir Oltean wrote:
> On Fri, Jul 22, 2022 at 02:16:04PM +0100, Russell King (Oracle) wrote:
> > > So mvneta at the stage of the commit you've mentioned calls
> > > mvneta_set_autoneg() with the value of pp->use_inband_status. There is
> > > then the exception to be made for the PCS being what's exposed to the
> > > medium, and in that case, ethtool may also override the pp->use_inband_status
> > > variable (which in turn affects the autoneg).
> > > 
> > > So if we take mvneta at this commit as the reference, what we learn is
> > > that using in-band status essentially depends on using in-band autoneg
> > > in the first place.
> > > 
> > > What is hard for me to comprehend is how we ever came to conclude that
> > > for SERDES protocols where clause 37 is possible (2500base-x should be
> > > part of this group), managed = "in-band-status" does not imply in-band
> > > autoneg, considering the mvneta precedent.
> > 
> > That is a recent addition, since the argument was made that when using
> > a 1000base-X fibre transceiver, using ethtool to disable autoneg is a
> > reasonable thing to do - and something that was supported with
> > mvneta_ethtool_set_link_ksettings() as it stands at the point in the
> > commit above.
> 
> I'm sorry, I don't understand. What is the recent addition, and recent
> relative to what? The 2500base-x link mode? Ok, but this is only
> tangentially related to the point overall, more below.

I'm talking about how we handle 1000base-X autoneg - specifically this
commit:

92817dad7dcb net: phylink: Support disabling autonegotiation for PCS

where we can be in 1000base-X with managed = "in-band-status" but we
have autoneg disabled. I thought that is what you were referring to.

As for 2500base-X, I had been raising the issue of AN in the past, for
example (have I said it's really difficult to find old emails even with
google?):

https://lwn.net/ml/netdev/20200618140623.GC1551@shell.armlinux.org.uk/

and eventually I stopped caring about it, as it became pointless to
raise it anymore when we had an established mixture of behaviours. This
is why we have ended up with PCS drivers configuring for no AN for a
firmware description of:

	managed = "in-band-status";
	phy-mode = "2500base-x";

and hence now have unclean semantics for this - some such as mvneta
and mvpp2 will have AN enabled. Others such as pcs-lynx will not.
However, both will request link status from the PCS side and use that
to determine whether the link is up, and use the parameters that the
PCS code returns for the link. Since 2500base-X can only operate at
2.5G, PCS code always reports SPEED_2500, and as half duplex is
virtually never supported above 1G, DUPLEX_FULL.

> > > And why would we essentially redefine its meaning by stating that no,
> > > it is only about the status, not about the autoneg, even though the
> > > status comes from the autoneg for these protocols.
> > 
> > I'm not sure I understand what you're getting at there.
> 
> Sorry if I haven't made my point clear.
> 
> My point is that drivers may have more restrictive interpretations of
> managed = "in-band-status", and the current logic of automatically
> create a fixed-link for DSA's CPU ports is going to cause problems when
> matched up with a DSA master that expects in-band autoneg for whatever
> SERDES protocol.
> 
> What I'd like to happen as a result is that no DSA driver except Marvell
> opts into this by default, and no driver opts into it without its maintainer
> understanding the implications. Otherwise we're going to normalize the
> expectation that a managed = "in-band-status" DSA master should be able
> to interoperate with a fixed-link CPU port, but during this discussion
> there was no argument being brought that a strict interpretation of
> "in-band-status" as "enable autoneg" is incorrect in any way.

I still don't understand your point - because you seem to be conflating
two different things (at least as I understand it.)

We have this:

		port@N {
			reg = <N>;
			label = "cpu";
			ethernet = <&ethX>;
		};

This specifies that the port operates at whatever interface mode and
settings gives the maximum speed. There is no mention of a "managed"
property, and therefore (Andrew, correct me if I'm wrong) in-band
negotiation is not expected to be used.

The configuration of the ethX parameters on the other end of the link
are up to the system integrator to get right, and the actual behaviour
would depend on the ethernet driver. As I've said in previous emails,
there is such a thing as "AN bypass" that can be implemented, and it
can default to be enabled, and drivers can ignore that such a bit even
exists. So, it's possible that even with "managed" set to
"in-band-status" in DT, a link to such a DSA switch will still come up
even though we've requested in DT for AN to be used.

If an ethernet driver is implemented to strictly require in-band AN in
this case, then the link won't come up, and the system integrator would
have to debug the problem.

I think this is actually true on Clearfog - if one specifies the CPU
port as I have above, and requests in-band on the host ethernet, then
the link doesn't come up, because mvneta turns off AN bypass.

> > Going back to the mvneta combined PCS+MAC implementation, we read the
> > link parameters from the PCS when operating in in-band mode and throw
> > them at the fixed PHY so that ethtool works, along with all the usual
> > link up/down state reporting, carrier etc.
> > 
> > If autoneg is disabled, then we effectively operate in fixed-link mode
> > (use_inband_status becomes false, and we start forcing the link up/down
> > and also force the speed and duplex parameters by disabling autoneg.)
> > 
> > Note that this version of mvneta does not support 1000base-X mode, only
> > SGMII is actually supported.
> > 
> > There's a few things that are rather confusing in the driver:
> > 
> > MVNETA_GMAC_INBAND_AN_ENABLE - this controls whether in-band negotiation
> > is performed or not.
> > MVNETA_GMAC_AN_SPEED_EN - this controls whether the result of in-band
> > negotiation for speed is used, or the manually programmed speed in this
> > register.
> > MVNETA_GMAC_AN_DUPLEX_EN - same for duplex.
> > MVNETA_GMAC_AN_FLOW_CTRL_EN - same for pause (only symmetric pause is
> > supported)
> > 
> > MVNETA_GMAC2_INBAND_AN_ENABLE - misnamed, it selects whether SGMII (set)
> > or 1000base-X (unset) format for the 16-bit control word is used.
> > 
> > There is another bit in MVNETA_GMAC_CTRL_0 that selects between
> > 1000base-X and SGMII operation mode, and when this bit is set for
> > 1000base-X. This version of the driver doesn't support 1000base-X,
> > so this bit is never set.
> 
> Thanks for this explanation, if nothing else, it seems to support the
> way in which I was interpreting managed = "in-band-status" to mean
> "enable in-band autoneg", but to be clear, I wasn't debating something
> about the way in which mvneta was doing things. But rather, I was
> debating why would *other* drivers do things differently such as to come
> to expect that a fixed-link master + an in-band-status CPU port, or the
> other way around, may be compatible with each other.

Please note that phylink makes a DT specification including both a
fixed-link descriptor and a managed in-band-status property illegal
because these are two different modes of operating the link, and they
conflict with each other.

The fact that the of_fixed_link_whateveritwas() function (sorry I can't
remember the name) returns true for both indicating that they're both
fixed-link is a historic artifact that has not been changed. As the
fixed-PHY code supporting that way was dropped, I suppose that should
have been cleaned up at some point, but I never got around to it
(remember, development in this space is a very slow process.) There
were always more pressing matters to be dealt with.

Maybe we should now make of_fixed_link_whateveritwas() no longer return
true, and introduce of_managed_in_band() or something like that which
drivers can test that separately. I'm not sure it's worth the driver
churn to make such a change, I'm not sure what the benefit would be.

> Anyway, before I comment any further on the other points, I have a board
> using armada-3720-turris-mox.dts on which I wanted to make a test, but I
> don't fully understand the results, could you help me do so?
> 
> By default, both the mvneta master and my 6390 top-most switch are
> configured for inband/2500base-x. In essence I'm perfectly fine with
> that. I don't care whether IEEE standardized inband/2500base-x, as long
> as both drivers come to expect to enable or disable inband depending on
> the device tree.
> 
> I've dumped the variables from mvneta_pcs_get_state() and it appears
> that the mvneta is reporting AN complete. This would suggest that there
> is indeed in-band autoneg taking place with the 6390 switch.
> 
> Then I modified the device tree to disable in-band autoneg (I've checked
> mv88e6390_serdes_pcs_config and it behaves how I'd expect, enabling
> BMCR_ANENABLE strictly according to phylink_autoneg_inband(mode)).
> Essentially what I'm trying is to intentionally break in-band autoneg by
> causing a mismatch, to prove that it is indeed taking place.
> 
> The results are interesting: state->an_complete is still reported as 1
> for eth1 (mvneta).
> 
> ip link set eth1 up
> [   70.809889] mvneta d0040000.ethernet eth1: configuring for inband/2500base-x link mode
> [   70.818086] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b4c state->an_complete 0 state->speed 2500 state->pause 0x3 state->link 0 state->duplex 1
> [   70.836081] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b4c state->an_complete 0 state->speed 2500 state->pause 0x3 state->link 0 state->duplex 1
> [   70.843748] mv88e6085 d0032004.mdio-mii:10: Link is Down
> [   70.859944] mv88e6085 d0032004.mdio-mii:10: mv88e6390_serdes_pcs_config: port 9 MV88E6390_SGMII_ADVERTISE 0xa0 adv 0x80 changed 1
> [   70.878737] mv88e6085 d0032004.mdio-mii:10: mv88e6390_serdes_pcs_config: port 9 MV88E6390_SGMII_BMCR 0x140 bmcr 0x140 phylink_autoneg_inband(mode) 0
> [   70.898302] mv88e6085 d0032004.mdio-mii:10: Link is Up - 2.5Gbps/Full - flow control off
> [   71.069532] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b4c state->an_complete 1 state->speed 2500 state->pause 0x3 state->link 1 state->duplex 1
> [   71.083376] mvneta d0040000.ethernet eth1: Link is Up - 2.5Gbps/Full - flow control rx/tx
> [   71.091672] IPv6: ADDRCONF(NETDEV_CHANGE): eth1: link becomes ready
> 
> Then I studied MVNETA_GMAC_AUTONEG_CONFIG and I noticed that the bit
> you're talking about, MVNETA_GMAC_AN_BYPASS_ENABLE (bit 3) is indeed set
> by default (the driver doesn't set it).

Correct - because of history, and changing it could break setups that
have been working since before DT. The driver has never changed the
bypass bit, so playing with that when phylink was introduced risked
regressions.

> I've intentionally masked it off in mvneta_pcs_config() by setting it in
> the "mask" variable and nowhere else. Now I get:
> 
> ip link set eth1 up
> [  434.336679] mvneta d0040000.ethernet eth1: configuring for inband/2500base-x link mode
> [  434.342618] mv88e6085 d0032004.mdio-mii:10: Link is Down
> [  434.350020] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b44 state->an_complete 0 state->speed 2500 state->pause 0x3 state->link 0 state->duplex 1
> [  434.350055] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b44 state->an_complete 0 state->speed 2500 state->pause 0x3 state->link 0 state->duplex 1
> [  434.384794] mv88e6085 d0032004.mdio-mii:10: mv88e6390_serdes_pcs_config: port 9 MV88E6390_SGMII_ADVERTISE 0xa0 adv 0x80 changed 1
> [  434.403808] mv88e6085 d0032004.mdio-mii:10: mv88e6390_serdes_pcs_config: port 9 MV88E6390_SGMII_BMCR 0x140 bmcr 0x140 phylink_autoneg_inband(mode) 0
> [  434.423732] mv88e6085 d0032004.mdio-mii:10: Link is Up - 2.5Gbps/Full - flow control off
> 
> so state->an_complete now remains zero, and the link is down on the CPU
> port, and indeed I can no longer ping the board from the outside world.

We can see that the DSA switch thinks the link came up, but mvneta
didn't because autoneg never completed. We can see from the
MV88E6390_SGMII_BMCR value that AN is definitely disabled. So we
positively have one end of the link configured for no AN (a fixed
link) and the other side configured for AN.

That is expected with a strict and correct implementation. As you've
spotted, mvneta is not a strict implementation, because it defaults
to allowing bypass mode, which allows the dissimilar configuration
due to history.

However, what you will find is that you will get the same results
with or without these patches applied - because the DSA switch will
default to not using in-band AN on its CPU port, and that isn't
changed when the DSA port is brought up without phylink.

> So what this is telling me is that mvneta has some built-in resilience
> to in-band autoneg mismatches, via MVNETA_GMAC_AN_BYPASS_ENABLE. But that
> (a) doesn't make it valid to mix and match a fixed-link with a managed =
>     "in-band-status" mode
> (b) doesn't mean it's unspecified whether managed = "in-band-status"
>     should dictate whether to enable in-band autoneg or not
> (c) doesn't mean that other devices/drivers support "AN bypass" to save
>     the day and make an invalid DT description appear to work just fine
> 
> This further supports my idea that we should make a better attempt of
> matching the DSA master's mode with the node we're faking in DSA for
> phylink. For Marvell hardware you or Andrew are surely more knowledgeable
> to be able to say whether that's needed right now or not. But in the
> general case please don't push this to everyone, it just muddies the
> waters.

I really don't get this.

For a mv88e6xxx port which supports 1000base-X, with these patches
applied, then these are all effectively equivalent:

		port@N {
			reg = <N>;
			label = "cpu";
			ethernet = <&ethX>;
		};

		port@N {
			reg = <N>;
			label = "cpu";
			ethernet = <&ethX>;
			phy-mode = "1000base-x";
		};

		port@N {
			reg = <N>;
			label = "cpu";
			ethernet = <&ethX>;
			fixed-link {
				speed = <1000>;
				full-duplex;
			};
		};

		port@N {
			reg = <N>;
			label = "cpu";
			ethernet = <&ethX>;
			phy-mode = "1000base-x";
			fixed-link {
				speed = <1000>;
				full-duplex;
			};
		};

and all _should_ lead to inband AN being disabled.

That is my understanding of Andrew's statements on the defaulting
parameters for both the inter-switch and CPU ports. (Maybe Andrew can
clarify whether this is correct or not.)

However, this would not equivalent to any of the above:

		port@N {
			reg = <N>;
			label = "cpu";
			ethernet = <&ethX>;
			managed = "in-band-status";
		};

The reason this is not equivalent is - as you've recently spotted -
of_phy_is_fixed_link() will return true, and therefore phylink gets
passed the above node to work with - and we do not generate a swnode
fixed-link stanza for it. The behaviour in this case is completely
unaffected by these patches.

If a DSA driver defaults to AN enabled on the DSA/CPU ports, and makes
use of the defaulting firmware description, then this will break with
these patches, since we setup a fixed-link specifier that states that
no AN should be used. That's why I've been trying to get these tested,
and that's why there's a risk with them. However, that's got nothing
to do with whether the driver implements filling in this new
"default_interface" field.

We could go delving into the node pointed to by the phandle and retrieve
whatever parameters from there, but that is an entirely new behaviour
and would be a functional change to the behaviour that Andrew has been
promoting - and is itself not free of a risk of regressions caused by
that approach. What if there's an interface converter in the path between
the# CPU ethernet device and the DSA that hasn't needed to be described?
Digging out the phy-mode from the CPU side could be the wrong thing to
do.

Then there's the question whether DSA should have been validating that
the description on both ends of the link are compatible with each other.
The problem with that is just the same as the above - an undescribed
interface converter would make such validation problematical.

So, I don't think we could rely on the description on the other end of
the link to be capable of describing the setup on the DSA port end.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [GIT PULL] ACPI fix for v5.19-rc8
From: pr-tracker-bot @ 2022-07-22 20:07 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Linus Torvalds, ACPI Devel Maling List, Linux PM,
	Linux Kernel Mailing List
In-Reply-To: <CAJZ5v0guJm8F0myVa2DG3mxkStPQ-+vzuY6Gob2hk3Jk=guWCQ@mail.gmail.com>

The pull request you sent on Fri, 22 Jul 2022 20:17:13 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git acpi-5.19-rc8

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/ae21fbac18b980ecfd895ff32833a2543c157ee2

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply

* [GIT PULL] ACPI fix for v5.19-rc8
From: Rafael J. Wysocki @ 2022-07-22 18:17 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: ACPI Devel Maling List, Linux PM, Linux Kernel Mailing List

Hi Linus,

Please pull from the tag

 git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \
 acpi-5.19-rc8

with top-most commit 09073396ea62d0a10b03f5661dcabfd8eca3f098

 ACPI: CPPC: Don't require flexible address space if X86_FEATURE_CPPC
is supported

on top of commit ff6992735ade75aae3e35d16b17da1008d753d28

 Linux 5.19-rc7

to receive an ACPI fix for 5.19-rc8.

This fixes yet another piece of ACPI CPPC changes fallout on AMD
platforms (Mario Limonciello).

Thanks!

---------------

Mario Limonciello (1):
      ACPI: CPPC: Don't require flexible address space if
X86_FEATURE_CPPC is supported

---------------

 drivers/acpi/cppc_acpi.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

^ permalink raw reply

* Re: [PATCH] PCI/ACPI: Update link to PCI firmware specification
From: Bjorn Helgaas @ 2022-07-22 18:04 UTC (permalink / raw)
  To: Mario Limonciello
  Cc: Rafael J. Wysocki, Len Brown, Bjorn Helgaas, linux-acpi,
	linux-pci, linux-kernel
In-Reply-To: <20220722174754.27921-1-mario.limonciello@amd.com>

On Fri, Jul 22, 2022 at 12:47:54PM -0500, Mario Limonciello wrote:
> The previous link to the PCI firmware specification in the comments
> for drivers/pci/pci-acpi.c no longer works.  Update the comment
> to a current link to this specification.
> 
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>

Applied to pci/misc for v5.20, thanks, Mario!

> ---
>  drivers/pci/pci-acpi.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
> index 3760d85c10d2..a46fec776ad7 100644
> --- a/drivers/pci/pci-acpi.c
> +++ b/drivers/pci/pci-acpi.c
> @@ -21,8 +21,9 @@
>  #include "pci.h"
>  
>  /*
> - * The GUID is defined in the PCI Firmware Specification available here:
> - * https://www.pcisig.com/members/downloads/pcifw_r3_1_13Dec10.pdf
> + * The GUID is defined in the PCI Firmware Specification available
> + * here to PCI-SIG members:
> + * https://members.pcisig.com/wg/PCI-SIG/document/15350
>   */
>  const guid_t pci_acpi_dsm_guid =
>  	GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
> -- 
> 2.34.1
> 

^ permalink raw reply

* [PATCH] PCI/ACPI: Update link to PCI firmware specification
From: Mario Limonciello @ 2022-07-22 17:47 UTC (permalink / raw)
  To: mario.limonciello, Rafael J. Wysocki, Len Brown, Bjorn Helgaas
  Cc: linux-acpi, linux-pci, linux-kernel

The previous link to the PCI firmware specification in the comments
for drivers/pci/pci-acpi.c no longer works.  Update the comment
to a current link to this specification.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/pci/pci-acpi.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 3760d85c10d2..a46fec776ad7 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -21,8 +21,9 @@
 #include "pci.h"

 /*
- * The GUID is defined in the PCI Firmware Specification available here:
- * https://www.pcisig.com/members/downloads/pcifw_r3_1_13Dec10.pdf
+ * The GUID is defined in the PCI Firmware Specification available
+ * here to PCI-SIG members:
+ * https://members.pcisig.com/wg/PCI-SIG/document/15350
  */
 const guid_t pci_acpi_dsm_guid =
 	GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
-- 
2.34.1

^ permalink raw reply related

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Vladimir Oltean @ 2022-07-22 16:56 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <YtqjFKUTsH4CK0L+@shell.armlinux.org.uk>

On Fri, Jul 22, 2022 at 02:16:04PM +0100, Russell King (Oracle) wrote:
> > So mvneta at the stage of the commit you've mentioned calls
> > mvneta_set_autoneg() with the value of pp->use_inband_status. There is
> > then the exception to be made for the PCS being what's exposed to the
> > medium, and in that case, ethtool may also override the pp->use_inband_status
> > variable (which in turn affects the autoneg).
> > 
> > So if we take mvneta at this commit as the reference, what we learn is
> > that using in-band status essentially depends on using in-band autoneg
> > in the first place.
> > 
> > What is hard for me to comprehend is how we ever came to conclude that
> > for SERDES protocols where clause 37 is possible (2500base-x should be
> > part of this group), managed = "in-band-status" does not imply in-band
> > autoneg, considering the mvneta precedent.
> 
> That is a recent addition, since the argument was made that when using
> a 1000base-X fibre transceiver, using ethtool to disable autoneg is a
> reasonable thing to do - and something that was supported with
> mvneta_ethtool_set_link_ksettings() as it stands at the point in the
> commit above.

I'm sorry, I don't understand. What is the recent addition, and recent
relative to what? The 2500base-x link mode? Ok, but this is only
tangentially related to the point overall, more below.

> > And why would we essentially redefine its meaning by stating that no,
> > it is only about the status, not about the autoneg, even though the
> > status comes from the autoneg for these protocols.
> 
> I'm not sure I understand what you're getting at there.

Sorry if I haven't made my point clear.

My point is that drivers may have more restrictive interpretations of
managed = "in-band-status", and the current logic of automatically
create a fixed-link for DSA's CPU ports is going to cause problems when
matched up with a DSA master that expects in-band autoneg for whatever
SERDES protocol.

What I'd like to happen as a result is that no DSA driver except Marvell
opts into this by default, and no driver opts into it without its maintainer
understanding the implications. Otherwise we're going to normalize the
expectation that a managed = "in-band-status" DSA master should be able
to interoperate with a fixed-link CPU port, but during this discussion
there was no argument being brought that a strict interpretation of
"in-band-status" as "enable autoneg" is incorrect in any way.

> Going back to the mvneta combined PCS+MAC implementation, we read the
> link parameters from the PCS when operating in in-band mode and throw
> them at the fixed PHY so that ethtool works, along with all the usual
> link up/down state reporting, carrier etc.
> 
> If autoneg is disabled, then we effectively operate in fixed-link mode
> (use_inband_status becomes false, and we start forcing the link up/down
> and also force the speed and duplex parameters by disabling autoneg.)
> 
> Note that this version of mvneta does not support 1000base-X mode, only
> SGMII is actually supported.
> 
> There's a few things that are rather confusing in the driver:
> 
> MVNETA_GMAC_INBAND_AN_ENABLE - this controls whether in-band negotiation
> is performed or not.
> MVNETA_GMAC_AN_SPEED_EN - this controls whether the result of in-band
> negotiation for speed is used, or the manually programmed speed in this
> register.
> MVNETA_GMAC_AN_DUPLEX_EN - same for duplex.
> MVNETA_GMAC_AN_FLOW_CTRL_EN - same for pause (only symmetric pause is
> supported)
> 
> MVNETA_GMAC2_INBAND_AN_ENABLE - misnamed, it selects whether SGMII (set)
> or 1000base-X (unset) format for the 16-bit control word is used.
> 
> There is another bit in MVNETA_GMAC_CTRL_0 that selects between
> 1000base-X and SGMII operation mode, and when this bit is set for
> 1000base-X. This version of the driver doesn't support 1000base-X,
> so this bit is never set.

Thanks for this explanation, if nothing else, it seems to support the
way in which I was interpreting managed = "in-band-status" to mean
"enable in-band autoneg", but to be clear, I wasn't debating something
about the way in which mvneta was doing things. But rather, I was
debating why would *other* drivers do things differently such as to come
to expect that a fixed-link master + an in-band-status CPU port, or the
other way around, may be compatible with each other.

Anyway, before I comment any further on the other points, I have a board
using armada-3720-turris-mox.dts on which I wanted to make a test, but I
don't fully understand the results, could you help me do so?

By default, both the mvneta master and my 6390 top-most switch are
configured for inband/2500base-x. In essence I'm perfectly fine with
that. I don't care whether IEEE standardized inband/2500base-x, as long
as both drivers come to expect to enable or disable inband depending on
the device tree.

I've dumped the variables from mvneta_pcs_get_state() and it appears
that the mvneta is reporting AN complete. This would suggest that there
is indeed in-band autoneg taking place with the 6390 switch.

Then I modified the device tree to disable in-band autoneg (I've checked
mv88e6390_serdes_pcs_config and it behaves how I'd expect, enabling
BMCR_ANENABLE strictly according to phylink_autoneg_inband(mode)).
Essentially what I'm trying is to intentionally break in-band autoneg by
causing a mismatch, to prove that it is indeed taking place.

The results are interesting: state->an_complete is still reported as 1
for eth1 (mvneta).

ip link set eth1 up
[   70.809889] mvneta d0040000.ethernet eth1: configuring for inband/2500base-x link mode
[   70.818086] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b4c state->an_complete 0 state->speed 2500 state->pause 0x3 state->link 0 state->duplex 1
[   70.836081] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b4c state->an_complete 0 state->speed 2500 state->pause 0x3 state->link 0 state->duplex 1
[   70.843748] mv88e6085 d0032004.mdio-mii:10: Link is Down
[   70.859944] mv88e6085 d0032004.mdio-mii:10: mv88e6390_serdes_pcs_config: port 9 MV88E6390_SGMII_ADVERTISE 0xa0 adv 0x80 changed 1
[   70.878737] mv88e6085 d0032004.mdio-mii:10: mv88e6390_serdes_pcs_config: port 9 MV88E6390_SGMII_BMCR 0x140 bmcr 0x140 phylink_autoneg_inband(mode) 0
[   70.898302] mv88e6085 d0032004.mdio-mii:10: Link is Up - 2.5Gbps/Full - flow control off
[   71.069532] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b4c state->an_complete 1 state->speed 2500 state->pause 0x3 state->link 1 state->duplex 1
[   71.083376] mvneta d0040000.ethernet eth1: Link is Up - 2.5Gbps/Full - flow control rx/tx
[   71.091672] IPv6: ADDRCONF(NETDEV_CHANGE): eth1: link becomes ready

Then I studied MVNETA_GMAC_AUTONEG_CONFIG and I noticed that the bit
you're talking about, MVNETA_GMAC_AN_BYPASS_ENABLE (bit 3) is indeed set
by default (the driver doesn't set it).

I've intentionally masked it off in mvneta_pcs_config() by setting it in
the "mask" variable and nowhere else. Now I get:

ip link set eth1 up
[  434.336679] mvneta d0040000.ethernet eth1: configuring for inband/2500base-x link mode
[  434.342618] mv88e6085 d0032004.mdio-mii:10: Link is Down
[  434.350020] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b44 state->an_complete 0 state->speed 2500 state->pause 0x3 state->link 0 state->duplex 1
[  434.350055] mvneta_pcs_get_state: MVNETA_GMAC_AUTONEG_CONFIG 0x9b44 state->an_complete 0 state->speed 2500 state->pause 0x3 state->link 0 state->duplex 1
[  434.384794] mv88e6085 d0032004.mdio-mii:10: mv88e6390_serdes_pcs_config: port 9 MV88E6390_SGMII_ADVERTISE 0xa0 adv 0x80 changed 1
[  434.403808] mv88e6085 d0032004.mdio-mii:10: mv88e6390_serdes_pcs_config: port 9 MV88E6390_SGMII_BMCR 0x140 bmcr 0x140 phylink_autoneg_inband(mode) 0
[  434.423732] mv88e6085 d0032004.mdio-mii:10: Link is Up - 2.5Gbps/Full - flow control off

so state->an_complete now remains zero, and the link is down on the CPU
port, and indeed I can no longer ping the board from the outside world.

So what this is telling me is that mvneta has some built-in resilience
to in-band autoneg mismatches, via MVNETA_GMAC_AN_BYPASS_ENABLE. But that
(a) doesn't make it valid to mix and match a fixed-link with a managed =
    "in-band-status" mode
(b) doesn't mean it's unspecified whether managed = "in-band-status"
    should dictate whether to enable in-band autoneg or not
(c) doesn't mean that other devices/drivers support "AN bypass" to save
    the day and make an invalid DT description appear to work just fine

This further supports my idea that we should make a better attempt of
matching the DSA master's mode with the node we're faking in DSA for
phylink. For Marvell hardware you or Andrew are surely more knowledgeable
to be able to say whether that's needed right now or not. But in the
general case please don't push this to everyone, it just muddies the
waters.

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Russell King (Oracle) @ 2022-07-22 12:14 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <YtqNkSDLRDtuooy/@shell.armlinux.org.uk>

On Fri, Jul 22, 2022 at 12:44:17PM +0100, Russell King (Oracle) wrote:
> Given that this managed property was introduced for mvneta, mvneta's
> implementation of it is the best reference we have to work out what
> the intentions of it were beyond the commit text.
> 
> With in-band mode enabled, mvneta makes use of a fixed-link PHY, and
> updates the fixed-link PHY with the status from its GMAC block (which
> is the combined PCS+MAC).
> 
> So, when in-band mode is specified, the results from SGMII or 1000base-X
> negotiation are read from the MAC side of the link, pushed into the
> fixed-PHY, which then are reflected back into the driver via the usual
> phylib adjust_link().

... and I should have said that this is exactly why in-band mode is
treated as a fixed-link, even though it's nothing of the sort. It makes
use of the infrastructure that was present at the time (fixed-phy) to
implement this feature of reading the link status from the PCS/MAC end
of the link.

It may not have been the best design, but it's an evolved design based
on the code that was available and what people thought at the time (and
pre-dates my involvement with mvneta in mainline.)

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Marek Behún @ 2022-07-22 14:19 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Vladimir Oltean, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <YtqkxklImfR9gW1Z@shell.armlinux.org.uk>

On Fri, 22 Jul 2022 14:23:18 +0100
"Russell King (Oracle)" <linux@armlinux.org.uk> wrote:

> On Fri, Jul 22, 2022 at 02:59:36PM +0200, Marek Behún wrote:
> >   The 2.5GBASE-X PCS does not support Clause 37 Auto-Negotiation.
> >   Hence, the 1000BASE-X PCS is expected to have its Clause 37
> >   Auto-Negotiation functionality disabled so that the /C/ ordered set
> >   will not be transmitted. If a 2.5GBASE-X PCS receives /C/ ordered
> >   set, then undefined behavior may occur.
> >   ...  
> 
> The reason that's probably stated is because there hasn't been any
> standardisation of it, different implementations behave differently,
> so they can't define a standard behaviour without breaking what's
> already out there.

I think they are also considering clause 73 AN, which is supported with
1000base-KX (if I am not mistaken). Maybe the intention was to use
clause 73 with >1G speeds, and so they've forbid clause 37 on
2500base-x.


^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Russell King (Oracle) @ 2022-07-22 11:44 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <20220722105238.qhfq5myqa4ixkvy4@skbuf>

On Fri, Jul 22, 2022 at 01:52:38PM +0300, Vladimir Oltean wrote:
> On Fri, Jul 22, 2022 at 09:28:08AM +0100, Russell King (Oracle) wrote:
> > On Fri, Jul 22, 2022 at 12:36:45AM +0300, Vladimir Oltean wrote:
> > > On Thu, Jul 21, 2022 at 10:14:00PM +0100, Russell King (Oracle) wrote:
> > > > > > So currently we try to enable C37 AN in 2500base-x mode, although
> > > > > > the standard says that it shouldn't be there, and it shouldn't be there
> > > > > > presumably because they want it to work with C73 AN.
> > > > > > 
> > > > > > I don't know how to solve this issue. Maybe declare a new PHY interface
> > > > > > mode constant, 2500base-x-no-c37-an ?
> > > > > 
> > > > > So this is essentially what I'm asking, and you didn't necessarily fully
> > > > > answer. I take it that there exist Marvell switches which enable in-band
> > > > > autoneg for 2500base-x and switches which don't, and managed = "in-band-status"
> > > > > has nothing to do with that decision. Right?
> > > > 
> > > > I think we're getting a little too het up over this.
> > > 
> > > No, I think it's relevant to this patch set.
> > > 
> > > > We have 1000base-X where, when we're not using in-band-status, we don't
> > > > use autoneg (some drivers that weren't caught in review annoyingly do
> > > > still use autoneg, but they shouldn't). We ignore the ethtool autoneg
> > > > bit.
> > > > 
> > > > We also have 1000base-X where we're using in-band-status, and we then
> > > > respect the ethtool autoneg bit.
> > > > 
> > > > So, wouldn't it be logical if 2500base-X were implemented the same way,
> > > > and on setups where 2500base-X does not support clause 37 AN, we
> > > > clear the ethtool autoneg bit? If we have 2500base-X being used as the
> > > > media link, surely this is the right behaviour?
> > > 
> > > The ethtool autoneg bit is only relevant when the PCS is the last thing
> > > before the medium. But if the SERDES protocol connects the MAC to the PHY,
> > > or the MAC to another MAC (such as the case here, CPU or DSA ports),
> > > there won't be any ethtool bit to take into consideration, and that's
> > > where my question is. Is there any expected correlation between enabling
> > > in-band autoneg and the presence or absence of managed = "in-band-status"?
> > 
> > This topic is something I was looking at back in November 2021, trying
> > to work out what the most sensible way of indicating to a PCS whether
> > it should enable in-band or not:
> > 
> > http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e4ea7d035e7e04e87dfd86702f59952e0cecc18d
> > http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e454bf101fa457dd5c2cea0b1aaab7ba33048089
> > http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e2c57490f205ae7c0e11fcf756675937f933be5e
> > 
> > The intention there was to move the decision about whether a PCS should
> > enable autoneg out of the PCS and into phylink, but doing that one comes
> > immediately on the problem of (e.g.) Marvell NETA/PP2 vs Lynx having
> > different interpretations for 2500base-X. There are also a number of
> > drivers that do not follow MLO_AN_INBAND-means-use-inband or not for
> > things such as SGMII or 1000base-X.
> > 
> > This means we have no standard interpretation amongst phylink users
> > about when in-band signalling should be enabled or disabled, which
> > means moving that decision into phylink today isn't possible.
> > 
> > The only thing we could do is provide the PCS with an additional bit
> > of information so it can make the decision - something like a boolean
> > "pcs_connects_to_medium" flag, and keep the decision making in the
> > PCS-specific code - sadly keeping the variability between different
> > PCS implementations.
> 
> The way I understand what you're saying is that there is no guarantee
> that the DSA master and CPU port will agree whether to use in-band
> autoneg or not here (and implicitly, there is no guarantee that this
> link will work):
> 
> 	&eth0 {
> 		phy-mode = "2500base-x";
> 		managed = "in-band-status";
> 	};
> 
> 	&switch_cpu_port {
> 		ethernet = <&eth0>;
> 		phy-mode = "25000base-x";

I'll assume that 25000 is a typo.

> 		managed = "in-band-status";
> 	};

Today, there is no guarantee - because it depends on how people have
chosen to implement 2500base-X, and whether the hardware requires the
use of in-band AN or prohibits it. This is what happens when stuff
isn't standardised - one ends up with differing implementations doing
different things, and this has happened not _only_ at hardware level
but also software level as well.

You have to also throw into this that various implementations also have
an "AN bypass" flag, which means if they see what looks like a valid
SERDES data stream, but do not see the AN data, after a certain timeout
they allow the link to come up - and again, whether that is enabled or
not is pot luck today.

> similarly, there is a good chance that the DT description below might
> result in a functional link:
> 
> 	&eth0 {
> 		phy-mode = "2500base-x";
> 		managed = "in-band-status";
> 	};
> 
> 	&switch_cpu_port {
> 		ethernet = <&eth0>;
> 		phy-mode = "25000base-x";
> 
> 		fixed-link {
> 			speed = <2500>;
> 			full-duplex;
> 		};
> 	};
> 
> There is no expectation from either DT description to use in-band
> autoneg or not.
> 
> The fact that of_phy_is_fixed_link() was made by Stas Sergeev to say
> that a 'managed' link with the value != 'auto' is fixed prompted me to
> study exactly what those changes were about.

From what I can see, there is no formal definition of "in-band-status"
beyond what it says on the tin. The description in the DT binding
specification, which is really where this should be formally documented,
is totally lacking.

>     This patch introduces the new string property 'managed' that allows
>     the user to set the management type explicitly.
>     The supported values are:
>     "auto" - default. Uses either MDIO or nothing, depending on the presence
>     of the fixed-link node
>     "in-band-status" - use in-band status

This, and how this is implemented by mvneta, is the best we have to go
on for the meaning of this.

> This is why I am asking whether there is any formal definition of what
> managed = "in-band-status" means. You've said it means about retrieving
> link status from the PCS. What are you basing upon when you are saying that?

Given that this managed property was introduced for mvneta, mvneta's
implementation of it is the best reference we have to work out what
the intentions of it were beyond the commit text.

With in-band mode enabled, mvneta makes use of a fixed-link PHY, and
updates the fixed-link PHY with the status from its GMAC block (which
is the combined PCS+MAC).

So, when in-band mode is specified, the results from SGMII or 1000base-X
negotiation are read from the MAC side of the link, pushed into the
fixed-PHY, which then are reflected back into the driver via the usual
phylib adjust_link().

Have a read through mvneta's code at this commit:

git show 2eecb2e04abb62ef8ea7b43e1a46bdb5b99d1bf8:drivers/net/ethernet/marvell/mvneta.c

specifically, mvneta_fixed_link_update() and mvneta_adjust_link().
Note that when operating in in-band mode, there is actually no need
for the configuration of MVNETA_GMAC_AUTONEG_CONFIG to be touched
in any way since the values read from the MVNETA_GMAC_STATUS register
indicate what parameters the MAC is actually using. (The speed,
duplex, and pause bits in AUTONEG_CONFIG are ignored anyway if AN
is enabled.)

I know this is rather wooly, but not everything is defined in black and
white, and we need to do the best we can with the information that is
available.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Russell King (Oracle) @ 2022-07-22 13:16 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <20220722124629.7y3p7nt6jmm5hecq@skbuf>

On Fri, Jul 22, 2022 at 03:46:29PM +0300, Vladimir Oltean wrote:
> On Fri, Jul 22, 2022 at 12:44:17PM +0100, Russell King (Oracle) wrote:
> > Today, there is no guarantee - because it depends on how people have
> > chosen to implement 2500base-X, and whether the hardware requires the
> > use of in-band AN or prohibits it. This is what happens when stuff
> > isn't standardised - one ends up with differing implementations doing
> > different things, and this has happened not _only_ at hardware level
> > but also software level as well.
> > 
> > You have to also throw into this that various implementations also have
> > an "AN bypass" flag, which means if they see what looks like a valid
> > SERDES data stream, but do not see the AN data, after a certain timeout
> > they allow the link to come up - and again, whether that is enabled or
> > not is pot luck today.
> 
> Interesting. After the timeout expires, does the lane ever transition
> back into the encoding required for AN mode, in case there appears at a
> later time someone willing to negotiate?

They don't document that it does.

> > > similarly, there is a good chance that the DT description below might
> > > result in a functional link:
> > > 
> > > 	&eth0 {
> > > 		phy-mode = "2500base-x";
> > > 		managed = "in-band-status";
> > > 	};
> > > 
> > > 	&switch_cpu_port {
> > > 		ethernet = <&eth0>;
> > > 		phy-mode = "25000base-x";
> > > 
> > > 		fixed-link {
> > > 			speed = <2500>;
> > > 			full-duplex;
> > > 		};
> > > 	};
> > > 
> > > There is no expectation from either DT description to use in-band
> > > autoneg or not.
> > > 
> > > The fact that of_phy_is_fixed_link() was made by Stas Sergeev to say
> > > that a 'managed' link with the value != 'auto' is fixed prompted me to
> > > study exactly what those changes were about.
> > 
> > From what I can see, there is no formal definition of "in-band-status"
> > beyond what it says on the tin. The description in the DT binding
> > specification, which is really where this should be formally documented,
> > is totally lacking.
> > 
> > >     This patch introduces the new string property 'managed' that allows
> > >     the user to set the management type explicitly.
> > >     The supported values are:
> > >     "auto" - default. Uses either MDIO or nothing, depending on the presence
> > >     of the fixed-link node
> > >     "in-band-status" - use in-band status
> > 
> > This, and how this is implemented by mvneta, is the best we have to go
> > on for the meaning of this.
> > 
> > > This is why I am asking whether there is any formal definition of what
> > > managed = "in-band-status" means. You've said it means about retrieving
> > > link status from the PCS. What are you basing upon when you are saying that?
> > 
> > Given that this managed property was introduced for mvneta, mvneta's
> > implementation of it is the best reference we have to work out what
> > the intentions of it were beyond the commit text.
> > 
> > With in-band mode enabled, mvneta makes use of a fixed-link PHY, and
> > updates the fixed-link PHY with the status from its GMAC block (which
> > is the combined PCS+MAC).
> > 
> > So, when in-band mode is specified, the results from SGMII or 1000base-X
> > negotiation are read from the MAC side of the link, pushed into the
> > fixed-PHY, which then are reflected back into the driver via the usual
> > phylib adjust_link().
> > 
> > Have a read through mvneta's code at this commit:
> > 
> > git show 2eecb2e04abb62ef8ea7b43e1a46bdb5b99d1bf8:drivers/net/ethernet/marvell/mvneta.c
> > 
> > specifically, mvneta_fixed_link_update() and mvneta_adjust_link().
> > Note that when operating in in-band mode, there is actually no need
> > for the configuration of MVNETA_GMAC_AUTONEG_CONFIG to be touched
> > in any way since the values read from the MVNETA_GMAC_STATUS register
> > indicate what parameters the MAC is actually using. (The speed,
> > duplex, and pause bits in AUTONEG_CONFIG are ignored anyway if AN
> > is enabled.)
> 
> I view this as just an implementation detail and not as something that
> influences what managed = "in-band-status" is supposed to mean.
> 
> > I know this is rather wooly, but not everything is defined in black and
> > white, and we need to do the best we can with the information that is
> > available.
> 
> So mvneta at the stage of the commit you've mentioned calls
> mvneta_set_autoneg() with the value of pp->use_inband_status. There is
> then the exception to be made for the PCS being what's exposed to the
> medium, and in that case, ethtool may also override the pp->use_inband_status
> variable (which in turn affects the autoneg).
> 
> So if we take mvneta at this commit as the reference, what we learn is
> that using in-band status essentially depends on using in-band autoneg
> in the first place.
> 
> What is hard for me to comprehend is how we ever came to conclude that
> for SERDES protocols where clause 37 is possible (2500base-x should be
> part of this group), managed = "in-band-status" does not imply in-band
> autoneg, considering the mvneta precedent.

That is a recent addition, since the argument was made that when using
a 1000base-X fibre transceiver, using ethtool to disable autoneg is a
reasonable thing to do - and something that was supported with
mvneta_ethtool_set_link_ksettings() as it stands at the point in the
commit above.

> And why would we essentially redefine its meaning by stating that no,
> it is only about the status, not about the autoneg, even though the
> status comes from the autoneg for these protocols.

I'm not sure I understand what you're getting at there.

Going back to the mvneta combined PCS+MAC implementation, we read the
link parameters from the PCS when operating in in-band mode and throw
them at the fixed PHY so that ethtool works, along with all the usual
link up/down state reporting, carrier etc.

If autoneg is disabled, then we effectively operate in fixed-link mode
(use_inband_status becomes false, and we start forcing the link up/down
and also force the speed and duplex parameters by disabling autoneg.)

Note that this version of mvneta does not support 1000base-X mode, only
SGMII is actually supported.

There's a few things that are rather confusing in the driver:

MVNETA_GMAC_INBAND_AN_ENABLE - this controls whether in-band negotiation
is performed or not.
MVNETA_GMAC_AN_SPEED_EN - this controls whether the result of in-band
negotiation for speed is used, or the manually programmed speed in this
register.
MVNETA_GMAC_AN_DUPLEX_EN - same for duplex.
MVNETA_GMAC_AN_FLOW_CTRL_EN - same for pause (only symmetric pause is
supported)

MVNETA_GMAC2_INBAND_AN_ENABLE - misnamed, it selects whether SGMII (set)
or 1000base-X (unset) format for the 16-bit control word is used.

There is another bit in MVNETA_GMAC_CTRL_0 that selects between
1000base-X and SGMII operation mode, and when this bit is set for
1000base-X. This version of the driver doesn't support 1000base-X,
so this bit is never set.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Russell King (Oracle) @ 2022-07-22 13:23 UTC (permalink / raw)
  To: Marek Behún
  Cc: Vladimir Oltean, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <20220722145936.497ac73f@dellmb>

On Fri, Jul 22, 2022 at 02:59:36PM +0200, Marek Behún wrote:
>   The 2.5GBASE-X PCS does not support Clause 37 Auto-Negotiation.
>   Hence, the 1000BASE-X PCS is expected to have its Clause 37
>   Auto-Negotiation functionality disabled so that the /C/ ordered set
>   will not be transmitted. If a 2.5GBASE-X PCS receives /C/ ordered
>   set, then undefined behavior may occur.
>   ...

The reason that's probably stated is because there hasn't been any
standardisation of it, different implementations behave differently,
so they can't define a standard behaviour without breaking what's
already out there.

With mvneta, the reality is that the 2.5G speed is implemented by
changing the clock configuration in the COMPHY block (serdes) - which
basically clocks everything 2.5x faster. I seem to remember mvpp2 is
the same deal.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Andrew Lunn @ 2022-07-22 13:20 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Vladimir Oltean, Marek Behún, Heiner Kallweit,
	Alexandre Belloni, Alvin __ipraga, Andy Shevchenko,
	Claudiu Manoil, Daniel Scally, David S. Miller, DENG Qingfang,
	Eric Dumazet, Florian Fainelli, George McCollister,
	Greg Kroah-Hartman, Hauke Mehrtens, Heikki Krogerus,
	Jakub Kicinski, Kurt Kanzenbach, Landen Chao, Linus Walleij,
	linux-acpi, linux-arm-kernel, linux-mediatek, Matthias Brugger,
	netdev, Paolo Abeni, Rafael J. Wysocki, Sakari Ailus, Sean Wang,
	UNGLinuxDriver, Vivien Didelot, Woojung Huh
In-Reply-To: <YtqNkSDLRDtuooy/@shell.armlinux.org.uk>

> > The way I understand what you're saying is that there is no guarantee
> > that the DSA master and CPU port will agree whether to use in-band
> > autoneg or not here (and implicitly, there is no guarantee that this
> > link will work):
> > 
> > 	&eth0 {
> > 		phy-mode = "2500base-x";
> > 		managed = "in-band-status";
> > 	};
> > 
> > 	&switch_cpu_port {
> > 		ethernet = <&eth0>;
> > 		phy-mode = "25000base-x";
> 
> I'll assume that 25000 is a typo.
> 
> > 		managed = "in-band-status";
> > 	};
> 
> Today, there is no guarantee - because it depends on how people have
> chosen to implement 2500base-X, and whether the hardware requires the
> use of in-band AN or prohibits it.

In practice, a Marvell MAC and a Marvell switch are likely to work,
since Marvell produce and tested both ends. I would expect this to be
true for any vendor. It is only going to be a problem when you have
devices from different vendors. And they have different
interpretations of what 2500Base-X is.

So far, mixed vendor systems tend to be

1) Freescale FEC and Marvell switches, and the FEC it still only 1G RGMII
2) The smaller simpler devices which are 1G and do not yet use a SERDES.

So this might be a future problem we will have, as more devices start
supporting 2.5G and 5G, but hopefully those future devices follow the
standard.

	Andrew

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Marek Behún @ 2022-07-22 12:59 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Russell King (Oracle), Andrew Lunn, Heiner Kallweit,
	Alexandre Belloni, Alvin __ipraga, Andy Shevchenko,
	Claudiu Manoil, Daniel Scally, David S. Miller, DENG Qingfang,
	Eric Dumazet, Florian Fainelli, George McCollister,
	Greg Kroah-Hartman, Hauke Mehrtens, Heikki Krogerus,
	Jakub Kicinski, Kurt Kanzenbach, Landen Chao, Linus Walleij,
	linux-acpi, linux-arm-kernel, linux-mediatek, Matthias Brugger,
	netdev, Paolo Abeni, Rafael J. Wysocki, Sakari Ailus, Sean Wang,
	UNGLinuxDriver, Vivien Didelot, Woojung Huh
In-Reply-To: <20220721182216.z4vdaj4zfb6w3emo@skbuf>

On Thu, 21 Jul 2022 21:22:16 +0300
Vladimir Oltean <olteanv@gmail.com> wrote:

> On Thu, Jul 21, 2022 at 07:21:45PM +0200, Marek Behún wrote:
> > Marvell documentation says that 2500base-x does not implement inband
> > AN.  
> 
> Does Marvell documentation actually call it 2500base-x when it says it
> doesn't support in-band autoneg?

Yes, it does.

> > But when it was first implemented, for some reason it was thought that
> > 2500base-x is just 1000base-x at 2.5x speed, and 1000base-x does
> > support inband AN. Also it worked during tests for both switches and
> > SOC NICs, so it was enabled.
> > 
> > At the time 2500base-x was not standardized. Now 2500base-x is
> > stanradrized, and the standard says that 2500base-x does not support
> > clause 37 AN. I guess this is because where it is used, it is intended
> > to work with clause 73 AN somehow.  
> 
> When you say 2500base-x is standardized, do you mean there is a document
> somewhere which I could use to read more about this?

IEEE Std 802.3cb-2018: Amendment 1: Physical Layer Specifications and
Management Parameters for 2.5 Gb/s and 5 Gb/s Operation over Backplane.

Annex 127A (informative): Compatibility of 2.5GBASE-X PCS/PMA with
1000BASE-X PCS/PMA running 2.5 times faster

  ...
  This annex discusses the restrictions when operating 2.5GBASE-X
  PCS/PMA with a 1000BASE-X PCS/PMA link partner running 2.5 times
  faster. Compatibility of the PMD is outside the scope of this annex.
  In this annex when 1000BASE-X PCS/PMA is referred to, the 2.5 times
  speed up is implied.
  ...
  The 2.5GBASE-X PCS does not support Clause 37 Auto-Negotiation.
  Hence, the 1000BASE-X PCS is expected to have its Clause 37
  Auto-Negotiation functionality disabled so that the /C/ ordered set
  will not be transmitted. If a 2.5GBASE-X PCS receives /C/ ordered
  set, then undefined behavior may occur.
  ...

Marek

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Vladimir Oltean @ 2022-07-22 12:46 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <YtqNkSDLRDtuooy/@shell.armlinux.org.uk>

On Fri, Jul 22, 2022 at 12:44:17PM +0100, Russell King (Oracle) wrote:
> On Fri, Jul 22, 2022 at 01:52:38PM +0300, Vladimir Oltean wrote:
> > On Fri, Jul 22, 2022 at 09:28:08AM +0100, Russell King (Oracle) wrote:
> > > On Fri, Jul 22, 2022 at 12:36:45AM +0300, Vladimir Oltean wrote:
> > > > On Thu, Jul 21, 2022 at 10:14:00PM +0100, Russell King (Oracle) wrote:
> > > > > > > So currently we try to enable C37 AN in 2500base-x mode, although
> > > > > > > the standard says that it shouldn't be there, and it shouldn't be there
> > > > > > > presumably because they want it to work with C73 AN.
> > > > > > > 
> > > > > > > I don't know how to solve this issue. Maybe declare a new PHY interface
> > > > > > > mode constant, 2500base-x-no-c37-an ?
> > > > > > 
> > > > > > So this is essentially what I'm asking, and you didn't necessarily fully
> > > > > > answer. I take it that there exist Marvell switches which enable in-band
> > > > > > autoneg for 2500base-x and switches which don't, and managed = "in-band-status"
> > > > > > has nothing to do with that decision. Right?
> > > > > 
> > > > > I think we're getting a little too het up over this.
> > > > 
> > > > No, I think it's relevant to this patch set.
> > > > 
> > > > > We have 1000base-X where, when we're not using in-band-status, we don't
> > > > > use autoneg (some drivers that weren't caught in review annoyingly do
> > > > > still use autoneg, but they shouldn't). We ignore the ethtool autoneg
> > > > > bit.
> > > > > 
> > > > > We also have 1000base-X where we're using in-band-status, and we then
> > > > > respect the ethtool autoneg bit.
> > > > > 
> > > > > So, wouldn't it be logical if 2500base-X were implemented the same way,
> > > > > and on setups where 2500base-X does not support clause 37 AN, we
> > > > > clear the ethtool autoneg bit? If we have 2500base-X being used as the
> > > > > media link, surely this is the right behaviour?
> > > > 
> > > > The ethtool autoneg bit is only relevant when the PCS is the last thing
> > > > before the medium. But if the SERDES protocol connects the MAC to the PHY,
> > > > or the MAC to another MAC (such as the case here, CPU or DSA ports),
> > > > there won't be any ethtool bit to take into consideration, and that's
> > > > where my question is. Is there any expected correlation between enabling
> > > > in-band autoneg and the presence or absence of managed = "in-band-status"?
> > > 
> > > This topic is something I was looking at back in November 2021, trying
> > > to work out what the most sensible way of indicating to a PCS whether
> > > it should enable in-band or not:
> > > 
> > > http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e4ea7d035e7e04e87dfd86702f59952e0cecc18d
> > > http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e454bf101fa457dd5c2cea0b1aaab7ba33048089
> > > http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e2c57490f205ae7c0e11fcf756675937f933be5e
> > > 
> > > The intention there was to move the decision about whether a PCS should
> > > enable autoneg out of the PCS and into phylink, but doing that one comes
> > > immediately on the problem of (e.g.) Marvell NETA/PP2 vs Lynx having
> > > different interpretations for 2500base-X. There are also a number of
> > > drivers that do not follow MLO_AN_INBAND-means-use-inband or not for
> > > things such as SGMII or 1000base-X.
> > > 
> > > This means we have no standard interpretation amongst phylink users
> > > about when in-band signalling should be enabled or disabled, which
> > > means moving that decision into phylink today isn't possible.
> > > 
> > > The only thing we could do is provide the PCS with an additional bit
> > > of information so it can make the decision - something like a boolean
> > > "pcs_connects_to_medium" flag, and keep the decision making in the
> > > PCS-specific code - sadly keeping the variability between different
> > > PCS implementations.
> > 
> > The way I understand what you're saying is that there is no guarantee
> > that the DSA master and CPU port will agree whether to use in-band
> > autoneg or not here (and implicitly, there is no guarantee that this
> > link will work):
> > 
> > 	&eth0 {
> > 		phy-mode = "2500base-x";
> > 		managed = "in-band-status";
> > 	};
> > 
> > 	&switch_cpu_port {
> > 		ethernet = <&eth0>;
> > 		phy-mode = "25000base-x";
> 
> I'll assume that 25000 is a typo.

typo.

> > 		managed = "in-band-status";
> > 	};
> 
> Today, there is no guarantee - because it depends on how people have
> chosen to implement 2500base-X, and whether the hardware requires the
> use of in-band AN or prohibits it. This is what happens when stuff
> isn't standardised - one ends up with differing implementations doing
> different things, and this has happened not _only_ at hardware level
> but also software level as well.
> 
> You have to also throw into this that various implementations also have
> an "AN bypass" flag, which means if they see what looks like a valid
> SERDES data stream, but do not see the AN data, after a certain timeout
> they allow the link to come up - and again, whether that is enabled or
> not is pot luck today.

Interesting. After the timeout expires, does the lane ever transition
back into the encoding required for AN mode, in case there appears at a
later time someone willing to negotiate?

> > similarly, there is a good chance that the DT description below might
> > result in a functional link:
> > 
> > 	&eth0 {
> > 		phy-mode = "2500base-x";
> > 		managed = "in-band-status";
> > 	};
> > 
> > 	&switch_cpu_port {
> > 		ethernet = <&eth0>;
> > 		phy-mode = "25000base-x";
> > 
> > 		fixed-link {
> > 			speed = <2500>;
> > 			full-duplex;
> > 		};
> > 	};
> > 
> > There is no expectation from either DT description to use in-band
> > autoneg or not.
> > 
> > The fact that of_phy_is_fixed_link() was made by Stas Sergeev to say
> > that a 'managed' link with the value != 'auto' is fixed prompted me to
> > study exactly what those changes were about.
> 
> From what I can see, there is no formal definition of "in-band-status"
> beyond what it says on the tin. The description in the DT binding
> specification, which is really where this should be formally documented,
> is totally lacking.
> 
> >     This patch introduces the new string property 'managed' that allows
> >     the user to set the management type explicitly.
> >     The supported values are:
> >     "auto" - default. Uses either MDIO or nothing, depending on the presence
> >     of the fixed-link node
> >     "in-band-status" - use in-band status
> 
> This, and how this is implemented by mvneta, is the best we have to go
> on for the meaning of this.
> 
> > This is why I am asking whether there is any formal definition of what
> > managed = "in-band-status" means. You've said it means about retrieving
> > link status from the PCS. What are you basing upon when you are saying that?
> 
> Given that this managed property was introduced for mvneta, mvneta's
> implementation of it is the best reference we have to work out what
> the intentions of it were beyond the commit text.
> 
> With in-band mode enabled, mvneta makes use of a fixed-link PHY, and
> updates the fixed-link PHY with the status from its GMAC block (which
> is the combined PCS+MAC).
> 
> So, when in-band mode is specified, the results from SGMII or 1000base-X
> negotiation are read from the MAC side of the link, pushed into the
> fixed-PHY, which then are reflected back into the driver via the usual
> phylib adjust_link().
> 
> Have a read through mvneta's code at this commit:
> 
> git show 2eecb2e04abb62ef8ea7b43e1a46bdb5b99d1bf8:drivers/net/ethernet/marvell/mvneta.c
> 
> specifically, mvneta_fixed_link_update() and mvneta_adjust_link().
> Note that when operating in in-band mode, there is actually no need
> for the configuration of MVNETA_GMAC_AUTONEG_CONFIG to be touched
> in any way since the values read from the MVNETA_GMAC_STATUS register
> indicate what parameters the MAC is actually using. (The speed,
> duplex, and pause bits in AUTONEG_CONFIG are ignored anyway if AN
> is enabled.)

I view this as just an implementation detail and not as something that
influences what managed = "in-band-status" is supposed to mean.

> I know this is rather wooly, but not everything is defined in black and
> white, and we need to do the best we can with the information that is
> available.

So mvneta at the stage of the commit you've mentioned calls
mvneta_set_autoneg() with the value of pp->use_inband_status. There is
then the exception to be made for the PCS being what's exposed to the
medium, and in that case, ethtool may also override the pp->use_inband_status
variable (which in turn affects the autoneg).

So if we take mvneta at this commit as the reference, what we learn is
that using in-band status essentially depends on using in-band autoneg
in the first place.

What is hard for me to comprehend is how we ever came to conclude that
for SERDES protocols where clause 37 is possible (2500base-x should be
part of this group), managed = "in-band-status" does not imply in-band
autoneg, considering the mvneta precedent.
And why would we essentially redefine its meaning by stating that no,
it is only about the status, not about the autoneg, even though the
status comes from the autoneg for these protocols.

There is a separate discussion to be had about SERDES protocols without
clause 37 AN (10GBase-R), I don't want to go there just yet, I want to
concentrate on what's similar to what mvneta originally supported.

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Vladimir Oltean @ 2022-07-22 10:52 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <YtpfmF37FmfY6BV5@shell.armlinux.org.uk>

On Fri, Jul 22, 2022 at 09:28:08AM +0100, Russell King (Oracle) wrote:
> On Fri, Jul 22, 2022 at 12:36:45AM +0300, Vladimir Oltean wrote:
> > On Thu, Jul 21, 2022 at 10:14:00PM +0100, Russell King (Oracle) wrote:
> > > > > So currently we try to enable C37 AN in 2500base-x mode, although
> > > > > the standard says that it shouldn't be there, and it shouldn't be there
> > > > > presumably because they want it to work with C73 AN.
> > > > > 
> > > > > I don't know how to solve this issue. Maybe declare a new PHY interface
> > > > > mode constant, 2500base-x-no-c37-an ?
> > > > 
> > > > So this is essentially what I'm asking, and you didn't necessarily fully
> > > > answer. I take it that there exist Marvell switches which enable in-band
> > > > autoneg for 2500base-x and switches which don't, and managed = "in-band-status"
> > > > has nothing to do with that decision. Right?
> > > 
> > > I think we're getting a little too het up over this.
> > 
> > No, I think it's relevant to this patch set.
> > 
> > > We have 1000base-X where, when we're not using in-band-status, we don't
> > > use autoneg (some drivers that weren't caught in review annoyingly do
> > > still use autoneg, but they shouldn't). We ignore the ethtool autoneg
> > > bit.
> > > 
> > > We also have 1000base-X where we're using in-band-status, and we then
> > > respect the ethtool autoneg bit.
> > > 
> > > So, wouldn't it be logical if 2500base-X were implemented the same way,
> > > and on setups where 2500base-X does not support clause 37 AN, we
> > > clear the ethtool autoneg bit? If we have 2500base-X being used as the
> > > media link, surely this is the right behaviour?
> > 
> > The ethtool autoneg bit is only relevant when the PCS is the last thing
> > before the medium. But if the SERDES protocol connects the MAC to the PHY,
> > or the MAC to another MAC (such as the case here, CPU or DSA ports),
> > there won't be any ethtool bit to take into consideration, and that's
> > where my question is. Is there any expected correlation between enabling
> > in-band autoneg and the presence or absence of managed = "in-band-status"?
> 
> This topic is something I was looking at back in November 2021, trying
> to work out what the most sensible way of indicating to a PCS whether
> it should enable in-band or not:
> 
> http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e4ea7d035e7e04e87dfd86702f59952e0cecc18d
> http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e454bf101fa457dd5c2cea0b1aaab7ba33048089
> http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e2c57490f205ae7c0e11fcf756675937f933be5e
> 
> The intention there was to move the decision about whether a PCS should
> enable autoneg out of the PCS and into phylink, but doing that one comes
> immediately on the problem of (e.g.) Marvell NETA/PP2 vs Lynx having
> different interpretations for 2500base-X. There are also a number of
> drivers that do not follow MLO_AN_INBAND-means-use-inband or not for
> things such as SGMII or 1000base-X.
> 
> This means we have no standard interpretation amongst phylink users
> about when in-band signalling should be enabled or disabled, which
> means moving that decision into phylink today isn't possible.
> 
> The only thing we could do is provide the PCS with an additional bit
> of information so it can make the decision - something like a boolean
> "pcs_connects_to_medium" flag, and keep the decision making in the
> PCS-specific code - sadly keeping the variability between different
> PCS implementations.

The way I understand what you're saying is that there is no guarantee
that the DSA master and CPU port will agree whether to use in-band
autoneg or not here (and implicitly, there is no guarantee that this
link will work):

	&eth0 {
		phy-mode = "2500base-x";
		managed = "in-band-status";
	};

	&switch_cpu_port {
		ethernet = <&eth0>;
		phy-mode = "25000base-x";
		managed = "in-band-status";
	};

similarly, there is a good chance that the DT description below might
result in a functional link:

	&eth0 {
		phy-mode = "2500base-x";
		managed = "in-band-status";
	};

	&switch_cpu_port {
		ethernet = <&eth0>;
		phy-mode = "25000base-x";

		fixed-link {
			speed = <2500>;
			full-duplex;
		};
	};

There is no expectation from either DT description to use in-band
autoneg or not.

The fact that of_phy_is_fixed_link() was made by Stas Sergeev to say
that a 'managed' link with the value != 'auto' is fixed prompted me to
study exactly what those changes were about.

Was the managed = "in-band-status" property introduced by Stas Sergeev
in this commit:

commit 4cba5c2103657d43d0886e4cff8004d95a3d0def
Author: Stas Sergeev <stsp@list.ru>
Date:   Mon Jul 20 17:49:57 2015 -0700

    of_mdio: add new DT property 'managed' to specify the PHY management type

    Currently the PHY management type is selected by the MAC driver arbitrary.
    The decision is based on the presence of the "fixed-link" node and on a
    will of the driver's authors.
    This caused a regression recently, when mvneta driver suddenly started
    to use the in-band status for auto-negotiation on fixed links.
    It appears the auto-negotiation may not work when expected by the MAC driver.
    Sebastien Rannou explains:
    << Yes, I confirm that my HW does not generate an in-band status. AFAIK, it's
    a PHY that aggregates 4xSGMIIs to 1xQSGMII ; the MAC side of the PHY (with
    inband status) is connected to the switch through QSGMII, and in this context
    we are on the media side of the PHY. >>
    https://lkml.org/lkml/2015/7/10/206

    This patch introduces the new string property 'managed' that allows
    the user to set the management type explicitly.
    The supported values are:
    "auto" - default. Uses either MDIO or nothing, depending on the presence
    of the fixed-link node
    "in-band-status" - use in-band status

    Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>

    CC: Rob Herring <robh+dt@kernel.org>
    CC: Pawel Moll <pawel.moll@arm.com>
    CC: Mark Rutland <mark.rutland@arm.com>
    CC: Ian Campbell <ijc+devicetree@hellion.org.uk>
    CC: Kumar Gala <galak@codeaurora.org>
    CC: Florian Fainelli <f.fainelli@gmail.com>
    CC: Grant Likely <grant.likely@linaro.org>
    CC: devicetree@vger.kernel.org
    CC: linux-kernel@vger.kernel.org
    CC: netdev@vger.kernel.org
    Signed-off-by: David S. Miller <davem@davemloft.net>

not added specifically to mean whether the MAC should use in-band
autoneg or not? See commit f8af8e6eb950 ("mvneta: use inband status only
when explicitly enabled") for the mvneta change, after which the code
became:

mvneta_probe:

	err = of_property_read_string(dn, "managed", &managed);
	pp->use_inband_status = (err == 0 &&
				 strcmp(managed, "in-band-status") == 0);

mvneta_defaults_set:

	if (pp->use_inband_status) {
		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
		val &= ~(MVNETA_GMAC_FORCE_LINK_PASS |
			 MVNETA_GMAC_FORCE_LINK_DOWN |
			 MVNETA_GMAC_AN_FLOW_CTRL_EN);
		val |= MVNETA_GMAC_INBAND_AN_ENABLE |
		       MVNETA_GMAC_AN_SPEED_EN |
		       MVNETA_GMAC_AN_DUPLEX_EN;
		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
	} else {
		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
		val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE |
		       MVNETA_GMAC_AN_SPEED_EN |
		       MVNETA_GMAC_AN_DUPLEX_EN);
		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
	}

mvneta_port_power_up:

	if (pp->use_inband_status)
		ctrl |= MVNETA_GMAC2_INBAND_AN_ENABLE;

This is why I am asking whether there is any formal definition of what
managed = "in-band-status" means. You've said it means about retrieving
link status from the PCS. What are you basing upon when you are saying that?

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Russell King (Oracle) @ 2022-07-22  8:28 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <20220721213645.57ne2jf7f6try4ec@skbuf>

On Fri, Jul 22, 2022 at 12:36:45AM +0300, Vladimir Oltean wrote:
> On Thu, Jul 21, 2022 at 10:14:00PM +0100, Russell King (Oracle) wrote:
> > > > So currently we try to enable C37 AN in 2500base-x mode, although
> > > > the standard says that it shouldn't be there, and it shouldn't be there
> > > > presumably because they want it to work with C73 AN.
> > > > 
> > > > I don't know how to solve this issue. Maybe declare a new PHY interface
> > > > mode constant, 2500base-x-no-c37-an ?
> > > 
> > > So this is essentially what I'm asking, and you didn't necessarily fully
> > > answer. I take it that there exist Marvell switches which enable in-band
> > > autoneg for 2500base-x and switches which don't, and managed = "in-band-status"
> > > has nothing to do with that decision. Right?
> > 
> > I think we're getting a little too het up over this.
> 
> No, I think it's relevant to this patch set.
> 
> > We have 1000base-X where, when we're not using in-band-status, we don't
> > use autoneg (some drivers that weren't caught in review annoyingly do
> > still use autoneg, but they shouldn't). We ignore the ethtool autoneg
> > bit.
> > 
> > We also have 1000base-X where we're using in-band-status, and we then
> > respect the ethtool autoneg bit.
> > 
> > So, wouldn't it be logical if 2500base-X were implemented the same way,
> > and on setups where 2500base-X does not support clause 37 AN, we
> > clear the ethtool autoneg bit? If we have 2500base-X being used as the
> > media link, surely this is the right behaviour?
> 
> The ethtool autoneg bit is only relevant when the PCS is the last thing
> before the medium. But if the SERDES protocol connects the MAC to the PHY,
> or the MAC to another MAC (such as the case here, CPU or DSA ports),
> there won't be any ethtool bit to take into consideration, and that's
> where my question is. Is there any expected correlation between enabling
> in-band autoneg and the presence or absence of managed = "in-band-status"?

This topic is something I was looking at back in November 2021, trying
to work out what the most sensible way of indicating to a PCS whether
it should enable in-band or not:

http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e4ea7d035e7e04e87dfd86702f59952e0cecc18d
http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e454bf101fa457dd5c2cea0b1aaab7ba33048089
http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e2c57490f205ae7c0e11fcf756675937f933be5e

The intention there was to move the decision about whether a PCS should
enable autoneg out of the PCS and into phylink, but doing that one comes
immediately on the problem of (e.g.) Marvell NETA/PP2 vs Lynx having
different interpretations for 2500base-X. There are also a number of
drivers that do not follow MLO_AN_INBAND-means-use-inband or not for
things such as SGMII or 1000base-X.

This means we have no standard interpretation amongst phylink users
about when in-band signalling should be enabled or disabled, which
means moving that decision into phylink today isn't possible.

The only thing we could do is provide the PCS with an additional bit
of information so it can make the decision - something like a boolean
"pcs_connects_to_medium" flag, and keep the decision making in the
PCS-specific code - sadly keeping the variability between different
PCS implementations.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* [PATCH v5 7/8] ACPI: property: Add support for parsing buffer property UUID
From: Sakari Ailus @ 2022-07-22  6:57 UTC (permalink / raw)
  To: linux-acpi; +Cc: rafael, andriy.shevchenko
In-Reply-To: <CAJZ5v0h5-JyES4_kWCFf9EAhqa+VKZGobST2zX0+Z1PsWB0H3Q@mail.gmail.com>

Add support for newly added buffer property UUID, as defined in the DSD
guide section 3.3
<URL:https://github.com/UEFI/DSD-Guide/blob/main/src/dsd-guide.adoc#buffer-data-extension-uuid>.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
---
since v4:

Refer to DSD guide section 3.3, with URL.

 drivers/acpi/property.c | 142 ++++++++++++++++++++++++++++++++++++----
 include/acpi/acpi_bus.h |   3 +-
 include/linux/acpi.h    |   2 +-
 3 files changed, 132 insertions(+), 15 deletions(-)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 236a847f1bfbd..7621f684212ff 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -55,14 +55,19 @@ static const guid_t ads_guid =
 	GUID_INIT(0xdbb8e3e6, 0x5886, 0x4ba6,
 		  0x87, 0x95, 0x13, 0x19, 0xf5, 0x2a, 0x96, 0x6b);
 
+static const guid_t buffer_prop_guid =
+	GUID_INIT(0xedb12dd0, 0x363d, 0x4085,
+		  0xa3, 0xd2, 0x49, 0x52, 0x2c, 0xa1, 0x60, 0xc4);
+
 static bool acpi_enumerate_nondev_subnodes(acpi_handle scope,
-					   const union acpi_object *desc,
+					   union acpi_object *desc,
 					   struct acpi_device_data *data,
 					   struct fwnode_handle *parent);
-static bool acpi_extract_properties(const union acpi_object *desc,
+static bool acpi_extract_properties(acpi_handle handle,
+				    union acpi_object *desc,
 				    struct acpi_device_data *data);
 
-static bool acpi_nondev_subnode_extract(const union acpi_object *desc,
+static bool acpi_nondev_subnode_extract(union acpi_object *desc,
 					acpi_handle handle,
 					const union acpi_object *link,
 					struct list_head *list,
@@ -81,7 +86,7 @@ static bool acpi_nondev_subnode_extract(const union acpi_object *desc,
 	INIT_LIST_HEAD(&dn->data.properties);
 	INIT_LIST_HEAD(&dn->data.subnodes);
 
-	result = acpi_extract_properties(desc, &dn->data);
+	result = acpi_extract_properties(handle, desc, &dn->data);
 
 	if (handle) {
 		acpi_handle scope;
@@ -156,7 +161,7 @@ static bool acpi_nondev_subnode_ok(acpi_handle scope,
 }
 
 static bool acpi_add_nondev_subnodes(acpi_handle scope,
-				     const union acpi_object *links,
+				     union acpi_object *links,
 				     struct list_head *list,
 				     struct fwnode_handle *parent)
 {
@@ -164,7 +169,7 @@ static bool acpi_add_nondev_subnodes(acpi_handle scope,
 	int i;
 
 	for (i = 0; i < links->package.count; i++) {
-		const union acpi_object *link, *desc;
+		union acpi_object *link, *desc;
 		acpi_handle handle;
 		bool result;
 
@@ -204,7 +209,7 @@ static bool acpi_add_nondev_subnodes(acpi_handle scope,
 }
 
 static bool acpi_enumerate_nondev_subnodes(acpi_handle scope,
-					   const union acpi_object *desc,
+					   union acpi_object *desc,
 					   struct acpi_device_data *data,
 					   struct fwnode_handle *parent)
 {
@@ -212,7 +217,8 @@ static bool acpi_enumerate_nondev_subnodes(acpi_handle scope,
 
 	/* Look for the ACPI data subnodes GUID. */
 	for (i = 0; i < desc->package.count; i += 2) {
-		const union acpi_object *guid, *links;
+		const union acpi_object *guid;
+		union acpi_object *links;
 
 		guid = &desc->package.elements[i];
 		links = &desc->package.elements[i + 1];
@@ -325,7 +331,7 @@ static bool acpi_is_property_guid(const guid_t *guid)
 
 struct acpi_device_properties *
 acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid,
-		    const union acpi_object *properties)
+		    union acpi_object *properties)
 {
 	struct acpi_device_properties *props;
 
@@ -377,7 +383,104 @@ static bool acpi_tie_nondev_subnodes(struct acpi_device_data *data)
 	return true;
 }
 
-static bool acpi_extract_properties(const union acpi_object *desc,
+static void acpi_data_add_buffer_props(acpi_handle handle,
+				       struct acpi_device_data *data,
+				       union acpi_object *properties)
+{
+	struct acpi_device_properties *props;
+	union acpi_object *package;
+	size_t alloc_size;
+	unsigned int i;
+	u32 *count;
+
+	if (check_mul_overflow((size_t)properties->package.count,
+			       sizeof(*package) + sizeof(void *),
+			       &alloc_size) ||
+	    check_add_overflow(sizeof(*props) + sizeof(*package), alloc_size,
+			       &alloc_size)) {
+		acpi_handle_warn(handle,
+				 "can't allocate memory for %u buffer props",
+				 properties->package.count);
+		return;
+	}
+
+	props = kvzalloc(alloc_size, GFP_KERNEL);
+	if (!props)
+		return;
+
+	props->guid = &buffer_prop_guid;
+	props->bufs = (void *)(props + 1);
+	props->properties = (void *)(props->bufs + properties->package.count);
+
+	/* Outer package */
+	package = props->properties;
+	package->type = ACPI_TYPE_PACKAGE;
+	package->package.elements = package + 1;
+	count = &package->package.count;
+	*count = 0;
+
+	/* Inner packages */
+	package++;
+
+	for (i = 0; i < properties->package.count; i++) {
+		struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER };
+		union acpi_object *property = &properties->package.elements[i];
+		union acpi_object *prop, *obj, *buf_obj;
+		acpi_status status;
+
+		if (property->type != ACPI_TYPE_PACKAGE ||
+		    property->package.count != 2) {
+			acpi_handle_warn(handle,
+					 "buffer property %u has %u entries\n",
+					 i, property->package.count);
+			continue;
+		}
+
+		prop = &property->package.elements[0];
+		obj = &property->package.elements[1];
+
+		if (prop->type != ACPI_TYPE_STRING ||
+		    obj->type != ACPI_TYPE_STRING) {
+			acpi_handle_warn(handle,
+					 "wrong object types %u and %u\n",
+					 prop->type, obj->type);
+			continue;
+		}
+
+		status = acpi_evaluate_object_typed(handle, obj->string.pointer,
+						    NULL, &buf,
+						    ACPI_TYPE_BUFFER);
+		if (ACPI_FAILURE(status)) {
+			acpi_handle_warn(handle,
+					 "can't evaluate \"%*pE\" as buffer\n",
+					 obj->string.length,
+					 obj->string.pointer);
+			continue;
+		}
+
+		package->type = ACPI_TYPE_PACKAGE;
+		package->package.elements = prop;
+		package->package.count = 2;
+
+		buf_obj = buf.pointer;
+
+		/* Replace the string object with a buffer object */
+		obj->type = ACPI_TYPE_BUFFER;
+		obj->buffer.length = buf_obj->buffer.length;
+		obj->buffer.pointer = buf_obj->buffer.pointer;
+
+		props->bufs[i] = buf.pointer;
+		package++;
+		(*count)++;
+	}
+
+	if (*count)
+		list_add(&props->list, &data->properties);
+	else
+		kvfree(props);
+}
+
+static bool acpi_extract_properties(acpi_handle scope, union acpi_object *desc,
 				    struct acpi_device_data *data)
 {
 	int i;
@@ -387,7 +490,8 @@ static bool acpi_extract_properties(const union acpi_object *desc,
 
 	/* Look for the device properties GUID. */
 	for (i = 0; i < desc->package.count; i += 2) {
-		const union acpi_object *guid, *properties;
+		const union acpi_object *guid;
+		union acpi_object *properties;
 
 		guid = &desc->package.elements[i];
 		properties = &desc->package.elements[i + 1];
@@ -401,6 +505,12 @@ static bool acpi_extract_properties(const union acpi_object *desc,
 		    properties->type != ACPI_TYPE_PACKAGE)
 			break;
 
+		if (guid_equal((guid_t *)guid->buffer.pointer,
+			       &buffer_prop_guid)) {
+			acpi_data_add_buffer_props(scope, data, properties);
+			continue;
+		}
+
 		if (!acpi_is_property_guid((guid_t *)guid->buffer.pointer))
 			continue;
 
@@ -447,7 +557,7 @@ void acpi_init_properties(struct acpi_device *adev)
 	if (ACPI_FAILURE(status))
 		goto out;
 
-	if (acpi_extract_properties(buf.pointer, &adev->data)) {
+	if (acpi_extract_properties(adev->handle, buf.pointer, &adev->data)) {
 		adev->data.pointer = buf.pointer;
 		if (acpi_of)
 			acpi_init_of_compatible(adev);
@@ -477,8 +587,14 @@ static void acpi_free_device_properties(struct list_head *list)
 	struct acpi_device_properties *props, *tmp;
 
 	list_for_each_entry_safe(props, tmp, list, list) {
+		u32 i;
+
 		list_del(&props->list);
-		kfree(props);
+		/* Buffer data properties were separately allocated */
+		if (props->bufs)
+			for (i = 0; i < props->properties->package.count; i++)
+				ACPI_FREE(props->bufs[i]);
+		kvfree(props);
 	}
 }
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 54c5566df9fe1..88a17fce49fd0 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -344,8 +344,9 @@ struct acpi_device_physical_node {
 
 struct acpi_device_properties {
 	const guid_t *guid;
-	const union acpi_object *properties;
+	union acpi_object *properties;
 	struct list_head list;
+	void **bufs;
 };
 
 /* ACPI Device Specific Data (_DSD) */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4f82a5bc6d987..93a695cab9d8a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1243,7 +1243,7 @@ static inline bool acpi_dev_has_props(const struct acpi_device *adev)
 
 struct acpi_device_properties *
 acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid,
-		    const union acpi_object *properties);
+		    union acpi_object *properties);
 
 int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname,
 		       void **valptr);
-- 
2.30.2


^ permalink raw reply related

* Re: [PATCH net-next 2/6] software node: allow named software node to be created
From: Sakari Ailus @ 2022-07-22  6:21 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Russell King (Oracle), Andy Shevchenko, Andrew Lunn,
	Heiner Kallweit, Alexandre Belloni, Alvin __ipraga,
	Claudiu Manoil, Daniel Scally, David S. Miller, DENG Qingfang,
	Eric Dumazet, Florian Fainelli, George McCollister,
	Greg Kroah-Hartman, Hauke Mehrtens, Heikki Krogerus,
	Jakub Kicinski, Kurt Kanzenbach, Landen Chao, Linus Walleij,
	linux-acpi, linux-arm-kernel, linux-mediatek, Matthias Brugger,
	netdev, Paolo Abeni, Rafael J. Wysocki, Sean Wang, UNGLinuxDriver,
	Vivien Didelot, Woojung Huh, Marek Behún
In-Reply-To: <20220720225652.4uo6fcdcunenej3j@skbuf>

Hi Vladimir,

On Thu, Jul 21, 2022 at 01:56:52AM +0300, Vladimir Oltean wrote:
> Hi Sakari,
> 
> On Tue, Jul 19, 2022 at 08:50:27AM +0000, Sakari Ailus wrote:
> > Basically what your patch is doing is adding a helper function that creates
> > an fwnode with a given name. This functionality was there previously through
> > software_node_register_nodes(), with node allocation responsibility residing
> > on the caller. It's used e.g. here:
> > drivers/media/pci/intel/ipu3/cio2-bridge.c .
> > 
> > The larger question is perhaps when can you safely remove software nodes.
> > And which of these two APIs would be preferred. I haven't checked how many
> > users each has. There's no refcounting nor locking for software nodes, so
> > once made visible to the rest of the kernel, they're always expected to be
> > there, unchanged, or at least it needs to be known when they can be removed.
> 
> Just for my clarity, are you saying that this printf selftest is
> violating the software nodes' expectation to always be there unchanged
> and never be removed?

No. This is the other case, i.e. it's known the nodes can be removed.

> 
> static void __init fwnode_pointer(void)
> {
> 	const struct software_node softnodes[] = {
> 		{ .name = "first", },
> 		{ .name = "second", .parent = &softnodes[0], },
> 		{ .name = "third", .parent = &softnodes[1], },
> 		{ NULL /* Guardian */ }
> 	};
> 	const char * const full_name = "first/second/third";
> 	const char * const full_name_second = "first/second";
> 	const char * const second_name = "second";
> 	const char * const third_name = "third";
> 	int rval;
> 
> 	rval = software_node_register_nodes(softnodes);
> 	if (rval) {
> 		pr_warn("cannot register softnodes; rval %d\n", rval);
> 		return;
> 	}
> 
> 	test(full_name_second, "%pfw", software_node_fwnode(&softnodes[1]));
> 	test(full_name, "%pfw", software_node_fwnode(&softnodes[2]));
> 	test(full_name, "%pfwf", software_node_fwnode(&softnodes[2]));
> 	test(second_name, "%pfwP", software_node_fwnode(&softnodes[1]));
> 	test(third_name, "%pfwP", software_node_fwnode(&softnodes[2]));
> 
> 	software_node_unregister_nodes(softnodes);
> }
> 
> The use case in this patch set is essentially equivalent to what printf
> does: exposing the software nodes to the rest of the kernel and to user
> space is probably not necessary, it's just that we need to call a
> function that parses their structure (essentially an equivalent to
> calling "test" above). Could you indicate whether there is a better
> alternative of doing this?

I'm actually not suggesting to do otherwise. What I wanted to say was that
it'd be best to settle with a single API to create software nodes while
keeping in mind serialising access to the data structure as well as
the lifetime of the software nodes.

This patch is adding another API function to register software nodes which
expands the scope of another that effectively did not allow sub-nodes.
Lifetime management currently doesn't really exist for ACPI nodes (device
or data) and only exists in somewhat unsatisfactory form for DT nodes. That
might be still the best model for software nodes.

Perhaps the API this patch adds is nicer to use than
software_node_register_nodes() and better lends itself for adding
refcounting later on.

I wonder what Andy or Heikki think.

-- 
Kind regards,

Sakari Ailus

^ permalink raw reply

* RE: [PATCH] intel: thermal: PCH: Drop ACPI_FADT_LOW_POWER_S0 check
From: Zhang, Rui @ 2022-07-22  2:59 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Rafael J. Wysocki, Linux PM, Linux ACPI, LKML, Mario Limonciello,
	Srinivas Pandruvada
In-Reply-To: <CAJZ5v0j+FTX4UF-9Y0BQc2mYXQiphsnkt07CALhF7BPtSdDxgg@mail.gmail.com>



> -----Original Message-----
> From: Rafael J. Wysocki <rafael@kernel.org>
> Sent: Monday, July 18, 2022 3:39 AM
> To: Zhang, Rui <rui.zhang@intel.com>
> Cc: Rafael J. Wysocki <rjw@rjwysocki.net>; Linux PM <linux-
> pm@vger.kernel.org>; Linux ACPI <linux-acpi@vger.kernel.org>; LKML <linux-
> kernel@vger.kernel.org>; Mario Limonciello <mario.limonciello@amd.com>;
> Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> Subject: Re: [PATCH] intel: thermal: PCH: Drop ACPI_FADT_LOW_POWER_S0
> check
> Importance: High
> 
> On Sun, Jul 17, 2022 at 8:14 AM Zhang Rui <rui.zhang@intel.com> wrote:
> >
> > On Thu, 2022-07-14 at 21:11 +0200, Rafael J. Wysocki wrote:
> > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > >
> > > If ACPI_FADT_LOW_POWER_S0 is not set, this doesn't mean that low-
> > > power
> > > S0 idle is not usable.  It merely means that using S3 on the given
> > > system is more beneficial from the energy saving perspective than
> > > using low-power S0 idle, as long as S3 is supported.
> >
> > Agreed.
> >
> > >
> > > Suspend-to-idle is still a valid suspend mode if
> > > ACPI_FADT_LOW_POWER_S0
> > > is not set and the pm_suspend_via_firmware() check in
> > > pch_wpt_suspend()
> > > is sufficient to distinguish suspend-to-idle from S3, so drop the
> > > confusing ACPI_FADT_LOW_POWER_S0 check.
> >
> > the cooling delay in the suspend callback is to make sure PCH
> > temperature won't block S0ix during s2idle. So if S0ix is not
> > supported, it is meaningless to invoke the cooling delay during s2idle.
> 
> But there is no way to determine whether or not S0ix is supported.  In
> particular, ACPI_FADT_LOW_POWER_S0 is not one.
> 
> > so the problem is that we don't have an indicator for S0ix capability.
> > And this also applies to drivers/rtc/rtc-cmos.c, where we use ACPI SCI
> > for runtime RTC wakeup instead of HPET interrupt on "S0ix capable"
> > platforms because the HPET timer may block S0ix.
> 
> "S0ix capable" doesn't matter.  What matters is whether or not the current
> transition under way is into S0 or into suspend-to-idle.  In the latter case
> there is no reason to avoid doing whatever is done in the expectation that
> S0ix may be entered going forward.

Okay. It is not perfect but we have to live with this.

Acked-by: Zhang Rui <rui.zhang@intel.com>

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Vladimir Oltean @ 2022-07-21 21:36 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <YtnBmFm8Jhokgp7Q@shell.armlinux.org.uk>

On Thu, Jul 21, 2022 at 10:14:00PM +0100, Russell King (Oracle) wrote:
> > > So currently we try to enable C37 AN in 2500base-x mode, although
> > > the standard says that it shouldn't be there, and it shouldn't be there
> > > presumably because they want it to work with C73 AN.
> > > 
> > > I don't know how to solve this issue. Maybe declare a new PHY interface
> > > mode constant, 2500base-x-no-c37-an ?
> > 
> > So this is essentially what I'm asking, and you didn't necessarily fully
> > answer. I take it that there exist Marvell switches which enable in-band
> > autoneg for 2500base-x and switches which don't, and managed = "in-band-status"
> > has nothing to do with that decision. Right?
> 
> I think we're getting a little too het up over this.

No, I think it's relevant to this patch set.

> We have 1000base-X where, when we're not using in-band-status, we don't
> use autoneg (some drivers that weren't caught in review annoyingly do
> still use autoneg, but they shouldn't). We ignore the ethtool autoneg
> bit.
> 
> We also have 1000base-X where we're using in-band-status, and we then
> respect the ethtool autoneg bit.
> 
> So, wouldn't it be logical if 2500base-X were implemented the same way,
> and on setups where 2500base-X does not support clause 37 AN, we
> clear the ethtool autoneg bit? If we have 2500base-X being used as the
> media link, surely this is the right behaviour?

The ethtool autoneg bit is only relevant when the PCS is the last thing
before the medium. But if the SERDES protocol connects the MAC to the PHY,
or the MAC to another MAC (such as the case here, CPU or DSA ports),
there won't be any ethtool bit to take into consideration, and that's
where my question is. Is there any expected correlation between enabling
in-band autoneg and the presence or absence of managed = "in-band-status"?

> (This has implications for the rate adaption case, since the 2500base-X
> link is not the media, and therefore the state of the autoneg bit
> shouldn't apply to the 2500base-X link.)

This is closer to what I was interested in knowing, but still not that.

^ permalink raw reply

* Re: [PATCH v2 01/29] ACPI: video: Add acpi_video_backlight_use_native() helper
From: Daniel Dadap @ 2022-07-21 21:30 UTC (permalink / raw)
  To: Hans de Goede, Ben Skeggs, Karol Herbst, Lyude, Maarten Lankhorst,
	Maxime Ripard, Thomas Zimmermann, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Alex Deucher, Christian König,
	Xinhui, Rafael J . Wysocki, Mika Westerberg, Lukas Wunner,
	Mark Gross, Andy Shevchenko
  Cc: nouveau, Daniel Vetter, David Airlie, intel-gfx, dri-devel,
	amd-gfx, Len Brown, linux-acpi, platform-driver-x86
In-Reply-To: <641cb059-48f5-5f05-5ec2-610f1215391c@nvidia.com>


On 7/21/22 16:24, Daniel Dadap wrote:
>
> On 7/12/22 14:38, Hans de Goede wrote:
>> ATM on x86 laptops where we want userspace to use the acpi_video 
>> backlight
>> device we often register both the GPU's native backlight device and
>> acpi_video's firmware acpi_video# backlight device. This relies on
>> userspace preferring firmware type backlight devices over native 
>> ones, but
>> registering 2 backlight devices for a single display really is 
>> undesirable.
>>
>> On x86 laptops where the native GPU backlight device should be used,
>> the registering of other backlight devices is avoided by their drivers
>> using acpi_video_get_backlight_type() and only registering their 
>> backlight
>> if the return value matches their type.
>>
>> acpi_video_get_backlight_type() uses
>> backlight_device_get_by_type(BACKLIGHT_RAW) to determine if a native
>> driver is available and will never return native if this returns
>> false. This means that the GPU's native backlight registering code
>> cannot just call acpi_video_get_backlight_type() to determine if it
>> should register its backlight, since acpi_video_get_backlight_type() 
>> will
>> never return native until the native backlight has already registered.
>>
>> To fix this add a new internal native function parameter to
>> acpi_video_get_backlight_type(), which when set to true will make
>> acpi_video_get_backlight_type() behave as if a native backlight has
>> already been registered.
>>
>> And add a new acpi_video_backlight_use_native() helper, which sets this
>> to true, for use in native GPU backlight code.
>>
>> Changes in v2:
>> - Replace adding a native parameter to 
>> acpi_video_get_backlight_type() with
>>    adding a new acpi_video_backlight_use_native() helper.
>>
>> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
>> ---
>>   drivers/acpi/video_detect.c | 24 ++++++++++++++++++++----
>>   include/acpi/video.h        |  5 +++++
>>   2 files changed, 25 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
>> index becc198e4c22..4346c990022d 100644
>> --- a/drivers/acpi/video_detect.c
>> +++ b/drivers/acpi/video_detect.c
>> @@ -17,8 +17,9 @@
>>    * Otherwise vendor specific drivers like thinkpad_acpi, asus-laptop,
>>    * sony_acpi,... can take care about backlight brightness.
>>    *
>> - * Backlight drivers can use acpi_video_get_backlight_type() to 
>> determine
>> - * which driver should handle the backlight.
>> + * Backlight drivers can use acpi_video_get_backlight_type() to 
>> determine which
>> + * driver should handle the backlight. RAW/GPU-driver backlight 
>> drivers must
>> + * use the acpi_video_backlight_use_native() helper for this.
>>    *
>>    * If CONFIG_ACPI_VIDEO is neither set as "compiled in" (y) nor as 
>> a module (m)
>>    * this file will not be compiled and 
>> acpi_video_get_backlight_type() will
>> @@ -548,9 +549,10 @@ static int acpi_video_backlight_notify(struct 
>> notifier_block *nb,
>>    * Arguably the native on win8 check should be done first, but that 
>> would
>>    * be a behavior change, which may causes issues.
>>    */
>> -enum acpi_backlight_type acpi_video_get_backlight_type(void)
>> +static enum acpi_backlight_type __acpi_video_get_backlight_type(bool 
>> native)
>>   {
>>       static DEFINE_MUTEX(init_mutex);
>> +    static bool native_available;
>>       static bool init_done;
>>       static long video_caps;
>>   @@ -570,6 +572,8 @@ enum acpi_backlight_type 
>> acpi_video_get_backlight_type(void)
>>               backlight_notifier_registered = true;
>>           init_done = true;
>>       }
>> +    if (native)
>> +        native_available = true;
>>       mutex_unlock(&init_mutex);
>>         if (acpi_backlight_cmdline != acpi_backlight_undef)
>> @@ -581,13 +585,25 @@ enum acpi_backlight_type 
>> acpi_video_get_backlight_type(void)
>>       if (!(video_caps & ACPI_VIDEO_BACKLIGHT))
>>           return acpi_backlight_vendor;
>>   -    if (acpi_osi_is_win8() && 
>> backlight_device_get_by_type(BACKLIGHT_RAW))
>> +    if (acpi_osi_is_win8() &&
>> +        (native_available || 
>> backlight_device_get_by_type(BACKLIGHT_RAW)))
>>           return acpi_backlight_native;
>>         return acpi_backlight_video;
>
>
> So I ran into a minor problem when testing the NVIDIA proprietary 
> driver against this change set, after checking 
> acpi_video_backlight_use_native() before registering the NVIDIA 
> proprietary driver's backlight handler. Namely, for the case where a 
> user installs the NVIDIA proprietary driver after the video.ko has 
> already registered its backlight handler, we end up with both the 
> firmware and native handlers registered simultaneously, since the ACPI 
> video driver no longer unregisters its backlight handler. In this 
> state, desktop environments end up preferring the registered but 
> non-functional firmware handler from video.ko. (Manually twiddling the 
> sysfs interface for the native NVIDIA handler works fine.) When 
> rebooting the system after installing the NVIDIA proprietary driver, 
> it is able to register its native handler before the delayed work to 
> register the ACPI video backlight handler fires, so we end up with 
> only one (native) handler, and userspace is happy.
>
> Maybe this will be moot later on, when the existing sysfs interface is 
> deprecated, and it probably isn't a huge deal, since a reboot fixes 
> things (I imagine installing an in-tree DRM/KMS driver on an already 
> running kernel isn't really a thing, which is why this isn't a problem 
> with the in-tree drivers), but would it make sense to unregister the 
> ACPI video backlight handler here before returning 
> acpi_backlight_native? That way, we'll briefly end up with zero 
> backlight handlers rather than briefly ending up with two of them. Not 
> sure if that's really any better, though.
>

Thinking about this a little more, maybe it's better not to overly 
complicate things, and just assert that users of the NVIDIA proprietary 
driver will need to reboot after installation in order to get the 
backlight working, at least until we get further along in this effort 
and the backlight interface transitions to the DRM connector property 
you have proposed.


>
>>   }
>> +
>> +enum acpi_backlight_type acpi_video_get_backlight_type(void)
>> +{
>> +    return __acpi_video_get_backlight_type(false);
>> +}
>>   EXPORT_SYMBOL(acpi_video_get_backlight_type);
>>   +bool acpi_video_backlight_use_native(void)
>> +{
>> +    return __acpi_video_get_backlight_type(true) == 
>> acpi_backlight_native;
>> +}
>> +EXPORT_SYMBOL(acpi_video_backlight_use_native);
>> +
>>   /*
>>    * Set the preferred backlight interface type based on DMI info.
>>    * This function allows DMI blacklists to be implemented by external
>> diff --git a/include/acpi/video.h b/include/acpi/video.h
>> index db8548ff03ce..4705e339c252 100644
>> --- a/include/acpi/video.h
>> +++ b/include/acpi/video.h
>> @@ -56,6 +56,7 @@ extern void acpi_video_unregister(void);
>>   extern int acpi_video_get_edid(struct acpi_device *device, int type,
>>                      int device_id, void **edid);
>>   extern enum acpi_backlight_type acpi_video_get_backlight_type(void);
>> +extern bool acpi_video_backlight_use_native(void);
>>   extern void acpi_video_set_dmi_backlight_type(enum 
>> acpi_backlight_type type);
>>   /*
>>    * Note: The value returned by 
>> acpi_video_handles_brightness_key_presses()
>> @@ -77,6 +78,10 @@ static inline enum acpi_backlight_type 
>> acpi_video_get_backlight_type(void)
>>   {
>>       return acpi_backlight_vendor;
>>   }
>> +static inline bool acpi_video_backlight_use_native(void)
>> +{
>> +    return true;
>> +}
>>   static inline void acpi_video_set_dmi_backlight_type(enum 
>> acpi_backlight_type type)
>>   {
>>   }

^ permalink raw reply

* Re: [PATCH v2 01/29] ACPI: video: Add acpi_video_backlight_use_native() helper
From: Daniel Dadap @ 2022-07-21 21:24 UTC (permalink / raw)
  To: Hans de Goede, Ben Skeggs, Karol Herbst, Lyude, Maarten Lankhorst,
	Maxime Ripard, Thomas Zimmermann, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Alex Deucher, Christian König,
	Xinhui, Rafael J . Wysocki, Mika Westerberg, Lukas Wunner,
	Mark Gross, Andy Shevchenko
  Cc: nouveau, Daniel Vetter, David Airlie, intel-gfx, dri-devel,
	amd-gfx, Len Brown, linux-acpi, platform-driver-x86
In-Reply-To: <20220712193910.439171-2-hdegoede@redhat.com>


On 7/12/22 14:38, Hans de Goede wrote:
> ATM on x86 laptops where we want userspace to use the acpi_video backlight
> device we often register both the GPU's native backlight device and
> acpi_video's firmware acpi_video# backlight device. This relies on
> userspace preferring firmware type backlight devices over native ones, but
> registering 2 backlight devices for a single display really is undesirable.
>
> On x86 laptops where the native GPU backlight device should be used,
> the registering of other backlight devices is avoided by their drivers
> using acpi_video_get_backlight_type() and only registering their backlight
> if the return value matches their type.
>
> acpi_video_get_backlight_type() uses
> backlight_device_get_by_type(BACKLIGHT_RAW) to determine if a native
> driver is available and will never return native if this returns
> false. This means that the GPU's native backlight registering code
> cannot just call acpi_video_get_backlight_type() to determine if it
> should register its backlight, since acpi_video_get_backlight_type() will
> never return native until the native backlight has already registered.
>
> To fix this add a new internal native function parameter to
> acpi_video_get_backlight_type(), which when set to true will make
> acpi_video_get_backlight_type() behave as if a native backlight has
> already been registered.
>
> And add a new acpi_video_backlight_use_native() helper, which sets this
> to true, for use in native GPU backlight code.
>
> Changes in v2:
> - Replace adding a native parameter to acpi_video_get_backlight_type() with
>    adding a new acpi_video_backlight_use_native() helper.
>
> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
> ---
>   drivers/acpi/video_detect.c | 24 ++++++++++++++++++++----
>   include/acpi/video.h        |  5 +++++
>   2 files changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
> index becc198e4c22..4346c990022d 100644
> --- a/drivers/acpi/video_detect.c
> +++ b/drivers/acpi/video_detect.c
> @@ -17,8 +17,9 @@
>    * Otherwise vendor specific drivers like thinkpad_acpi, asus-laptop,
>    * sony_acpi,... can take care about backlight brightness.
>    *
> - * Backlight drivers can use acpi_video_get_backlight_type() to determine
> - * which driver should handle the backlight.
> + * Backlight drivers can use acpi_video_get_backlight_type() to determine which
> + * driver should handle the backlight. RAW/GPU-driver backlight drivers must
> + * use the acpi_video_backlight_use_native() helper for this.
>    *
>    * If CONFIG_ACPI_VIDEO is neither set as "compiled in" (y) nor as a module (m)
>    * this file will not be compiled and acpi_video_get_backlight_type() will
> @@ -548,9 +549,10 @@ static int acpi_video_backlight_notify(struct notifier_block *nb,
>    * Arguably the native on win8 check should be done first, but that would
>    * be a behavior change, which may causes issues.
>    */
> -enum acpi_backlight_type acpi_video_get_backlight_type(void)
> +static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native)
>   {
>   	static DEFINE_MUTEX(init_mutex);
> +	static bool native_available;
>   	static bool init_done;
>   	static long video_caps;
>   
> @@ -570,6 +572,8 @@ enum acpi_backlight_type acpi_video_get_backlight_type(void)
>   			backlight_notifier_registered = true;
>   		init_done = true;
>   	}
> +	if (native)
> +		native_available = true;
>   	mutex_unlock(&init_mutex);
>   
>   	if (acpi_backlight_cmdline != acpi_backlight_undef)
> @@ -581,13 +585,25 @@ enum acpi_backlight_type acpi_video_get_backlight_type(void)
>   	if (!(video_caps & ACPI_VIDEO_BACKLIGHT))
>   		return acpi_backlight_vendor;
>   
> -	if (acpi_osi_is_win8() && backlight_device_get_by_type(BACKLIGHT_RAW))
> +	if (acpi_osi_is_win8() &&
> +	    (native_available || backlight_device_get_by_type(BACKLIGHT_RAW)))
>   		return acpi_backlight_native;
>   
>   	return acpi_backlight_video;


So I ran into a minor problem when testing the NVIDIA proprietary driver 
against this change set, after checking 
acpi_video_backlight_use_native() before registering the NVIDIA 
proprietary driver's backlight handler. Namely, for the case where a 
user installs the NVIDIA proprietary driver after the video.ko has 
already registered its backlight handler, we end up with both the 
firmware and native handlers registered simultaneously, since the ACPI 
video driver no longer unregisters its backlight handler. In this state, 
desktop environments end up preferring the registered but non-functional 
firmware handler from video.ko. (Manually twiddling the sysfs interface 
for the native NVIDIA handler works fine.) When rebooting the system 
after installing the NVIDIA proprietary driver, it is able to register 
its native handler before the delayed work to register the ACPI video 
backlight handler fires, so we end up with only one (native) handler, 
and userspace is happy.

Maybe this will be moot later on, when the existing sysfs interface is 
deprecated, and it probably isn't a huge deal, since a reboot fixes 
things (I imagine installing an in-tree DRM/KMS driver on an already 
running kernel isn't really a thing, which is why this isn't a problem 
with the in-tree drivers), but would it make sense to unregister the 
ACPI video backlight handler here before returning 
acpi_backlight_native? That way, we'll briefly end up with zero 
backlight handlers rather than briefly ending up with two of them. Not 
sure if that's really any better, though.


>   }
> +
> +enum acpi_backlight_type acpi_video_get_backlight_type(void)
> +{
> +	return __acpi_video_get_backlight_type(false);
> +}
>   EXPORT_SYMBOL(acpi_video_get_backlight_type);
>   
> +bool acpi_video_backlight_use_native(void)
> +{
> +	return __acpi_video_get_backlight_type(true) == acpi_backlight_native;
> +}
> +EXPORT_SYMBOL(acpi_video_backlight_use_native);
> +
>   /*
>    * Set the preferred backlight interface type based on DMI info.
>    * This function allows DMI blacklists to be implemented by external
> diff --git a/include/acpi/video.h b/include/acpi/video.h
> index db8548ff03ce..4705e339c252 100644
> --- a/include/acpi/video.h
> +++ b/include/acpi/video.h
> @@ -56,6 +56,7 @@ extern void acpi_video_unregister(void);
>   extern int acpi_video_get_edid(struct acpi_device *device, int type,
>   			       int device_id, void **edid);
>   extern enum acpi_backlight_type acpi_video_get_backlight_type(void);
> +extern bool acpi_video_backlight_use_native(void);
>   extern void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type);
>   /*
>    * Note: The value returned by acpi_video_handles_brightness_key_presses()
> @@ -77,6 +78,10 @@ static inline enum acpi_backlight_type acpi_video_get_backlight_type(void)
>   {
>   	return acpi_backlight_vendor;
>   }
> +static inline bool acpi_video_backlight_use_native(void)
> +{
> +	return true;
> +}
>   static inline void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type)
>   {
>   }

^ permalink raw reply

* Re: [PATCH net-next 3/6] net: dsa: add support for retrieving the interface mode
From: Russell King (Oracle) @ 2022-07-21 21:14 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Marek Behún, Andrew Lunn, Heiner Kallweit, Alexandre Belloni,
	Alvin __ipraga, Andy Shevchenko, Claudiu Manoil, Daniel Scally,
	David S. Miller, DENG Qingfang, Eric Dumazet, Florian Fainelli,
	George McCollister, Greg Kroah-Hartman, Hauke Mehrtens,
	Heikki Krogerus, Jakub Kicinski, Kurt Kanzenbach, Landen Chao,
	Linus Walleij, linux-acpi, linux-arm-kernel, linux-mediatek,
	Matthias Brugger, netdev, Paolo Abeni, Rafael J. Wysocki,
	Sakari Ailus, Sean Wang, UNGLinuxDriver, Vivien Didelot,
	Woojung Huh
In-Reply-To: <20220721182216.z4vdaj4zfb6w3emo@skbuf>

On Thu, Jul 21, 2022 at 09:22:16PM +0300, Vladimir Oltean wrote:
> On Thu, Jul 21, 2022 at 07:21:45PM +0200, Marek Behún wrote:
> > And then came 6373X switch, which didn't support clause 37 inband AN in
> > 2500base-x mode (the AN reigster returned 0xffff or something when
> > 2500base-x CMODE was set). Maybe 6373X finally supports clause 73 AN
> > (I don't know, but I don't think so) and that is the reason they now
> > forbid clause 37 AN in HW in 2500base-x.
> > 
> > But the problem is that by this time there is software out there then
> > expects 2500base-x to have clause 37 AN enabled. Indeed a passive SFP
> > cable did not work between MOX' SFP port and CN9130-CRB's SFP port
> > when used with Peridot (6190), if C37 AN was disabled on 6393x and left
> > enabled on Peridot.
> > 
> > I managed to work out how to enable C37 AN on 6393x:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=163000dbc772c1eae9bdfe7c8fe30155db1efd74
> > 
> > So currently we try to enable C37 AN in 2500base-x mode, although
> > the standard says that it shouldn't be there, and it shouldn't be there
> > presumably because they want it to work with C73 AN.
> > 
> > I don't know how to solve this issue. Maybe declare a new PHY interface
> > mode constant, 2500base-x-no-c37-an ?
> 
> So this is essentially what I'm asking, and you didn't necessarily fully
> answer. I take it that there exist Marvell switches which enable in-band
> autoneg for 2500base-x and switches which don't, and managed = "in-band-status"
> has nothing to do with that decision. Right?

I think we're getting a little too het up over this.

We have 1000base-X where, when we're not using in-band-status, we don't
use autoneg (some drivers that weren't caught in review annoyingly do
still use autoneg, but they shouldn't). We ignore the ethtool autoneg
bit.

We also have 1000base-X where we're using in-band-status, and we then
respect the ethtool autoneg bit.

So, wouldn't it be logical if 2500base-X were implemented the same way,
and on setups where 2500base-X does not support clause 37 AN, we
clear the ethtool autoneg bit? If we have 2500base-X being used as the
media link, surely this is the right behaviour?

(This has implications for the rate adaption case, since the 2500base-X
link is not the media, and therefore the state of the autoneg bit
shouldn't apply to the 2500base-X link.)

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [PATCH] ACPI: PM: x86: Print messages regarding LPS0 idle support
From: Limonciello, Mario @ 2022-07-21 19:55 UTC (permalink / raw)
  To: Rafael J. Wysocki, Linux ACPI
  Cc: LKML, Linux PM, Zhang Rui, Srinivas Pandruvada, Len Brown
In-Reply-To: <12039470.O9o76ZdvQC@kreacher>

On 7/21/2022 11:13, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> Because suspend-to-idle is always supported and on x86 it is the only
> way to suspend the system if S3 is not supported by the platform, the
> kernel attempts to enter low-power S0 idle in the suspend-to-idle flow
> regardless of whether or not the ACPI_FADT_LOW_POWER_S0 flag is set in
> the FADT.  However, if that flag is not set, residency counters
> associated with low-power S0 idle may not count and the platform may
> refuse to put the EC into a low-power mode, for example.
> 
> For this reason, print diagnostic messages when the platform should
> achieve significant energy savings in low-power S0 idle (because the
> ACPI_FADT_LOW_POWER_S0 flag is set in the FADT) and when
> suspend-to-idle becomes the default suspend method (because low-power
> S0 idle should be equally or more efficient than S3, if available).
> 
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>

> ---
>   drivers/acpi/sleep.c      |    3 +++
>   drivers/acpi/x86/s2idle.c |    4 +++-
>   2 files changed, 6 insertions(+), 1 deletion(-)
> 
> Index: linux-pm/drivers/acpi/sleep.c
> ===================================================================
> --- linux-pm.orig/drivers/acpi/sleep.c
> +++ linux-pm/drivers/acpi/sleep.c
> @@ -824,6 +824,9 @@ static const struct platform_s2idle_ops
>   
>   void __weak acpi_s2idle_setup(void)
>   {
> +	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
> +		pr_info("Efficient low-power S0 idle declared\n");
> +
>   	s2idle_set_ops(&acpi_s2idle_ops);
>   }
>   
> Index: linux-pm/drivers/acpi/x86/s2idle.c
> ===================================================================
> --- linux-pm.orig/drivers/acpi/x86/s2idle.c
> +++ linux-pm/drivers/acpi/x86/s2idle.c
> @@ -423,8 +423,10 @@ static int lps0_device_attach(struct acp
>   	 * line.
>   	 */
>   	if ((acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) &&
> -	    mem_sleep_default > PM_SUSPEND_MEM && !acpi_sleep_default_s3)
> +	    mem_sleep_default > PM_SUSPEND_MEM && !acpi_sleep_default_s3) {
>   		mem_sleep_current = PM_SUSPEND_TO_IDLE;
> +		pr_info("Low-power S0 idle used by default for system suspend\n");
> +	}
>   
>   	/*
>   	 * Some LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U, require the
> 
> 
> 


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox