Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4/9] openrisc: Don't pull in all of linux/bitops.h in asm/cmpxchg.h
From: Will Deacon @ 2018-05-24 10:59 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527159586-8578-1-git-send-email-will.deacon@arm.com>

The openrisc implementation of asm/cmpxchg.h pulls in linux/bitops.h
so that it can refer to BITS_PER_BYTE. It also transitively relies on
this pulling in linux/compiler.h for READ_ONCE.

Replace the #include with linux/bits.h and linux/compiler.h

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/openrisc/include/asm/cmpxchg.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h
index d29f7db53906..f9cd43a39d72 100644
--- a/arch/openrisc/include/asm/cmpxchg.h
+++ b/arch/openrisc/include/asm/cmpxchg.h
@@ -16,8 +16,9 @@
 #ifndef __ASM_OPENRISC_CMPXCHG_H
 #define __ASM_OPENRISC_CMPXCHG_H
 
+#include  <linux/bits.h>
+#include  <linux/compiler.h>
 #include  <linux/types.h>
-#include  <linux/bitops.h>
 
 #define __HAVE_ARCH_CMPXCHG 1
 
-- 
2.1.4

^ permalink raw reply related

* [PATCH 3/9] asm-generic: Move some macros from linux/bitops.h to a new bits.h file
From: Will Deacon @ 2018-05-24 10:59 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527159586-8578-1-git-send-email-will.deacon@arm.com>

In preparation for implementing the asm-generic atomic bitops in terms
of atomic_long_*, we need to prevent asm/atomic.h implementations from
pulling in linux/bitops.h. A common reason for this include is for the
BITS_PER_BYTE definition, so move this and some other BIT and masking
macros into a new header file, linux/bits.h

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/bitops.h | 22 +---------------------
 include/linux/bits.h   | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 21 deletions(-)
 create mode 100644 include/linux/bits.h

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 4cac4e1a72ff..af419012d77d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -2,29 +2,9 @@
 #ifndef _LINUX_BITOPS_H
 #define _LINUX_BITOPS_H
 #include <asm/types.h>
+#include <linux/bits.h>
 
-#ifdef	__KERNEL__
-#define BIT(nr)			(1UL << (nr))
-#define BIT_ULL(nr)		(1ULL << (nr))
-#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
-#define BIT_ULL_MASK(nr)	(1ULL << ((nr) % BITS_PER_LONG_LONG))
-#define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
-#define BITS_PER_BYTE		8
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
-#endif
-
-/*
- * Create a contiguous bitmask starting@bit position @l and ending at
- * position @h. For example
- * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
- */
-#define GENMASK(h, l) \
-	(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
-
-#define GENMASK_ULL(h, l) \
-	(((~0ULL) - (1ULL << (l)) + 1) & \
-	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
 
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/include/linux/bits.h b/include/linux/bits.h
new file mode 100644
index 000000000000..2b7b532c1d51
--- /dev/null
+++ b/include/linux/bits.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_BITS_H
+#define __LINUX_BITS_H
+#include <asm/bitsperlong.h>
+
+#define BIT(nr)			(1UL << (nr))
+#define BIT_ULL(nr)		(1ULL << (nr))
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BIT_ULL_MASK(nr)	(1ULL << ((nr) % BITS_PER_LONG_LONG))
+#define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
+#define BITS_PER_BYTE		8
+
+/*
+ * Create a contiguous bitmask starting@bit position @l and ending at
+ * position @h. For example
+ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
+ */
+#define GENMASK(h, l) \
+	(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+
+#define GENMASK_ULL(h, l) \
+	(((~0ULL) - (1ULL << (l)) + 1) & \
+	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+
+#endif	/* __LINUX_BITS_H */
-- 
2.1.4

^ permalink raw reply related

* [PATCH 2/9] m68k: Don't use asm-generic/bitops/lock.h
From: Will Deacon @ 2018-05-24 10:59 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527159586-8578-1-git-send-email-will.deacon@arm.com>

asm-generic/bitops/lock.h is shortly going to be built on top of the
atomic_long_* API, which introduces a nasty circular dependency for
m68k where linux/atomic.h pulls in linux/bitops.h via:

	linux/atomic.h
	asm/atomic.h
	linux/irqflags.h
	asm/irqflags.h
	linux/preempt.h
	asm/preempt.h
	asm-generic/preempt.h
	linux/thread_info.h
	asm/thread_info.h
	asm/page.h
	asm-generic/getorder.h
	linux/log2.h
	linux/bitops.h

Since m68k isn't SMP and doesn't support ACQUIRE/RELEASE barriers, we
can just define the lock bitops in terms of the atomic bitops in the
asm/bitops.h header.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/m68k/include/asm/bitops.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index 93b47b1f6fb4..18193419f97d 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -515,12 +515,16 @@ static inline int __fls(int x)
 
 #endif
 
+/* Simple test-and-set bit locks */
+#define test_and_set_bit_lock	test_and_set_bit
+#define clear_bit_unlock	clear_bit
+#define __clear_bit_unlock	clear_bit_unlock
+
 #include <asm-generic/bitops/ext2-atomic.h>
 #include <asm-generic/bitops/le.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/hweight.h>
-#include <asm-generic/bitops/lock.h>
 #endif /* __KERNEL__ */
 
 #endif /* _M68K_BITOPS_H */
-- 
2.1.4

^ permalink raw reply related

* [PATCH 1/9] h8300: Don't include linux/kernel.h in asm/atomic.h
From: Will Deacon @ 2018-05-24 10:59 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527159586-8578-1-git-send-email-will.deacon@arm.com>

linux/kernel.h isn't needed by asm/atomic.h and will result in circular
dependencies when the asm-generic atomic bitops are built around the
tomic_long_t interface.

Remove the broad include and replace it with linux/compiler.h for
READ_ONCE etc and asm/irqflags.h for arch_local_irq_save etc.

Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/h8300/include/asm/atomic.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 941e7554e886..b174dec099bf 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -2,8 +2,10 @@
 #ifndef __ARCH_H8300_ATOMIC__
 #define __ARCH_H8300_ATOMIC__
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <asm/cmpxchg.h>
+#include <asm/irqflags.h>
 
 /*
  * Atomic operations that C can't guarantee us.  Useful for
@@ -15,8 +17,6 @@
 #define atomic_read(v)		READ_ONCE((v)->counter)
 #define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
-#include <linux/kernel.h>
-
 #define ATOMIC_OP_RETURN(op, c_op)				\
 static inline int atomic_##op##_return(int i, atomic_t *v)	\
 {								\
-- 
2.1.4

^ permalink raw reply related

* [PATCH 0/9] Rewrite asm-generic/bitops/{atomic, lock}.h and use on arm64
From: Will Deacon @ 2018-05-24 10:59 UTC (permalink / raw)
  To: linux-arm-kernel

Hi all,

This patch series has previously been posted in RFC form here:

  RFCv1: https://www.spinics.net/lists/arm-kernel/msg634719.html
  RFCv2: https://www.spinics.net/lists/arm-kernel/msg636875.html

Changes since RFCv2 include:

  * Rebased onto v4.17-rc4, which allowed me to drop some patches from
    the series which were merged in 4.16.

  * Moved bit.h to be linux/bit.h instead of asm-generic/bit.h

Thanks,

Will

--->8

Will Deacon (9):
  h8300: Don't include linux/kernel.h in asm/atomic.h
  m68k: Don't use asm-generic/bitops/lock.h
  asm-generic: Move some macros from linux/bitops.h to a new bits.h file
  openrisc: Don't pull in all of linux/bitops.h in asm/cmpxchg.h
  sh: Don't pull in all of linux/bitops.h in asm/cmpxchg-xchg.h
  asm-generic/bitops/atomic.h: Rewrite using atomic_fetch_*
  asm-generic/bitops/lock.h: Rewrite using atomic_fetch_*
  arm64: Replace our atomic/lock bitop implementations with asm-generic
  arm64: bitops: Include <asm-generic/bitops/ext2-atomic-setbit.h>

 arch/arm64/include/asm/bitops.h     |  21 +---
 arch/arm64/lib/Makefile             |   2 +-
 arch/arm64/lib/bitops.S             |  76 ---------------
 arch/h8300/include/asm/atomic.h     |   4 +-
 arch/m68k/include/asm/bitops.h      |   6 +-
 arch/openrisc/include/asm/cmpxchg.h |   3 +-
 arch/sh/include/asm/cmpxchg-xchg.h  |   3 +-
 include/asm-generic/bitops/atomic.h | 188 +++++++-----------------------------
 include/asm-generic/bitops/lock.h   |  68 ++++++++++---
 include/linux/bitops.h              |  22 +----
 include/linux/bits.h                |  26 +++++
 11 files changed, 131 insertions(+), 288 deletions(-)
 delete mode 100644 arch/arm64/lib/bitops.S
 create mode 100644 include/linux/bits.h

-- 
2.1.4

^ permalink raw reply

* [PATCH 01/14] arm/arm64: smccc: Add SMCCC-specific return codes
From: Mark Rutland @ 2018-05-24 10:55 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180522150648.28297-2-marc.zyngier@arm.com>

On Tue, May 22, 2018 at 04:06:35PM +0100, Marc Zyngier wrote:
> We've so far used the PSCI return codes for SMCCC because they
> were extremely similar. But with the new ARM DEN 0070A specification,
> "NOT_REQUIRED" (-2) is clashing with PSCI's "PSCI_RET_INVALID_PARAMS".
> 
> Let's bite the bullet and add SMCCC specific return codes. Users
> can be repainted as and when required.
> 
> Acked-by: Will Deacon <will.deacon@arm.com>
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

Reviewed-by: Mark Rutland <mark.rutland@arm.com>

Mark.

> ---
>  include/linux/arm-smccc.h | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
> index a031897fca76..c89da86de99f 100644
> --- a/include/linux/arm-smccc.h
> +++ b/include/linux/arm-smccc.h
> @@ -291,5 +291,10 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
>   */
>  #define arm_smccc_1_1_hvc(...)	__arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__)
>  
> +/* Return codes defined in ARM DEN 0070A */
> +#define SMCCC_RET_SUCCESS			0
> +#define SMCCC_RET_NOT_SUPPORTED			-1
> +#define SMCCC_RET_NOT_REQUIRED			-2
> +
>  #endif /*__ASSEMBLY__*/
>  #endif /*__LINUX_ARM_SMCCC_H*/
> -- 
> 2.14.2
> 
> _______________________________________________
> kvmarm mailing list
> kvmarm at lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply

* [PATCH 02/14] arm64: Call ARCH_WORKAROUND_2 on transitions between EL0 and EL1
From: Mark Rutland @ 2018-05-24 10:52 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <0b03ce12-510e-464b-63de-cd481d7cb1c5@arm.com>

On Wed, May 23, 2018 at 10:23:20AM +0100, Julien Grall wrote:
> Hi Marc,
> 
> On 05/22/2018 04:06 PM, Marc Zyngier wrote:
> > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> > index ec2ee720e33e..f33e6aed3037 100644
> > --- a/arch/arm64/kernel/entry.S
> > +++ b/arch/arm64/kernel/entry.S
> > @@ -18,6 +18,7 @@
> >    * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> >    */
> > +#include <linux/arm-smccc.h>
> >   #include <linux/init.h>
> >   #include <linux/linkage.h>
> > @@ -137,6 +138,18 @@ alternative_else_nop_endif
> >   	add	\dst, \dst, #(\sym - .entry.tramp.text)
> >   	.endm
> > +	// This macro corrupts x0-x3. It is the caller's duty
> > +	// to save/restore them if required.
> 
> NIT: Shouldn't you use /* ... */ for multi-line comments?

There's no requirement to do so, and IIRC even Torvalds prefers '//'
comments for multi-line things these days.

Mark.

^ permalink raw reply

* [PATCH v4 1/2] arm64/sve: Thin out initialisation sanity-checks for sve_max_vl
From: Will Deacon @ 2018-05-24 10:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527097616-25214-2-git-send-email-Dave.Martin@arm.com>

On Wed, May 23, 2018 at 06:46:55PM +0100, Dave Martin wrote:
> Now that the kernel SVE support is reasonably mature, it is
> excessive to default sve_max_vl to the invalid value -1 and then
> sprinkle WARN_ON()s around the place to make sure it has been
> initialised before use.  The cpufeatures code already runs pretty
> early, and will ensure sve_max_vl gets initialised.
> 
> This patch initialises sve_max_vl to something sane that will be
> supported by every SVE implementation, and removes most of the
> sanity checks.
> 
> The checks in find_supported_vector_length() are retained for now.
> If anything goes horribly wrong, we are likely to trip a check here
> sooner or later.
> 
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> ---
>  arch/arm64/kernel/fpsimd.c | 17 ++++-------------
>  arch/arm64/kernel/ptrace.c |  3 ---
>  2 files changed, 4 insertions(+), 16 deletions(-)

Thanks, this makes the code a bit more readable imo:

Acked-by: Will Deacon <will.deacon@arm.com>

Will

^ permalink raw reply

* [PATCH v3 3/3] ARM: dts: Renesas R9A06G032 SMP enable method
From: Michel Pollet @ 2018-05-24 10:39 UTC (permalink / raw)
  To: linux-arm-kernel

Add a special enable method for the second CA7 of the R9A06G032
as well as the default value for the "cpu-release-addr" property.

Signed-off-by: Michel Pollet <michel.pollet@bp.renesas.com>
---
 arch/arm/boot/dts/r9a06g032.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/r9a06g032.dtsi b/arch/arm/boot/dts/r9a06g032.dtsi
index 9534f1b..d7b5414 100644
--- a/arch/arm/boot/dts/r9a06g032.dtsi
+++ b/arch/arm/boot/dts/r9a06g032.dtsi
@@ -30,6 +30,8 @@
 			compatible = "arm,cortex-a7";
 			reg = <1>;
 			clocks = <&sysctrl R9A06G032_DIV_CA7>;
+			enable-method = "renesas,r9a06g032-smp";
+			cpu-release-addr = <0x4000c204>;
 		};
 	};
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 2/3] arm: shmobile: Add the R9A06G032 SMP enabler driver
From: Michel Pollet @ 2018-05-24 10:30 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527157834-7747-1-git-send-email-michel.pollet@bp.renesas.com>

The Renesas R9A06G032 second CA7 is parked in a ROM pen at boot time, it
requires a special enable method to get it started.

Signed-off-by: Michel Pollet <michel.pollet@bp.renesas.com>
---
 arch/arm/mach-shmobile/Makefile        |  1 +
 arch/arm/mach-shmobile/smp-r9a06g032.c | 85 ++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 arch/arm/mach-shmobile/smp-r9a06g032.c

diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
index 1939f52..d7fc98f 100644
--- a/arch/arm/mach-shmobile/Makefile
+++ b/arch/arm/mach-shmobile/Makefile
@@ -34,6 +34,7 @@ smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o headsmp-scu.o platsmp-scu.o
 smp-$(CONFIG_ARCH_R8A7779)	+= smp-r8a7779.o headsmp-scu.o platsmp-scu.o
 smp-$(CONFIG_ARCH_R8A7790)	+= smp-r8a7790.o
 smp-$(CONFIG_ARCH_R8A7791)	+= smp-r8a7791.o
+smp-$(CONFIG_ARCH_R9A06G032)	+= smp-r9a06g032.o
 smp-$(CONFIG_ARCH_EMEV2)	+= smp-emev2.o headsmp-scu.o platsmp-scu.o
 
 # PM objects
diff --git a/arch/arm/mach-shmobile/smp-r9a06g032.c b/arch/arm/mach-shmobile/smp-r9a06g032.c
new file mode 100644
index 0000000..a536e89
--- /dev/null
+++ b/arch/arm/mach-shmobile/smp-r9a06g032.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RZ/N1D Second CA7 enabler.
+ *
+ * Copyright (C) 2018 Renesas Electronics Europe Limited
+ *
+ * Michel Pollet <michel.pollet@bp.renesas.com>, <buserror@gmail.com>
+ * Derived from action,s500-smp
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/smp_plat.h>
+#include <asm/smp_scu.h>
+
+/*
+ * The second CPU is parked in ROM at boot time. It requires waking it after
+ * writing an address into the BOOTADDR register of sysctrl.
+ *
+ * So the default value of the "cpu-release-addr" corresponds to BOOTADDR...
+ *
+ * *However* the BOOTADDR register is not available when the kernel
+ * starts in NONSEC mode.
+ *
+ * So for NONSEC mode, the bootloader re-parks the second CPU into a pen
+ * in SRAM, and changes the "cpu-release-addr" of linux's DT to a SRAM address,
+ * which is not restricted.
+ */
+
+static void __iomem *cpu_bootaddr;
+
+static DEFINE_SPINLOCK(cpu_lock);
+
+static int rzn1_smp_boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	if (!cpu_bootaddr)
+		return -ENODEV;
+
+	spin_lock(&cpu_lock);
+
+	writel(__pa_symbol(secondary_startup), cpu_bootaddr);
+	arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+
+	spin_unlock(&cpu_lock);
+
+	return 0;
+}
+
+static void __init rzn1_smp_prepare_cpus(unsigned int max_cpus)
+{
+	struct device_node *dn;
+	int ret;
+	u32 bootaddr;
+
+	dn = of_get_cpu_node(1, NULL);
+	if (!dn) {
+		pr_err("CPU#1: missing device tree node\n");
+		return;
+	}
+	/*
+	 * Determine the address from which the CPU is polling.
+	 * The bootloader *does* change this property
+	 */
+	ret = of_property_read_u32(dn, "cpu-release-addr", &bootaddr);
+	of_node_put(dn);
+	if (ret) {
+		pr_err("CPU#1: invalid cpu-release-addr property\n");
+		return;
+	}
+	pr_info("CPU#1: cpu-release-addr %08x\n", (u32)bootaddr);
+
+	cpu_bootaddr = ioremap(bootaddr, sizeof(bootaddr));
+	if (!cpu_bootaddr)
+		pr_err("CPU#1: cpu-release-addr map failed\n");
+}
+
+static const struct smp_operations rzn1_smp_ops __initconst = {
+	.smp_prepare_cpus = rzn1_smp_prepare_cpus,
+	.smp_boot_secondary = rzn1_smp_boot_secondary,
+};
+CPU_METHOD_OF_DECLARE(rzn1_smp, "renesas,r9a06g032-smp", &rzn1_smp_ops);
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 1/3] dt-bindings: cpu: Add Renesas R9A06G032 SMP enable method.
From: Michel Pollet @ 2018-05-24 10:30 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527157834-7747-1-git-send-email-michel.pollet@bp.renesas.com>

Add a special enable method for second CA7 of the R9A06G032

Signed-off-by: Michel Pollet <michel.pollet@bp.renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/cpus.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt
index 29e1dc5..b395d107 100644
--- a/Documentation/devicetree/bindings/arm/cpus.txt
+++ b/Documentation/devicetree/bindings/arm/cpus.txt
@@ -219,6 +219,7 @@ described below.
 			    "qcom,kpss-acc-v1"
 			    "qcom,kpss-acc-v2"
 			    "renesas,apmu"
+			    "renesas,r9a06g032-smp"
 			    "rockchip,rk3036-smp"
 			    "rockchip,rk3066-smp"
 			    "ste,dbx500-smp"
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 0/3] Renesas R9A06G032 SMP enabler
From: Michel Pollet @ 2018-05-24 10:30 UTC (permalink / raw)
  To: linux-arm-kernel

*WARNING -- this requires the base R9A06G032 support patches 
already posted

This patch series is for enabling the second CA7 of the R9A06G032.
It's based on a spin_table method, and it reuses the same binding
property as that driver.

v3:
  + Removed mentions of rz/?n1d?
  + Rebased on base patch v7 
v2:
  + Added suggestions from Florian Fainelli
  + Use __pa_symbol()
  + Simplified logic in prepare_cpu()
  + Reordered the patches
  + Rebased on RZN1 Base patch v5

Michel Pollet (3):
  dt-bindings: cpu: Add Renesas R9A06G032 SMP enable method.
  arm: shmobile: Add the R9A06G032 SMP enabler driver
  ARM: dts: Renesas R9A06G032 SMP enable method

 Documentation/devicetree/bindings/arm/cpus.txt |  1 +
 arch/arm/boot/dts/r9a06g032.dtsi               |  2 +
 arch/arm/mach-shmobile/Makefile                |  1 +
 arch/arm/mach-shmobile/smp-r9a06g032.c         | 85 ++++++++++++++++++++++++++
 4 files changed, 89 insertions(+)
 create mode 100644 arch/arm/mach-shmobile/smp-r9a06g032.c

-- 
2.7.4

^ permalink raw reply

* [PATCHv2] arm64: Make sure permission updates happen for pmd/pud
From: Will Deacon @ 2018-05-24 10:27 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180523184346.487-1-labbott@redhat.com>

Hi Laura,

On Wed, May 23, 2018 at 11:43:46AM -0700, Laura Abbott wrote:
> Commit 15122ee2c515 ("arm64: Enforce BBM for huge IO/VMAP mappings")
> disallowed block mappings for ioremap since that code does not honor
> break-before-make. The same APIs are also used for permission updating
> though and the extra checks prevent the permission updates from happening,
> even though this should be permitted. This results in read-only permissions
> not being fully applied. Visibly, this can occasionaly be seen as a failure
> on the built in rodata test when the test data ends up in a section or
> as an odd RW gap on the page table dump. Fix this by using
> pgattr_change_is_safe instead of p*d_present for determining if the
> change is permitted.
> 
> Reported-by: Peter Robinson <pbrobinson@gmail.com>
> Fixes: 15122ee2c515 ("arm64: Enforce BBM for huge IO/VMAP mappings")
> Signed-off-by: Laura Abbott <labbott@redhat.com>
> ---
> v2: Switch to using pgattr_change_is_safe per suggestion of Will
> ---

Thanks for re-spinning so quickly. I'll queue as a fix with the relevant
tags.

Will

^ permalink raw reply

* [PATCH v10 13/18] arm64/sve: Move sve_pffr() to fpsimd.h and make inline
From: Alex Bennée @ 2018-05-24 10:20 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527005119-6842-14-git-send-email-Dave.Martin@arm.com>


Dave Martin <Dave.Martin@arm.com> writes:

> In order to make sve_save_state()/sve_load_state() more easily
> reusable and to get rid of a potential branch on context switch
> critical paths, this patch makes sve_pffr() inline and moves it to
> fpsimd.h.
>
> <asm/processor.h> must be included in fpsimd.h in order to make
> this work, and this creates an #include cycle that is tricky to
> avoid without modifying core code, due to the way the PR_SVE_*()
> prctl helpers are included in the core prctl implementation.
>
> Instead of breaking the cycle, this patch defers inclusion of
> <asm/fpsimd.h> in <asm/processor.h> until the point where it is
> actually needed: i.e., immediately before the prctl definitions.
>
> No functional change.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Acked-by: Catalin Marinas <catalin.marinas@arm.com>
> Acked-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm64/include/asm/fpsimd.h    | 13 +++++++++++++
>  arch/arm64/include/asm/processor.h |  3 ++-
>  arch/arm64/kernel/fpsimd.c         | 12 ------------
>  3 files changed, 15 insertions(+), 13 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index fb60b22..fa92747 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -18,6 +18,8 @@
>
>  #include <asm/ptrace.h>
>  #include <asm/errno.h>
> +#include <asm/processor.h>
> +#include <asm/sigcontext.h>
>
>  #ifndef __ASSEMBLY__
>
> @@ -61,6 +63,17 @@ extern void sve_flush_cpu_state(void);
>  /* Maximum VL that SVE VL-agnostic software can transparently support */
>  #define SVE_VL_ARCH_MAX 0x100
>
> +/* Offset of FFR in the SVE register dump */
> +static inline size_t sve_ffr_offset(int vl)
> +{
> +	return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
> +}
> +
> +static inline void *sve_pffr(struct thread_struct *thread)
> +{
> +	return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl);
> +}
> +
>  extern void sve_save_state(void *state, u32 *pfpsr);
>  extern void sve_load_state(void const *state, u32 const *pfpsr,
>  			   unsigned long vq_minus_1);
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index f902b6d..ebaadb1 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -40,7 +40,6 @@
>
>  #include <asm/alternative.h>
>  #include <asm/cpufeature.h>
> -#include <asm/fpsimd.h>
>  #include <asm/hw_breakpoint.h>
>  #include <asm/lse.h>
>  #include <asm/pgtable-hwdef.h>
> @@ -245,6 +244,8 @@ void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused);
>  void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused);
>  void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused);
>
> +#include <asm/fpsimd.h>
> +

You really need a one-liner comment to note why the include is in a
funny place to save someone just moving it back and then getting really
confused. Maybe:

  /* included just in time to avoid circular inclusion issues */
  #include <asm/fpsimd.h>

It still seems weird to me though :-/

Otherwise:

Reviewed-by: Alex Benn?e <alex.bennee@linaro.org>

--
Alex Benn?e

^ permalink raw reply

* [PATCH v10 10/18] KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing
From: Dave Martin @ 2018-05-24 10:18 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <877enttlfl.fsf@linaro.org>

On Thu, May 24, 2018 at 11:09:02AM +0100, Alex Benn?e wrote:
> 
> Dave Martin <Dave.Martin@arm.com> writes:
> 
> > This patch refactors KVM to align the host and guest FPSIMD
> > save/restore logic with each other for arm64.  This reduces the
> > number of redundant save/restore operations that must occur, and
> > reduces the common-case IRQ blackout time during guest exit storms
> > by saving the host state lazily and optimising away the need to
> > restore the host state before returning to the run loop.
> >
> > Four hooks are defined in order to enable this:
> >
> >  * kvm_arch_vcpu_run_map_fp():
> >    Called on PID change to map necessary bits of current to Hyp.
> >
> >  * kvm_arch_vcpu_load_fp():
> >    Set up FP/SIMD for entering the KVM run loop (parse as
> >    "vcpu_load fp").
> >
> >  * kvm_arch_vcpu_ctxsync_fp():
> >    Get FP/SIMD into a safe state for re-enabling interrupts after a
> >    guest exit back to the run loop.
> >
> >    For arm64 specifically, this involves updating the host kernel's
> >    FPSIMD context tracking metadata so that kernel-mode NEON use
> >    will cause the vcpu's FPSIMD state to be saved back correctly
> >    into the vcpu struct.  This must be done before re-enabling
> >    interrupts because kernel-mode NEON may be used by softirqs.
> >
> >  * kvm_arch_vcpu_put_fp():
> >    Save guest FP/SIMD state back to memory and dissociate from the
> >    CPU ("vcpu_put fp").
> >
> > Also, the arm64 FPSIMD context switch code is updated to enable it
> > to save back FPSIMD state for a vcpu, not just current.  A few
> > helpers drive this:
> >
> >  * fpsimd_bind_state_to_cpu(struct user_fpsimd_state *fp):
> >    mark this CPU as having context fp (which may belong to a vcpu)
> >    currently loaded in its registers.  This is the non-task
> >    equivalent of the static function fpsimd_bind_to_cpu() in
> >    fpsimd.c.
> >
> >  * task_fpsimd_save():
> >    exported to allow KVM to save the guest's FPSIMD state back to
> >    memory on exit from the run loop.
> >
> >  * fpsimd_flush_state():
> >    invalidate any context's FPSIMD state that is currently loaded.
> >    Used to disassociate the vcpu from the CPU regs on run loop exit.
> >
> > These changes allow the run loop to enable interrupts (and thus
> > softirqs that may use kernel-mode NEON) without having to save the
> > guest's FPSIMD state eagerly.
> >
> > Some new vcpu_arch fields are added to make all this work.  Because
> > host FPSIMD state can now be saved back directly into current's
> > thread_struct as appropriate, host_cpu_context is no longer used
> > for preserving the FPSIMD state.  However, it is still needed for
> > preserving other things such as the host's system registers.  To
> > avoid ABI churn, the redundant storage space in host_cpu_context is
> > not removed for now.
> >
> > arch/arm is not addressed by this patch and continues to use its
> > current save/restore logic.  It could provide implementations of
> > the helpers later if desired.
> >
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
> > Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
> > Acked-by: Catalin Marinas <catalin.marinas@arm.com>
> >
> > ---
> >
> > Reviewers note: tags retained because this delta is straightforward by
> > itself.  Please shout if you're not happy!
> >
> > Changes since v9:
> >
> >  * Remove redundant set_thread_flag(TIF_FOREIGN_FPSTATE) that is now
> >    implicit in fpsimd_flush_cpu_state().
> > ---
> >  arch/arm/include/asm/kvm_host.h   |   8 +++
> >  arch/arm64/include/asm/fpsimd.h   |   6 +++
> >  arch/arm64/include/asm/kvm_host.h |  21 ++++++++
> >  arch/arm64/kernel/fpsimd.c        |  17 ++++--
> >  arch/arm64/kvm/Kconfig            |   1 +
> >  arch/arm64/kvm/Makefile           |   2 +-
> >  arch/arm64/kvm/fpsimd.c           | 111 ++++++++++++++++++++++++++++++++++++++
> >  arch/arm64/kvm/hyp/switch.c       |  51 +++++++++---------
> >  virt/kvm/arm/arm.c                |   4 ++
> >  9 files changed, 191 insertions(+), 30 deletions(-)
> >  create mode 100644 arch/arm64/kvm/fpsimd.c
> >
> > diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> > index c7c28c8..ac870b2 100644
> > --- a/arch/arm/include/asm/kvm_host.h
> > +++ b/arch/arm/include/asm/kvm_host.h
> > @@ -303,6 +303,14 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
> >  int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
> >  			       struct kvm_device_attr *attr);
> >
> > +/*
> > + * VFP/NEON switching is all done by the hyp switch code, so no need to
> > + * coordinate with host context handling for this state:
> > + */
> > +static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
> > +static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
> > +static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
> > +
> >  /* All host FP/SIMD state is restored on guest exit, so nothing to save: */
> >  static inline void kvm_fpsimd_flush_cpu_state(void) {}
> >
> > diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> > index aa7162a..3e00f70 100644
> > --- a/arch/arm64/include/asm/fpsimd.h
> > +++ b/arch/arm64/include/asm/fpsimd.h
> > @@ -41,6 +41,8 @@ struct task_struct;
> >  extern void fpsimd_save_state(struct user_fpsimd_state *state);
> >  extern void fpsimd_load_state(struct user_fpsimd_state *state);
> >
> > +extern void fpsimd_save(void);
> > +
> >  extern void fpsimd_thread_switch(struct task_struct *next);
> >  extern void fpsimd_flush_thread(void);
> >
> > @@ -49,7 +51,11 @@ extern void fpsimd_preserve_current_state(void);
> >  extern void fpsimd_restore_current_state(void);
> >  extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
> >
> > +extern void fpsimd_bind_task_to_cpu(void);
> > +extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state);
> > +
> >  extern void fpsimd_flush_task_state(struct task_struct *target);
> > +extern void fpsimd_flush_cpu_state(void);
> >  extern void sve_flush_cpu_state(void);
> >
> >  /* Maximum VL that SVE VL-agnostic software can transparently support */
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 146c167..b3fe730 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -30,6 +30,7 @@
> >  #include <asm/kvm.h>
> >  #include <asm/kvm_asm.h>
> >  #include <asm/kvm_mmio.h>
> > +#include <asm/thread_info.h>
> >
> >  #define __KVM_HAVE_ARCH_INTC_INITIALIZED
> >
> > @@ -238,6 +239,10 @@ struct kvm_vcpu_arch {
> >
> >  	/* Pointer to host CPU context */
> >  	kvm_cpu_context_t *host_cpu_context;
> > +
> > +	struct thread_info *host_thread_info;	/* hyp VA */
> > +	struct user_fpsimd_state *host_fpsimd_state;	/* hyp VA */
> > +
> >  	struct {
> >  		/* {Break,watch}point registers */
> >  		struct kvm_guest_debug_arch regs;
> > @@ -295,6 +300,9 @@ struct kvm_vcpu_arch {
> >
> >  /* vcpu_arch flags field values: */
> >  #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
> > +#define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
> > +#define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded
> >  */
> 
> I may be descending into bike-shedding territory here but it seems a
> little incongruous to have _ENABLED = guest FP state when we have _HOST
> for host FP state. Why not KVM_ARM64_FP_GUEST?

I thought about this, but wanted to retain the clear relationship
between the _ENABLED flag and the state of the FPSIMD trap controls.

The HOST flag has no direct relationship with trap controls, so these
seemed different enough things to justify different names, though the
inconsistency was a bit annoying.

[...]

> Minor bike-shedding aside:
> 
> Reviewed-by: Alex Benn?e <alex.bennee@linaro.org>

Thanks.  I'll probably leave it as is, but shout if you're unhappy with
this.

Cheers
---Dave

^ permalink raw reply

* [PATCH v10 12/18] arm64/sve: Switch sve_pffr() argument from task to thread
From: Alex Bennée @ 2018-05-24 10:12 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527005119-6842-13-git-send-email-Dave.Martin@arm.com>


Dave Martin <Dave.Martin@arm.com> writes:

> sve_pffr(), which is used to derive the base address used for
> low-level SVE save/restore routines, currently takes the relevant
> task_struct as an argument.
>
> The only accessed fields are actually part of thread_struct, so
> this patch changes the argument type accordingly.  This is done in
> preparation for moving this function to a header, where we do not
> want to have to include <linux/sched.h> due to the consequent
> circular #include problems.
>
> No functional change.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Acked-by: Catalin Marinas <catalin.marinas@arm.com>
> Acked-by: Marc Zyngier <marc.zyngier@arm.com>

Reviewed-by: Alex Benn?e <alex.bennee@linaro.org>

> ---
>  arch/arm64/kernel/fpsimd.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 5152bbc..c4e9762 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -44,6 +44,7 @@
>  #include <asm/fpsimd.h>
>  #include <asm/cpufeature.h>
>  #include <asm/cputype.h>
> +#include <asm/processor.h>
>  #include <asm/simd.h>
>  #include <asm/sigcontext.h>
>  #include <asm/sysreg.h>
> @@ -167,10 +168,9 @@ static size_t sve_ffr_offset(int vl)
>  	return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
>  }
>
> -static void *sve_pffr(struct task_struct *task)
> +static void *sve_pffr(struct thread_struct *thread)
>  {
> -	return (char *)task->thread.sve_state +
> -		sve_ffr_offset(task->thread.sve_vl);
> +	return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl);
>  }
>
>  static void change_cpacr(u64 val, u64 mask)
> @@ -253,7 +253,7 @@ static void task_fpsimd_load(void)
>  	WARN_ON(!in_softirq() && !irqs_disabled());
>
>  	if (system_supports_sve() && test_thread_flag(TIF_SVE))
> -		sve_load_state(sve_pffr(current),
> +		sve_load_state(sve_pffr(&current->thread),
>  			       &current->thread.uw.fpsimd_state.fpsr,
>  			       sve_vq_from_vl(current->thread.sve_vl) - 1);
>  	else
> @@ -284,7 +284,7 @@ void fpsimd_save(void)
>  				return;
>  			}
>
> -			sve_save_state(sve_pffr(current), &st->fpsr);
> +			sve_save_state(sve_pffr(&current->thread), &st->fpsr);
>  		} else
>  			fpsimd_save_state(st);
>  	}


--
Alex Benn?e

^ permalink raw reply

* [PATCH v10 11/18] arm64/sve: Move read_zcr_features() out of cpufeature.h
From: Alex Bennée @ 2018-05-24 10:12 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527005119-6842-12-git-send-email-Dave.Martin@arm.com>


Dave Martin <Dave.Martin@arm.com> writes:

> Having read_zcr_features() inline in cpufeature.h results in that
> header requiring #includes which make it hard to include
> <asm/fpsimd.h> elsewhere without triggering header inclusion
> cycles.
>
> This is not a hot-path function and arguably should not be in
> cpufeature.h in the first place, so this patch moves it to
> fpsimd.c, compiled conditionally if CONFIG_ARM64_SVE=y.
>
> This allows some SVE-related #includes to be dropped from
> cpufeature.h, which will ease future maintenance.
>
> A couple of missing #includes of <asm/fpsimd.h> are exposed by this
> change under arch/arm64/.  This patch adds the missing #includes as
> necessary.
>
> No functional change.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Acked-by: Catalin Marinas <catalin.marinas@arm.com>
> Acked-by: Marc Zyngier <marc.zyngier@arm.com>

Reviewed-by: Alex Benn?e <alex.bennee@linaro.org>

> ---
>  arch/arm64/include/asm/cpufeature.h | 29 -----------------------------
>  arch/arm64/include/asm/fpsimd.h     |  2 ++
>  arch/arm64/include/asm/processor.h  |  1 +
>  arch/arm64/kernel/fpsimd.c          | 28 ++++++++++++++++++++++++++++
>  arch/arm64/kernel/ptrace.c          |  1 +
>  5 files changed, 32 insertions(+), 29 deletions(-)
>
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 09b0f2a..0a6b713 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -11,9 +11,7 @@
>
>  #include <asm/cpucaps.h>
>  #include <asm/cputype.h>
> -#include <asm/fpsimd.h>
>  #include <asm/hwcap.h>
> -#include <asm/sigcontext.h>
>  #include <asm/sysreg.h>
>
>  /*
> @@ -510,33 +508,6 @@ static inline bool system_supports_sve(void)
>  		cpus_have_const_cap(ARM64_SVE);
>  }
>
> -/*
> - * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
> - * vector length.
> - *
> - * Use only if SVE is present.
> - * This function clobbers the SVE vector length.
> - */
> -static inline u64 read_zcr_features(void)
> -{
> -	u64 zcr;
> -	unsigned int vq_max;
> -
> -	/*
> -	 * Set the maximum possible VL, and write zeroes to all other
> -	 * bits to see if they stick.
> -	 */
> -	sve_kernel_enable(NULL);
> -	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
> -
> -	zcr = read_sysreg_s(SYS_ZCR_EL1);
> -	zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
> -	vq_max = sve_vq_from_vl(sve_get_vl());
> -	zcr |= vq_max - 1; /* set LEN field to maximum effective value */
> -
> -	return zcr;
> -}
> -
>  #endif /* __ASSEMBLY__ */
>
>  #endif
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 3e00f70..fb60b22 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -69,6 +69,8 @@ extern unsigned int sve_get_vl(void);
>  struct arm64_cpu_capabilities;
>  extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
>
> +extern u64 read_zcr_features(void);
> +
>  extern int __ro_after_init sve_max_vl;
>
>  #ifdef CONFIG_ARM64_SVE
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 7675989..f902b6d 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -40,6 +40,7 @@
>
>  #include <asm/alternative.h>
>  #include <asm/cpufeature.h>
> +#include <asm/fpsimd.h>
>  #include <asm/hw_breakpoint.h>
>  #include <asm/lse.h>
>  #include <asm/pgtable-hwdef.h>
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index ded7ffd..5152bbc 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -37,6 +37,7 @@
>  #include <linux/sched/task_stack.h>
>  #include <linux/signal.h>
>  #include <linux/slab.h>
> +#include <linux/stddef.h>
>  #include <linux/sysctl.h>
>
>  #include <asm/esr.h>
> @@ -754,6 +755,33 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
>  	isb();
>  }
>
> +/*
> + * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
> + * vector length.
> + *
> + * Use only if SVE is present.
> + * This function clobbers the SVE vector length.
> + */
> +u64 read_zcr_features(void)
> +{
> +	u64 zcr;
> +	unsigned int vq_max;
> +
> +	/*
> +	 * Set the maximum possible VL, and write zeroes to all other
> +	 * bits to see if they stick.
> +	 */
> +	sve_kernel_enable(NULL);
> +	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
> +
> +	zcr = read_sysreg_s(SYS_ZCR_EL1);
> +	zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
> +	vq_max = sve_vq_from_vl(sve_get_vl());
> +	zcr |= vq_max - 1; /* set LEN field to maximum effective value */
> +
> +	return zcr;
> +}
> +
>  void __init sve_setup(void)
>  {
>  	u64 zcr;
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 7ff81fe..78889c4 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -44,6 +44,7 @@
>  #include <asm/compat.h>
>  #include <asm/cpufeature.h>
>  #include <asm/debug-monitors.h>
> +#include <asm/fpsimd.h>
>  #include <asm/pgtable.h>
>  #include <asm/stacktrace.h>
>  #include <asm/syscall.h>


--
Alex Benn?e

^ permalink raw reply

* [PATCH v10 10/18] KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing
From: Alex Bennée @ 2018-05-24 10:09 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527005119-6842-11-git-send-email-Dave.Martin@arm.com>


Dave Martin <Dave.Martin@arm.com> writes:

> This patch refactors KVM to align the host and guest FPSIMD
> save/restore logic with each other for arm64.  This reduces the
> number of redundant save/restore operations that must occur, and
> reduces the common-case IRQ blackout time during guest exit storms
> by saving the host state lazily and optimising away the need to
> restore the host state before returning to the run loop.
>
> Four hooks are defined in order to enable this:
>
>  * kvm_arch_vcpu_run_map_fp():
>    Called on PID change to map necessary bits of current to Hyp.
>
>  * kvm_arch_vcpu_load_fp():
>    Set up FP/SIMD for entering the KVM run loop (parse as
>    "vcpu_load fp").
>
>  * kvm_arch_vcpu_ctxsync_fp():
>    Get FP/SIMD into a safe state for re-enabling interrupts after a
>    guest exit back to the run loop.
>
>    For arm64 specifically, this involves updating the host kernel's
>    FPSIMD context tracking metadata so that kernel-mode NEON use
>    will cause the vcpu's FPSIMD state to be saved back correctly
>    into the vcpu struct.  This must be done before re-enabling
>    interrupts because kernel-mode NEON may be used by softirqs.
>
>  * kvm_arch_vcpu_put_fp():
>    Save guest FP/SIMD state back to memory and dissociate from the
>    CPU ("vcpu_put fp").
>
> Also, the arm64 FPSIMD context switch code is updated to enable it
> to save back FPSIMD state for a vcpu, not just current.  A few
> helpers drive this:
>
>  * fpsimd_bind_state_to_cpu(struct user_fpsimd_state *fp):
>    mark this CPU as having context fp (which may belong to a vcpu)
>    currently loaded in its registers.  This is the non-task
>    equivalent of the static function fpsimd_bind_to_cpu() in
>    fpsimd.c.
>
>  * task_fpsimd_save():
>    exported to allow KVM to save the guest's FPSIMD state back to
>    memory on exit from the run loop.
>
>  * fpsimd_flush_state():
>    invalidate any context's FPSIMD state that is currently loaded.
>    Used to disassociate the vcpu from the CPU regs on run loop exit.
>
> These changes allow the run loop to enable interrupts (and thus
> softirqs that may use kernel-mode NEON) without having to save the
> guest's FPSIMD state eagerly.
>
> Some new vcpu_arch fields are added to make all this work.  Because
> host FPSIMD state can now be saved back directly into current's
> thread_struct as appropriate, host_cpu_context is no longer used
> for preserving the FPSIMD state.  However, it is still needed for
> preserving other things such as the host's system registers.  To
> avoid ABI churn, the redundant storage space in host_cpu_context is
> not removed for now.
>
> arch/arm is not addressed by this patch and continues to use its
> current save/restore logic.  It could provide implementations of
> the helpers later if desired.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
> Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
> Acked-by: Catalin Marinas <catalin.marinas@arm.com>
>
> ---
>
> Reviewers note: tags retained because this delta is straightforward by
> itself.  Please shout if you're not happy!
>
> Changes since v9:
>
>  * Remove redundant set_thread_flag(TIF_FOREIGN_FPSTATE) that is now
>    implicit in fpsimd_flush_cpu_state().
> ---
>  arch/arm/include/asm/kvm_host.h   |   8 +++
>  arch/arm64/include/asm/fpsimd.h   |   6 +++
>  arch/arm64/include/asm/kvm_host.h |  21 ++++++++
>  arch/arm64/kernel/fpsimd.c        |  17 ++++--
>  arch/arm64/kvm/Kconfig            |   1 +
>  arch/arm64/kvm/Makefile           |   2 +-
>  arch/arm64/kvm/fpsimd.c           | 111 ++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kvm/hyp/switch.c       |  51 +++++++++---------
>  virt/kvm/arm/arm.c                |   4 ++
>  9 files changed, 191 insertions(+), 30 deletions(-)
>  create mode 100644 arch/arm64/kvm/fpsimd.c
>
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index c7c28c8..ac870b2 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -303,6 +303,14 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
>  int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
>  			       struct kvm_device_attr *attr);
>
> +/*
> + * VFP/NEON switching is all done by the hyp switch code, so no need to
> + * coordinate with host context handling for this state:
> + */
> +static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
> +
>  /* All host FP/SIMD state is restored on guest exit, so nothing to save: */
>  static inline void kvm_fpsimd_flush_cpu_state(void) {}
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index aa7162a..3e00f70 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -41,6 +41,8 @@ struct task_struct;
>  extern void fpsimd_save_state(struct user_fpsimd_state *state);
>  extern void fpsimd_load_state(struct user_fpsimd_state *state);
>
> +extern void fpsimd_save(void);
> +
>  extern void fpsimd_thread_switch(struct task_struct *next);
>  extern void fpsimd_flush_thread(void);
>
> @@ -49,7 +51,11 @@ extern void fpsimd_preserve_current_state(void);
>  extern void fpsimd_restore_current_state(void);
>  extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
>
> +extern void fpsimd_bind_task_to_cpu(void);
> +extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state);
> +
>  extern void fpsimd_flush_task_state(struct task_struct *target);
> +extern void fpsimd_flush_cpu_state(void);
>  extern void sve_flush_cpu_state(void);
>
>  /* Maximum VL that SVE VL-agnostic software can transparently support */
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 146c167..b3fe730 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -30,6 +30,7 @@
>  #include <asm/kvm.h>
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_mmio.h>
> +#include <asm/thread_info.h>
>
>  #define __KVM_HAVE_ARCH_INTC_INITIALIZED
>
> @@ -238,6 +239,10 @@ struct kvm_vcpu_arch {
>
>  	/* Pointer to host CPU context */
>  	kvm_cpu_context_t *host_cpu_context;
> +
> +	struct thread_info *host_thread_info;	/* hyp VA */
> +	struct user_fpsimd_state *host_fpsimd_state;	/* hyp VA */
> +
>  	struct {
>  		/* {Break,watch}point registers */
>  		struct kvm_guest_debug_arch regs;
> @@ -295,6 +300,9 @@ struct kvm_vcpu_arch {
>
>  /* vcpu_arch flags field values: */
>  #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
> +#define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
> +#define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded
>  */

I may be descending into bike-shedding territory here but it seems a
little incongruous to have _ENABLED = guest FP state when we have _HOST
for host FP state. Why not KVM_ARM64_FP_GUEST?

> +#define KVM_ARM64_HOST_SVE_IN_USE	(1 << 3) /* backup for host TIF_SVE */
>
>  #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
>
> @@ -423,6 +431,19 @@ static inline void __cpu_init_stage2(void)
>  		  "PARange is %d bits, unsupported configuration!", parange);
>  }
>
> +/* Guest/host FPSIMD coordination helpers */
> +int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
> +void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
> +void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
> +void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
> +
> +#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
> +static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
> +{
> +	return kvm_arch_vcpu_run_map_fp(vcpu);
> +}
> +#endif
> +
>  /*
>   * All host FP/SIMD state is restored on guest exit, so nothing needs
>   * doing here except in the SVE case:
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index ba9e7df..ded7ffd 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -265,7 +265,7 @@ static void task_fpsimd_load(void)
>   *
>   * Softirqs (and preemption) must be disabled.
>   */
> -static void fpsimd_save(void)
> +void fpsimd_save(void)
>  {
>  	struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
>
> @@ -981,7 +981,7 @@ void fpsimd_signal_preserve_current_state(void)
>   * Associate current's FPSIMD context with this cpu
>   * Preemption must be disabled when calling this function.
>   */
> -static void fpsimd_bind_task_to_cpu(void)
> +void fpsimd_bind_task_to_cpu(void)
>  {
>  	struct fpsimd_last_state_struct *last =
>  		this_cpu_ptr(&fpsimd_last_state);
> @@ -1001,6 +1001,17 @@ static void fpsimd_bind_task_to_cpu(void)
>  	}
>  }
>
> +void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
> +{
> +	struct fpsimd_last_state_struct *last =
> +		this_cpu_ptr(&fpsimd_last_state);
> +
> +	WARN_ON(!in_softirq() && !irqs_disabled());
> +
> +	last->st = st;
> +	last->sve_in_use = false;
> +}
> +
>  /*
>   * Load the userland FPSIMD state of 'current' from memory, but only if the
>   * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
> @@ -1053,7 +1064,7 @@ void fpsimd_flush_task_state(struct task_struct *t)
>  	t->thread.fpsimd_cpu = NR_CPUS;
>  }
>
> -static inline void fpsimd_flush_cpu_state(void)
> +void fpsimd_flush_cpu_state(void)
>  {
>  	__this_cpu_write(fpsimd_last_state.st, NULL);
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index a2e3a5a..47b23bf 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -39,6 +39,7 @@ config KVM
>  	select HAVE_KVM_IRQ_ROUTING
>  	select IRQ_BYPASS_MANAGER
>  	select HAVE_KVM_IRQ_BYPASS
> +	select HAVE_KVM_VCPU_RUN_PID_CHANGE
>  	---help---
>  	  Support hosting virtualized guest machines.
>  	  We don't support KVM with 16K page tables yet, due to the multiple
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index 93afff9..0f2a135 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -19,7 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
> -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o
> +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
>
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> new file mode 100644
> index 0000000..365933a
> --- /dev/null
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -0,0 +1,111 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * arch/arm64/kvm/fpsimd.c: Guest/host FPSIMD context coordination helpers
> + *
> + * Copyright 2018 Arm Limited
> + * Author: Dave Martin <Dave.Martin@arm.com>
> + */
> +#include <linux/bottom_half.h>
> +#include <linux/sched.h>
> +#include <linux/thread_info.h>
> +#include <linux/kvm_host.h>
> +#include <asm/kvm_asm.h>
> +#include <asm/kvm_host.h>
> +#include <asm/kvm_mmu.h>
> +
> +/*
> + * Called on entry to KVM_RUN unless this vcpu previously ran at least
> + * once and the most recent prior KVM_RUN for this vcpu was called from
> + * the same task as current (highly likely).
> + *
> + * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu),
> + * such that on entering hyp the relevant parts of current are already
> + * mapped.
> + */
> +int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
> +{
> +	int ret;
> +
> +	struct thread_info *ti = &current->thread_info;
> +	struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
> +
> +	/*
> +	 * Make sure the host task thread flags and fpsimd state are
> +	 * visible to hyp:
> +	 */
> +	ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP);
> +	if (ret)
> +		goto error;
> +
> +	ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
> +	if (ret)
> +		goto error;
> +
> +	vcpu->arch.host_thread_info = kern_hyp_va(ti);
> +	vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
> +error:
> +	return ret;
> +}
> +
> +/*
> + * Prepare vcpu for saving the host's FPSIMD state and loading the guest's.
> + * The actual loading is done by the FPSIMD access trap taken to hyp.
> + *
> + * Here, we just set the correct metadata to indicate that the FPSIMD
> + * state in the cpu regs (if any) belongs to current on the host.
> + *
> + * TIF_SVE is backed up here, since it may get clobbered with guest state.
> + * This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
> + */
> +void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
> +{
> +	BUG_ON(system_supports_sve());
> +	BUG_ON(!current->mm);
> +
> +	vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
> +	vcpu->arch.flags |= KVM_ARM64_FP_HOST;
> +	if (test_thread_flag(TIF_SVE))
> +		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
> +}
> +
> +/*
> + * If the guest FPSIMD state was loaded, update the host's context
> + * tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
> + * so that they will be written back if the kernel clobbers them due to
> + * kernel-mode NEON before re-entry into the guest.
> + */
> +void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
> +{
> +	WARN_ON_ONCE(!irqs_disabled());
> +
> +	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
> +		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs);
> +		clear_thread_flag(TIF_FOREIGN_FPSTATE);
> +		clear_thread_flag(TIF_SVE);
> +	}
> +}
> +
> +/*
> + * Write back the vcpu FPSIMD regs if they are dirty, and invalidate the
> + * cpu FPSIMD regs so that they can't be spuriously reused if this vcpu
> + * disappears and another task or vcpu appears that recycles the same
> + * struct fpsimd_state.
> + */
> +void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
> +{
> +	local_bh_disable();
> +
> +	update_thread_flag(TIF_SVE,
> +			   vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
> +
> +	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
> +		/* Clean guest FP state to memory and invalidate cpu view */
> +		fpsimd_save();
> +		fpsimd_flush_cpu_state();
> +	} else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
> +		/* Ensure user trap controls are correctly restored */
> +		fpsimd_bind_task_to_cpu();
> +	}
> +
> +	local_bh_enable();
> +}
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index c0796c4..118f300 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -23,19 +23,21 @@
>
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_emulate.h>
> +#include <asm/kvm_host.h>
>  #include <asm/kvm_hyp.h>
>  #include <asm/kvm_mmu.h>
>  #include <asm/fpsimd.h>
>  #include <asm/debug-monitors.h>
> +#include <asm/thread_info.h>
>
> -static bool __hyp_text __fpsimd_enabled_nvhe(void)
> +/* Check whether the FP regs were dirtied while in the host-side run loop: */
> +static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
>  {
> -	return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
> -}
> +	if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
> +		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
> +				      KVM_ARM64_FP_HOST);
>
> -static bool fpsimd_enabled_vhe(void)
> -{
> -	return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
> +	return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
>  }
>
>  /* Save the 32-bit only FPSIMD system register state */
> @@ -92,7 +94,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
>
>  	val = read_sysreg(cpacr_el1);
>  	val |= CPACR_EL1_TTA;
> -	val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
> +	val &= ~CPACR_EL1_ZEN;
> +	if (!update_fp_enabled(vcpu))
> +		val &= ~CPACR_EL1_FPEN;
> +
>  	write_sysreg(val, cpacr_el1);
>
>  	write_sysreg(kvm_get_hyp_vector(), vbar_el1);
> @@ -105,7 +110,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
>  	__activate_traps_common(vcpu);
>
>  	val = CPTR_EL2_DEFAULT;
> -	val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
> +	val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
> +	if (!update_fp_enabled(vcpu))
> +		val |= CPTR_EL2_TFP;
> +
>  	write_sysreg(val, cptr_el2);
>  }
>
> @@ -321,8 +329,6 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
>  void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
>  				    struct kvm_vcpu *vcpu)
>  {
> -	kvm_cpu_context_t *host_ctxt;
> -
>  	if (has_vhe())
>  		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
>  			     cpacr_el1);
> @@ -332,14 +338,19 @@ void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
>
>  	isb();
>
> -	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
> -	__fpsimd_save_state(&host_ctxt->gp_regs.fp_regs);
> +	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
> +		__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
> +		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
> +	}
> +
>  	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
>
>  	/* Skip restoring fpexc32 for AArch64 guests */
>  	if (!(read_sysreg(hcr_el2) & HCR_RW))
>  		write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
>  			     fpexc32_el2);
> +
> +	vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
>  }
>
>  /*
> @@ -418,7 +429,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_cpu_context *host_ctxt;
>  	struct kvm_cpu_context *guest_ctxt;
> -	bool fp_enabled;
>  	u64 exit_code;
>
>  	host_ctxt = vcpu->arch.host_cpu_context;
> @@ -440,19 +450,14 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
>  		/* And we're baaack! */
>  	} while (fixup_guest_exit(vcpu, &exit_code));
>
> -	fp_enabled = fpsimd_enabled_vhe();
> -
>  	sysreg_save_guest_state_vhe(guest_ctxt);
>
>  	__deactivate_traps(vcpu);
>
>  	sysreg_restore_host_state_vhe(host_ctxt);
>
> -	if (fp_enabled) {
> -		__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
> -		__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
> +	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
>  		__fpsimd_save_fpexc32(vcpu);
> -	}
>
>  	__debug_switch_to_host(vcpu);
>
> @@ -464,7 +469,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_cpu_context *host_ctxt;
>  	struct kvm_cpu_context *guest_ctxt;
> -	bool fp_enabled;
>  	u64 exit_code;
>
>  	vcpu = kern_hyp_va(vcpu);
> @@ -496,8 +500,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
>  		/* And we're baaack! */
>  	} while (fixup_guest_exit(vcpu, &exit_code));
>
> -	fp_enabled = __fpsimd_enabled_nvhe();
> -
>  	__sysreg_save_state_nvhe(guest_ctxt);
>  	__sysreg32_save_state(vcpu);
>  	__timer_disable_traps(vcpu);
> @@ -508,11 +510,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
>
>  	__sysreg_restore_state_nvhe(host_ctxt);
>
> -	if (fp_enabled) {
> -		__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
> -		__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
> +	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
>  		__fpsimd_save_fpexc32(vcpu);
> -	}
>
>  	/*
>  	 * This must come after restoring the host sysregs, since a non-VHE
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index a4c1b76..bee226c 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -363,10 +363,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	kvm_vgic_load(vcpu);
>  	kvm_timer_vcpu_load(vcpu);
>  	kvm_vcpu_load_sysregs(vcpu);
> +	kvm_arch_vcpu_load_fp(vcpu);
>  }
>
>  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>  {
> +	kvm_arch_vcpu_put_fp(vcpu);
>  	kvm_vcpu_put_sysregs(vcpu);
>  	kvm_timer_vcpu_put(vcpu);
>  	kvm_vgic_put(vcpu);
> @@ -778,6 +780,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  		if (static_branch_unlikely(&userspace_irqchip_in_use))
>  			kvm_timer_sync_hwstate(vcpu);
>
> +		kvm_arch_vcpu_ctxsync_fp(vcpu);
> +
>  		/*
>  		 * We may have taken a host interrupt in HYP mode (ie
>  		 * while executing the guest). This interrupt is still

Minor bike-shedding aside:

Reviewed-by: Alex Benn?e <alex.bennee@linaro.org>

--
Alex Benn?e

^ permalink raw reply

* [PATCH v10 07/18] arm64: fpsimd: Eliminate task->mm checks
From: Christoffer Dall @ 2018-05-24 10:06 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180524095056.GR13470@e103592.cambridge.arm.com>

On Thu, May 24, 2018 at 10:50:56AM +0100, Dave Martin wrote:
> On Thu, May 24, 2018 at 10:33:50AM +0200, Christoffer Dall wrote:
> > On Wed, May 23, 2018 at 04:03:37PM +0100, Dave Martin wrote:
> > > On Wed, May 23, 2018 at 03:56:57PM +0100, Catalin Marinas wrote:
> > > > On Wed, May 23, 2018 at 02:31:59PM +0100, Dave P Martin wrote:
> > > > > On Wed, May 23, 2018 at 01:48:12PM +0200, Christoffer Dall wrote:
> > > > > > On Tue, May 22, 2018 at 05:05:08PM +0100, Dave Martin wrote:
> > > > > > > This is true by construction however: TIF_FOREIGN_FPSTATE is never
> > > > > > > cleared except when returning to userspace or returning from a
> > > > > > > signal: thus, for a true kernel thread no FPSIMD context is ever
> > > > > > > loaded, TIF_FOREIGN_FPSTATE will remain set and no context will
> > > > > > > ever be saved.
> > > > > > 
> > > > > > I don't understand this construction proof; from looking at the patch
> > > > > > below it is not obvious to me why fpsimd_thread_switch() can never have
> > > > > > !wrong_task && !wrong_cpu and therefore clear TIF_FOREIGN_FPSTATE for a
> > > > > > kernel thread?
> > > > > 
> > > > > Looking at this again, I think it is poorly worded.  This patch aims to
> > > > > make it true by construction, but it isn't prior to the patch.
> > > > > 
> > > > > I'm tempted to delete the paragraph: the assertion of both untrue and
> > > > > not the best way to justify that this patch works.
> > > > > 
> > > > > 
> > > > > How about:
> > > > > 
> > > > > -8<-
> > > > > 
> > > > > The context switch logic already isolates user threads from each other.
> > > > > This, it is sufficient for isolating user threads from the kernel,
> > 
> > s/This/Thus/ ?
> > 
> > I don't understand what 'it' refers to here?
> > 
> > > > > since the goal either way is to ensure that code executing in userspace
> > > > > cannot see any FPSIMD state except its own.  Thus, there is no special
> > > > > property of kernel threads that we care about except that it is
> > > > > pointless to save or load FPSIMD register state for them.
> > 
> > Actually, I'm not really sure what this paragraph is getting at.
> 
> Reading this again, I don't think the paragraph adds much useful.
> 
> So I propose deleting that too.
> 
> > > > > 
> > > > > At worst, the removal of all the kernel thread special cases by this
> > > > > patch would thus spuriously load and save state for kernel threads when
> > > > > unnecessary.
> > > > > 
> > > > > But the context switch logic is already deliberately optimised to defer
> > > > > reloads of the regs until ret_to_user (or sigreturn as a special case),
> > > > > which kernel threads by definition never reach.
> > > > > 
> > > > > ->8-
> > > > 
> > > > The "at worst" paragraph makes it look like it could happen (at least
> > > > until you reach the last paragraph). Maybe you can just say that
> > > > wrong_task and wrong_cpu (with the fpsimd_cpu = NR_CPUS addition) are
> > > > always true for kernel threads. You should probably mention this in a
> > > > comment in the code as well.
> > > 
> > > What if I just delete the second paragraph, and remove the "But" from
> > > the start of the third, and append:
> > > 
> > > "As a result, the wrong_task and wrong_cpu tests in
> > > fpsimd_thread_switch() will always yield false for kernel threads."
> > > 
> > > ...with a similar comment in the code?
> > 
> > ...with a risk of being a bit over-pedantic and annoying, may I suggest
> > the following complete commit text:
> > 
> > ------8<------
> > Currently the FPSIMD handling code uses the condition task->mm ==
> > NULL as a hint that task has no FPSIMD register context.
> > 
> > The ->mm check is only there to filter out tasks that cannot
> > possibly have FPSIMD context loaded, for optimisation purposes.
> > However, TIF_FOREIGN_FPSTATE must always be checked anyway before
> > saving FPSIMD context back to memory.  For this reason, the ->mm
> > checks are not useful, providing that that TIF_FOREIGN_FPSTATE is
> > maintained properly for kernel threads.
> > 
> > FPSIMD context is never preserved for kernel threads across a context
> > switch and therefore TIF_FOREIGN_FPSTATE should always be true for
> 
> (This refactoring opens up the interesting possibility of making
> kernel-mode NEON in task context preemptible for kernel threads so
> that we actually do preserve state... but that's a discussion for
> another day.  There may be code around that relies on
> kernel_neon_begin() disabling preemption for real.)
> 
> > kernel threads.  This is indeed the case, as the wrong_task and
> 
> This suggests that TIF_FOREIGN_FPSTATE is always true for kernel
> threads today.  This is not quite because use_mm() can make mm non-
> NULL.
> 

I was suggesting that it's always true after this patch.

> > wrong_cpu tests in fpsimd_thread_switch() will always yield false for
> > kernel threads.
> 
> ("false" -> "true".  My bad.)
> 
> > Further, the context switch logic is already deliberately optimised to
> > defer reloads of the FPSIMD context until ret_to_user (or sigreturn as a
> > special case), which kernel threads by definition never reach, and
> > therefore this change introduces no additional work in the critical
> > path.
> > 
> > This patch removes the redundant checks and special-case code.
> > ------8<------
> 
> Looking at my existing text, I rather reworded it like this.
> Does this work any better for you?
> 
> --8<--
> 
> Currently the FPSIMD handling code uses the condition task->mm ==
> NULL as a hint that task has no FPSIMD register context.
> 
> The ->mm check is only there to filter out tasks that cannot
> possibly have FPSIMD context loaded, for optimisation purposes.
> Also, TIF_FOREIGN_FPSTATE must always be checked anyway before
> saving FPSIMD context back to memory.  For these reasons, the ->mm
> checks are not useful, providing that TIF_FOREIGN_FPSTATE is
> maintained in a consistent way for kernel threads.

Consistent with what?  Without more context or explanation,
I'm not sure what the reader is to make of that.  Do you not mean the
TIF_FOREIGN_FPSTATE is always true for kernel threads?

> 
> The context switch logic is already deliberately optimised to defer
> reloads of the regs until ret_to_user (or sigreturn as a special
> case), and save them only if they have been previously loaded.
> Kernel threads by definition never reach these paths.  As a result,

I'm struggling with the "As a result," here.  Is this because reloads of
regs in ret_to_user (or sigreturn) are the only places that can make
wrong_cpu or wrong_task be false?

(I'm actually wanting to understand this, not just bikeshedding the
commit message, as new corner cases keep coming up on this logic.)

> the wrong_task and wrong_cpu tests in fpsimd_thread_switch() will
> always yield true for kernel threads.
> 
> This patch removes the redundant checks and special-case code,                  ensuring that TIF_FOREIGN_FPSTATE is set whenever a kernel thread               is scheduled in, and ensures that this flag is set for the init
> task.  The fpsimd_flush_task_state() call already present in                    copy_thread() ensures the same for any new task.

nit: funny formatting

nit: ensuring that TIF_FOREIGN_FPSTATE *remains* set whenever a kernel
thread is scheduled in?

> 
> With TIF_FOREIGN_FPSTATE always set for kernel threads, this patch
> ensures that no extra context save work is added for kernel
> threads, and eliminates the redundant context saving that may
> currently occur for kernel threads that have acquired an mm via
> use_mm().
> 
> -->8--

If you can slightly connect the dots with the "As a result" above, I'm
fine with your version of the text.

Thanks,
-Christoffer

^ permalink raw reply

* [PATCH v10 04/18] KVM: arm/arm64: Introduce kvm_arch_vcpu_run_pid_change
From: Dave Martin @ 2018-05-24 10:04 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <87d0xltnrk.fsf@linaro.org>

On Thu, May 24, 2018 at 10:18:39AM +0100, Alex Benn?e wrote:
> 
> Christoffer Dall <christoffer.dall@arm.com> writes:
> 
> > On Wed, May 23, 2018 at 03:40:26PM +0100, Dave Martin wrote:
> >> On Wed, May 23, 2018 at 03:34:20PM +0100, Alex Benn?e wrote:
> >> >
> >> > Dave Martin <Dave.Martin@arm.com> writes:

[...]

> >> > > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> >> > > index cca7e06..72143cf 100644
> >> > > --- a/virt/kvm/Kconfig
> >> > > +++ b/virt/kvm/Kconfig
> >> > > @@ -54,3 +54,6 @@ config HAVE_KVM_IRQ_BYPASS
> >> > >
> >> > >  config HAVE_KVM_VCPU_ASYNC_IOCTL
> >> > >         bool
> >> > > +
> >> > > +config HAVE_KVM_VCPU_RUN_PID_CHANGE
> >> > > +       bool
> >> >
> >> > This almost threw me as I thought you might be able to enable this and
> >> > break the build, but apparently not:
> >> >
> >> > Reviewed-by: Alex Benn?e <alex.bennee@linaro.org>
> >>
> >> Without a "help", the option seems non-interactive and cannot be true
> >> unless something selects it.  It seems a bit weird to me too, but the
> >> idiom appears widely used...
> >>
> > Indeed, I've copied this idiom from other things before and nobody has
> > complained, so I think it works (without any further deep insights into
> > the inner workings of Kconfig).
> 
> It's fine. My main worry was breaking bisection with the normal "make
> olddefconfig" approach. I tested it and found it to be fine and I don't
> think we need to worry about people adding the symbol to .config
> manually - they get to keep both pieces ;-)

I wasted a fair amount of time at some point in the past trying to work
out why I couldn't set one of these options by
echo CONFIG_FOO=y >>.config ...

That was fun ;)

Cheers
---Dave

^ permalink raw reply

* [PATCH 04/14] arm64: Add ARCH_WORKAROUND_2 probing
From: Suzuki K Poulose @ 2018-05-24  9:58 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180522150648.28297-5-marc.zyngier@arm.com>

On 22/05/18 16:06, Marc Zyngier wrote:
> As for Spectre variant-2, we rely on SMCCC 1.1 to provide the
> discovery mechanism for detecting the SSBD mitigation.
> 
> A new capability is also allocated for that purpose, and a
> config option.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>


> +static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
> +				    int scope)
> +{
> +	struct arm_smccc_res res;
> +	bool supported = true;
> +
> +	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
> +
> +	if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
> +		return false;
> +
> +	/*
> +	 * The probe function return value is either negative
> +	 * (unsupported or mitigated), positive (unaffected), or zero
> +	 * (requires mitigation). We only need to do anything in the
> +	 * last case.
> +	 */
> +	switch (psci_ops.conduit) {
> +	case PSCI_CONDUIT_HVC:
> +		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
> +				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
> +		if ((int)res.a0 != 0)
> +			supported = false;
> +		break;
> +
> +	case PSCI_CONDUIT_SMC:
> +		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
> +				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
> +		if ((int)res.a0 != 0)
> +			supported = false;
> +		break;
> +
> +	default:
> +		supported = false;
> +	}
> +
> +	if (supported) {
> +		__this_cpu_write(arm64_ssbd_callback_required, 1);
> +		do_ssbd(true);
> +	}


Marc,

As discussed, we have minor issue with the "corner case". If a CPU
is hotplugged in which requires the mitigation, after the system has
finalised the cap to "not available", the CPU could go ahead and
do the "work around" as above, while not effectively doing anything
about it at runtime for KVM guests (as thats the only place where
we rely on the CAP being set).

But, yes this is real corner case. There is no easy way to solve it
other than

1) Allow late modifications to CPU hwcaps

OR

2) Penalise the fastpath to always check per-cpu setting.


Regardless,

Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>

^ permalink raw reply

* [PATCH v10 07/18] arm64: fpsimd: Eliminate task->mm checks
From: Dave Martin @ 2018-05-24  9:50 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180524083350.GK55598@C02W217FHV2R.local>

On Thu, May 24, 2018 at 10:33:50AM +0200, Christoffer Dall wrote:
> On Wed, May 23, 2018 at 04:03:37PM +0100, Dave Martin wrote:
> > On Wed, May 23, 2018 at 03:56:57PM +0100, Catalin Marinas wrote:
> > > On Wed, May 23, 2018 at 02:31:59PM +0100, Dave P Martin wrote:
> > > > On Wed, May 23, 2018 at 01:48:12PM +0200, Christoffer Dall wrote:
> > > > > On Tue, May 22, 2018 at 05:05:08PM +0100, Dave Martin wrote:
> > > > > > This is true by construction however: TIF_FOREIGN_FPSTATE is never
> > > > > > cleared except when returning to userspace or returning from a
> > > > > > signal: thus, for a true kernel thread no FPSIMD context is ever
> > > > > > loaded, TIF_FOREIGN_FPSTATE will remain set and no context will
> > > > > > ever be saved.
> > > > > 
> > > > > I don't understand this construction proof; from looking at the patch
> > > > > below it is not obvious to me why fpsimd_thread_switch() can never have
> > > > > !wrong_task && !wrong_cpu and therefore clear TIF_FOREIGN_FPSTATE for a
> > > > > kernel thread?
> > > > 
> > > > Looking at this again, I think it is poorly worded.  This patch aims to
> > > > make it true by construction, but it isn't prior to the patch.
> > > > 
> > > > I'm tempted to delete the paragraph: the assertion of both untrue and
> > > > not the best way to justify that this patch works.
> > > > 
> > > > 
> > > > How about:
> > > > 
> > > > -8<-
> > > > 
> > > > The context switch logic already isolates user threads from each other.
> > > > This, it is sufficient for isolating user threads from the kernel,
> 
> s/This/Thus/ ?
> 
> I don't understand what 'it' refers to here?
> 
> > > > since the goal either way is to ensure that code executing in userspace
> > > > cannot see any FPSIMD state except its own.  Thus, there is no special
> > > > property of kernel threads that we care about except that it is
> > > > pointless to save or load FPSIMD register state for them.
> 
> Actually, I'm not really sure what this paragraph is getting at.

Reading this again, I don't think the paragraph adds much useful.

So I propose deleting that too.

> > > > 
> > > > At worst, the removal of all the kernel thread special cases by this
> > > > patch would thus spuriously load and save state for kernel threads when
> > > > unnecessary.
> > > > 
> > > > But the context switch logic is already deliberately optimised to defer
> > > > reloads of the regs until ret_to_user (or sigreturn as a special case),
> > > > which kernel threads by definition never reach.
> > > > 
> > > > ->8-
> > > 
> > > The "at worst" paragraph makes it look like it could happen (at least
> > > until you reach the last paragraph). Maybe you can just say that
> > > wrong_task and wrong_cpu (with the fpsimd_cpu = NR_CPUS addition) are
> > > always true for kernel threads. You should probably mention this in a
> > > comment in the code as well.
> > 
> > What if I just delete the second paragraph, and remove the "But" from
> > the start of the third, and append:
> > 
> > "As a result, the wrong_task and wrong_cpu tests in
> > fpsimd_thread_switch() will always yield false for kernel threads."
> > 
> > ...with a similar comment in the code?
> 
> ...with a risk of being a bit over-pedantic and annoying, may I suggest
> the following complete commit text:
> 
> ------8<------
> Currently the FPSIMD handling code uses the condition task->mm ==
> NULL as a hint that task has no FPSIMD register context.
> 
> The ->mm check is only there to filter out tasks that cannot
> possibly have FPSIMD context loaded, for optimisation purposes.
> However, TIF_FOREIGN_FPSTATE must always be checked anyway before
> saving FPSIMD context back to memory.  For this reason, the ->mm
> checks are not useful, providing that that TIF_FOREIGN_FPSTATE is
> maintained properly for kernel threads.
> 
> FPSIMD context is never preserved for kernel threads across a context
> switch and therefore TIF_FOREIGN_FPSTATE should always be true for

(This refactoring opens up the interesting possibility of making
kernel-mode NEON in task context preemptible for kernel threads so
that we actually do preserve state... but that's a discussion for
another day.  There may be code around that relies on
kernel_neon_begin() disabling preemption for real.)

> kernel threads.  This is indeed the case, as the wrong_task and

This suggests that TIF_FOREIGN_FPSTATE is always true for kernel
threads today.  This is not quite because use_mm() can make mm non-
NULL.

> wrong_cpu tests in fpsimd_thread_switch() will always yield false for
> kernel threads.

("false" -> "true".  My bad.)

> Further, the context switch logic is already deliberately optimised to
> defer reloads of the FPSIMD context until ret_to_user (or sigreturn as a
> special case), which kernel threads by definition never reach, and
> therefore this change introduces no additional work in the critical
> path.
> 
> This patch removes the redundant checks and special-case code.
> ------8<------

Looking@my existing text, I rather reworded it like this.
Does this work any better for you?

--8<--

Currently the FPSIMD handling code uses the condition task->mm ==
NULL as a hint that task has no FPSIMD register context.

The ->mm check is only there to filter out tasks that cannot
possibly have FPSIMD context loaded, for optimisation purposes.
Also, TIF_FOREIGN_FPSTATE must always be checked anyway before
saving FPSIMD context back to memory.  For these reasons, the ->mm
checks are not useful, providing that TIF_FOREIGN_FPSTATE is
maintained in a consistent way for kernel threads.

The context switch logic is already deliberately optimised to defer
reloads of the regs until ret_to_user (or sigreturn as a special
case), and save them only if they have been previously loaded.
Kernel threads by definition never reach these paths.  As a result,
the wrong_task and wrong_cpu tests in fpsimd_thread_switch() will
always yield true for kernel threads.

This patch removes the redundant checks and special-case code,                  ensuring that TIF_FOREIGN_FPSTATE is set whenever a kernel thread               is scheduled in, and ensures that this flag is set for the init
task.  The fpsimd_flush_task_state() call already present in                    copy_thread() ensures the same for any new task.

With TIF_FOREIGN_FPSTATE always set for kernel threads, this patch
ensures that no extra context save work is added for kernel
threads, and eliminates the redundant context saving that may
currently occur for kernel threads that have acquired an mm via
use_mm().

-->8--

Cheers
---Dave

^ permalink raw reply

* v4.17-rc1: regressions on N900, N950
From: Pavel Machek @ 2018-05-24  9:47 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180523220456.GY98604@atomide.com>

Hi!

> > > So... I did some experiments on v4.16.
> > > 
> > > Swapping tsc2005 at 0 and lcd: acx565akm at 2 entries in the dts does break
> > > stuff.
> > > 
> > > I thought it might be due to vio regulator, but it does not appear
> > > so... screen still works with tsc2005 driver disabled in .config. (so
> > > there's noone to enable vio regulator).
> > 
> > Deleting tsc2005 at 0 entry also breaks screen.
> > 
> > Replacing tsc2005 at 0 entry with 
> > 
> >         foobar at 0 {
> > 	               compatible = "not really with anything";
> >  		       spi-max-frequency = <6000000>;
> >  		       reg = <0>;
> >  };
> > 
> > still results in working touchscreen, removing any of the three fields
> > breaks it again.
> 
> I bisected one regression down, see "omapdrm regression in v4.17-rc series"
> not sure if it covers all the issues being discussed here though.

Reverting 24aac6011f70 fixes screen on n900 in v4.17-rc for me. (But
strange dependency on tsc2005 at 0 is still there).

Best regards,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20180524/ce0bf288/attachment.sig>

^ permalink raw reply

* [PATCH v10 06/18] arm64: fpsimd: Generalise context saving for non-task contexts
From: Alex Bennée @ 2018-05-24  9:41 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180524090322.GQ13470@e103592.cambridge.arm.com>


Dave Martin <Dave.Martin@arm.com> writes:

> On Wed, May 23, 2018 at 09:15:11PM +0100, Alex Benn?e wrote:
>>
>> Dave Martin <Dave.Martin@arm.com> writes:
>>
>> > In preparation for allowing non-task (i.e., KVM vcpu) FPSIMD
>> > contexts to be handled by the fpsimd common code, this patch adapts
>> > task_fpsimd_save() to save back the currently loaded context,
>> > removing the explicit dependency on current.
>> >
>> > The relevant storage to write back to in memory is now found by
>> > examining the fpsimd_last_state percpu struct.
>> >
>> > fpsimd_save() does nothing unless TIF_FOREIGN_FPSTATE is clear, and
>> > fpsimd_last_state is updated under local_bh_disable() or
>> > local_irq_disable() everywhere that TIF_FOREIGN_FPSTATE is cleared:
>> > thus, fpsimd_save() will write back to the correct storage for the
>> > loaded context.
>> >
>> > No functional change.
>> >
>> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
>> > Acked-by: Marc Zyngier <marc.zyngier@arm.com>
>> > Acked-by: Catalin Marinas <catalin.marinas@arm.com>
>> > ---
>> >  arch/arm64/kernel/fpsimd.c | 25 +++++++++++++------------
>> >  1 file changed, 13 insertions(+), 12 deletions(-)
>> >
>> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> > index 9d85373..3aa100a 100644
>> > --- a/arch/arm64/kernel/fpsimd.c
>> > +++ b/arch/arm64/kernel/fpsimd.c
>> > @@ -270,13 +270,15 @@ static void task_fpsimd_load(void)
>> >  }
>> >
>> >  /*
>> > - * Ensure current's FPSIMD/SVE storage in thread_struct is up to date
>> > - * with respect to the CPU registers.
>> > + * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
>> > + * date with respect to the CPU registers.
>> >   *
>> >   * Softirqs (and preemption) must be disabled.
>> >   */
>> > -static void task_fpsimd_save(void)
>> > +static void fpsimd_save(void)
>> >  {
>> > +	struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
>> > +
>>
>> I thought I was missing something but the only write I saw of this was:
>>
>>   __this_cpu_write(fpsimd_last_state.st, NULL);
>>
>> which implied to me it is possible to have an invalid de-reference. I
>> did figure it out eventually as fpsimd_bind_state_to_cpu uses a more
>> indirect this_cpu_ptr idiom for tweaking this. I guess a reference to
>> fpsimd_bind_[task|state]_to_cpu in the comment would have helped my
>> confusion.
>
> How about:
>
>  static void fpsimd_save(void)
>  {
>  	struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
> +	/* set by fpsimd_bind_to_cpu() */

Great, thanks ;-)

--
Alex Benn?e

^ permalink raw reply

* [PATCH 4.16 090/161] staging: bcm2835-audio: Release resources on module_exit()
From: Greg Kroah-Hartman @ 2018-05-24  9:38 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180524093018.331893860@linuxfoundation.org>

4.16-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Kirill Marinushkin <k.marinushkin@gmail.com>

[ Upstream commit 626118b472d2eb45f83a0276a18d3e6a01c69f6a ]

In the current implementation, `rmmod snd_bcm2835` does not release
resources properly. It causes an oops when trying to list sound devices.

This commit fixes it.

The details WRT allocation / free are described below.

Device structure WRT allocation:

pdev
  \childdev[]
    \card
      \chip
        \pcm
        \ctl

Allocation / register sequence:

* childdev: devm_kzalloc      - freed during driver detach
* childdev: device_initialize - freed during device_unregister
* pdev: devres_alloc          - freed during driver detach
* childdev: device_add        - removed during device_unregister
* pdev, childdev: devres_add  - freed during driver detach
* card: snd_card_new          - freed during snd_card_free
* chip: kzalloc               - freed during kfree
* card, chip: snd_device_new  - freed during snd_device_free
* chip: new_pcm               - TODO: free pcm
* chip: new_ctl               - TODO: free ctl
* card: snd_card_register     - unregistered during snd_card_free

Free / unregister sequence:

* card: snd_card_free
* card, chip: snd_device_free
* childdev: device_unregister
* chip: kfree

Steps to reproduce the issue before this commit:

~~~~
$ rmmod snd_bcm2835
$ aplay -L
[  138.648130] Unable to handle kernel paging request at virtual address 7f1343c0
[  138.660415] pgd = ad8f0000
[  138.665567] [7f1343c0] *pgd=3864c811, *pte=00000000, *ppte=00000000
[  138.674887] Internal error: Oops: 7 [#1] SMP ARM
[  138.683571] Modules linked in: sha256_generic cfg80211 rfkill snd_pcm snd_timer
 snd fixed uio_pdrv_genirq uio ip_tables x_tables ipv6 [last unloaded: snd_bcm2835
]
[  138.706594] CPU: 3 PID: 463 Comm: aplay Tainted: G        WC       4.15.0-rc1-v
7+ #6
[  138.719833] Hardware name: BCM2835
[  138.726016] task: b877ac00 task.stack: aebec000
[  138.733408] PC is at try_module_get+0x38/0x24c
[  138.740813] LR is at snd_ctl_open+0x58/0x194 [snd]
[  138.748485] pc : [<801c4d5c>]    lr : [<7f0e6b2c>]    psr: 20000013
[  138.757709] sp : aebedd60  ip : aebedd88  fp : aebedd84
[  138.765884] r10: 00000000  r9 : 00000004  r8 : 7f0ed440
[  138.774040] r7 : b7e469b0  r6 : 7f0e6b2c  r5 : afd91900  r4 : 7f1343c0
[  138.783571] r3 : aebec000  r2 : 00000001  r1 : b877ac00  r0 : 7f1343c0
[  138.793084] Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
[  138.803300] Control: 10c5387d  Table: 2d8f006a  DAC: 00000055
[  138.812064] Process aplay (pid: 463, stack limit = 0xaebec210)
[  138.820868] Stack: (0xaebedd60 to 0xaebee000)
[  138.828207] dd60: 00000000 b848d000 afd91900 00000000 b7e469b0 7f0ed440 aebedda4 aebedd88
[  138.842371] dd80: 7f0e6b2c 801c4d30 afd91900 7f0ea4dc 00000000 b7e469b0 aebeddcc aebedda8
[  138.856611] dda0: 7f0e250c 7f0e6ae0 7f0e2464 b8478ec0 b7e469b0 afd91900 7f0ea388 00000000
[  138.870864] ddc0: aebeddf4 aebeddd0 802ce590 7f0e2470 8090ab64 afd91900 afd91900 b7e469b0
[  138.885301] dde0: afd91908 802ce4e4 aebede1c aebeddf8 802c57b4 802ce4f0 afd91900 aebedea8
[  138.900110] de00: b7fa4c00 00000000 00000000 00000004 aebede3c aebede20 802c6ba8 802c56b4
[  138.915260] de20: aebedea8 00000000 aebedf5c 00000000 aebedea4 aebede40 802d9a68 802c6b58
[  138.930661] de40: b874ddd0 00000000 00000000 00000001 00000041 00000000 afd91900 aebede70
[  138.946402] de60: 00000000 00000000 00000002 b7e469b0 b8a87610 b8d6ab80 801852f8 00080000
[  138.962314] de80: aebedf5c aebedea8 00000001 80108464 aebec000 00000000 aebedf4c aebedea8
[  138.978414] dea0: 802dacd4 802d970c b8a87610 b8d6ab80 a7982bc6 00000009 af363019 b9231480
[  138.994617] dec0: 00000000 b8c038a0 b7e469b0 00000101 00000002 00000238 00000000 00000000
[  139.010823] dee0: 00000000 aebedee8 00080000 0000000f aebedf3c aebedf00 802ed7e4 80843f94
[  139.027025] df00: 00000003 00080000 b9231490 b9231480 00000000 00080000 af363000 00000000
[  139.043229] df20: 00000005 00000002 ffffff9c 00000000 00080000 ffffff9c af363000 00000003
[  139.059430] df40: aebedf94 aebedf50 802c6f70 802dac70 aebec000 00000000 00000001 00000000
[  139.075629] df60: 00020000 00000004 00000100 00000001 7ebe577c 0002e038 00000000 00000005
[  139.091828] df80: 80108464 aebec000 aebedfa4 aebedf98 802c7060 802c6e6c 00000000 aebedfa8
[  139.108025] dfa0: 801082c0 802c7040 7ebe577c 0002e038 7ebe577c 00080000 00000b98 e81c8400
[  139.124222] dfc0: 7ebe577c 0002e038 00000000 00000005 7ebe57e4 00a20af8 7ebe57f0 76f87394
[  139.140419] dfe0: 00000000 7ebe55c4 76ec88e8 76df1d9c 60000010 7ebe577c 00000000 00000000
[  139.156715] [<801c4d5c>] (try_module_get) from [<7f0e6b2c>] (snd_ctl_open+0x58/0x194 [snd])
[  139.173222] [<7f0e6b2c>] (snd_ctl_open [snd]) from [<7f0e250c>] (snd_open+0xa8/0x14c [snd])
[  139.189683] [<7f0e250c>] (snd_open [snd]) from [<802ce590>] (chrdev_open+0xac/0x188)
[  139.205465] [<802ce590>] (chrdev_open) from [<802c57b4>] (do_dentry_open+0x10c/0x314)
[  139.221347] [<802c57b4>] (do_dentry_open) from [<802c6ba8>] (vfs_open+0x5c/0x88)
[  139.236788] [<802c6ba8>] (vfs_open) from [<802d9a68>] (path_openat+0x368/0x944)
[  139.248270] [<802d9a68>] (path_openat) from [<802dacd4>] (do_filp_open+0x70/0xc4)
[  139.263731] [<802dacd4>] (do_filp_open) from [<802c6f70>] (do_sys_open+0x110/0x1d4)
[  139.279378] [<802c6f70>] (do_sys_open) from [<802c7060>] (SyS_open+0x2c/0x30)
[  139.290647] [<802c7060>] (SyS_open) from [<801082c0>] (ret_fast_syscall+0x0/0x28)
[  139.306021] Code: e3c3303f e5932004 e2822001 e5832004 (e5943000)
[  139.316265] ---[ end trace 7f3f7f6193b663ed ]---
[  139.324956] note: aplay[463] exited with preempt_count 1
~~~~

Signed-off-by: Kirill Marinushkin <k.marinushkin@gmail.com>
Cc: Eric Anholt <eric@anholt.net>
Cc: Stefan Wahren <stefan.wahren@i2se.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Ray Jui <rjui@broadcom.com>
Cc: Scott Branden <sbranden@broadcom.com>
Cc: bcm-kernel-feedback-list at broadcom.com
Cc: Michael Zoran <mzoran@crowfest.net>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: linux-rpi-kernel at lists.infradead.org
Cc: linux-arm-kernel at lists.infradead.org
Cc: devel at driverdev.osuosl.org
Cc: linux-kernel at vger.kernel.org
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vc04_services/bcm2835-audio/bcm2835.c |   54 ++++++++----------
 1 file changed, 25 insertions(+), 29 deletions(-)

--- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835.c
+++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835.c
@@ -25,6 +25,10 @@ MODULE_PARM_DESC(enable_compat_alsa,
 static void snd_devm_unregister_child(struct device *dev, void *res)
 {
 	struct device *childdev = *(struct device **)res;
+	struct bcm2835_chip *chip = dev_get_drvdata(childdev);
+	struct snd_card *card = chip->card;
+
+	snd_card_free(card);
 
 	device_unregister(childdev);
 }
@@ -50,6 +54,13 @@ static int snd_devm_add_child(struct dev
 	return 0;
 }
 
+static void snd_bcm2835_release(struct device *dev)
+{
+	struct bcm2835_chip *chip = dev_get_drvdata(dev);
+
+	kfree(chip);
+}
+
 static struct device *
 snd_create_device(struct device *parent,
 		  struct device_driver *driver,
@@ -65,6 +76,7 @@ snd_create_device(struct device *parent,
 	device_initialize(device);
 	device->parent = parent;
 	device->driver = driver;
+	device->release = snd_bcm2835_release;
 
 	dev_set_name(device, "%s", name);
 
@@ -75,18 +87,19 @@ snd_create_device(struct device *parent,
 	return device;
 }
 
-static int snd_bcm2835_free(struct bcm2835_chip *chip)
-{
-	kfree(chip);
-	return 0;
-}
-
 /* component-destructor
  * (see "Management of Cards and Components")
  */
 static int snd_bcm2835_dev_free(struct snd_device *device)
 {
-	return snd_bcm2835_free(device->device_data);
+	struct bcm2835_chip *chip = device->device_data;
+	struct snd_card *card = chip->card;
+
+	/* TODO: free pcm, ctl */
+
+	snd_device_free(card, chip);
+
+	return 0;
 }
 
 /* chip-specific constructor
@@ -111,7 +124,7 @@ static int snd_bcm2835_create(struct snd
 
 	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
 	if (err) {
-		snd_bcm2835_free(chip);
+		kfree(chip);
 		return err;
 	}
 
@@ -119,31 +132,14 @@ static int snd_bcm2835_create(struct snd
 	return 0;
 }
 
-static void snd_devm_card_free(struct device *dev, void *res)
+static struct snd_card *snd_bcm2835_card_new(struct device *dev)
 {
-	struct snd_card *snd_card = *(struct snd_card **)res;
-
-	snd_card_free(snd_card);
-}
-
-static struct snd_card *snd_devm_card_new(struct device *dev)
-{
-	struct snd_card **dr;
 	struct snd_card *card;
 	int ret;
 
-	dr = devres_alloc(snd_devm_card_free, sizeof(*dr), GFP_KERNEL);
-	if (!dr)
-		return ERR_PTR(-ENOMEM);
-
 	ret = snd_card_new(dev, -1, NULL, THIS_MODULE, 0, &card);
-	if (ret) {
-		devres_free(dr);
+	if (ret)
 		return ERR_PTR(ret);
-	}
-
-	*dr = card;
-	devres_add(dev, dr);
 
 	return card;
 }
@@ -260,7 +256,7 @@ static int snd_add_child_device(struct d
 		return PTR_ERR(child);
 	}
 
-	card = snd_devm_card_new(child);
+	card = snd_bcm2835_card_new(child);
 	if (IS_ERR(card)) {
 		dev_err(child, "Failed to create card");
 		return PTR_ERR(card);
@@ -302,7 +298,7 @@ static int snd_add_child_device(struct d
 		return err;
 	}
 
-	dev_set_drvdata(child, card);
+	dev_set_drvdata(child, chip);
 	dev_info(child, "card created with %d channels\n", numchans);
 
 	return 0;

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox