LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] powerpc/audit: Simplify syscall_get_arch()
From: Christophe Leroy @ 2021-08-20  9:39 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Make use of is_32bit_task() and CONFIG_CPU_LITTLE_ENDIAN
to simplify syscall_get_arch().

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/syscall.h | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index ba0f88f3a30d..ac766037e8a1 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -116,16 +116,11 @@ static inline void syscall_set_arguments(struct task_struct *task,
 
 static inline int syscall_get_arch(struct task_struct *task)
 {
-	int arch;
-
-	if (IS_ENABLED(CONFIG_PPC64) && !test_tsk_thread_flag(task, TIF_32BIT))
-		arch = AUDIT_ARCH_PPC64;
+	if (is_32bit_task())
+		return AUDIT_ARCH_PPC;
+	else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+		return AUDIT_ARCH_PPC64LE;
 	else
-		arch = AUDIT_ARCH_PPC;
-
-#ifdef __LITTLE_ENDIAN__
-	arch |= __AUDIT_ARCH_LE;
-#endif
-	return arch;
+		return AUDIT_ARCH_PPC64;
 }
 #endif	/* _ASM_SYSCALL_H */
-- 
2.25.0


^ permalink raw reply related

* [PATCH] powerpc/audit: Avoid unneccessary #ifdef in syscall_get_arguments()
From: Christophe Leroy @ 2021-08-20  9:28 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Use is_32bit_task() which already handles CONFIG_COMPAT.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/syscall.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index ac766037e8a1..c60ebd04b2ed 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,10 +90,9 @@ static inline void syscall_get_arguments(struct task_struct *task,
 	unsigned long val, mask = -1UL;
 	unsigned int n = 6;
 
-#ifdef CONFIG_COMPAT
-	if (test_tsk_thread_flag(task, TIF_32BIT))
+	if (is_32bit_task())
 		mask = 0xffffffff;
-#endif
+
 	while (n--) {
 		if (n == 0)
 			val = regs->orig_gpr3;
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH] powerpc/audit: Convert powerpc to AUDIT_ARCH_COMPAT_GENERIC
From: Christophe Leroy @ 2021-08-20  9:26 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

No time to finalise commit description and testing before the
weekend, sending out as RFC to eventually get some feedback.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/Kconfig               |  5 +-
 arch/powerpc/kernel/Makefile       |  3 --
 arch/powerpc/kernel/audit.c        | 84 ------------------------------
 arch/powerpc/kernel/compat_audit.c | 44 ----------------
 4 files changed, 1 insertion(+), 135 deletions(-)
 delete mode 100644 arch/powerpc/kernel/audit.c
 delete mode 100644 arch/powerpc/kernel/compat_audit.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 663766fbf505..5472358609d2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -163,6 +163,7 @@ config PPC
 	select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WEAK_RELEASE_ACQUIRE
+	select AUDIT_ARCH_COMPAT_GENERIC
 	select BINFMT_ELF
 	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS
@@ -316,10 +317,6 @@ config GENERIC_TBSYNC
 	bool
 	default y if PPC32 && SMP
 
-config AUDIT_ARCH
-	bool
-	default y
-
 config GENERIC_BUG
 	bool
 	default y
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 7be36c1e1db6..825121eba3c2 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -125,9 +125,6 @@ obj-$(CONFIG_PCI)		+= pci_$(BITS).o $(pci64-y) \
 				   pci-common.o pci_of_scan.o
 obj-$(CONFIG_PCI_MSI)		+= msi.o
 
-obj-$(CONFIG_AUDIT)		+= audit.o
-obj64-$(CONFIG_AUDIT)		+= compat_audit.o
-
 obj-$(CONFIG_PPC_IO_WORKAROUNDS)	+= io-workarounds.o
 
 obj-y				+= trace/
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
deleted file mode 100644
index a2dddd7f3d09..000000000000
--- a/arch/powerpc/kernel/audit.c
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/audit.h>
-#include <asm/unistd.h>
-
-static unsigned dir_class[] = {
-#include <asm-generic/audit_dir_write.h>
-~0U
-};
-
-static unsigned read_class[] = {
-#include <asm-generic/audit_read.h>
-~0U
-};
-
-static unsigned write_class[] = {
-#include <asm-generic/audit_write.h>
-~0U
-};
-
-static unsigned chattr_class[] = {
-#include <asm-generic/audit_change_attr.h>
-~0U
-};
-
-static unsigned signal_class[] = {
-#include <asm-generic/audit_signal.h>
-~0U
-};
-
-int audit_classify_arch(int arch)
-{
-#ifdef CONFIG_PPC64
-	if (arch == AUDIT_ARCH_PPC)
-		return 1;
-#endif
-	return 0;
-}
-
-int audit_classify_syscall(int abi, unsigned syscall)
-{
-#ifdef CONFIG_PPC64
-	extern int ppc32_classify_syscall(unsigned);
-	if (abi == AUDIT_ARCH_PPC)
-		return ppc32_classify_syscall(syscall);
-#endif
-	switch(syscall) {
-	case __NR_open:
-		return 2;
-	case __NR_openat:
-		return 3;
-	case __NR_socketcall:
-		return 4;
-	case __NR_execve:
-		return 5;
-	default:
-		return 0;
-	}
-}
-
-static int __init audit_classes_init(void)
-{
-#ifdef CONFIG_PPC64
-	extern __u32 ppc32_dir_class[];
-	extern __u32 ppc32_write_class[];
-	extern __u32 ppc32_read_class[];
-	extern __u32 ppc32_chattr_class[];
-	extern __u32 ppc32_signal_class[];
-	audit_register_class(AUDIT_CLASS_WRITE_32, ppc32_write_class);
-	audit_register_class(AUDIT_CLASS_READ_32, ppc32_read_class);
-	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ppc32_dir_class);
-	audit_register_class(AUDIT_CLASS_CHATTR_32, ppc32_chattr_class);
-	audit_register_class(AUDIT_CLASS_SIGNAL_32, ppc32_signal_class);
-#endif
-	audit_register_class(AUDIT_CLASS_WRITE, write_class);
-	audit_register_class(AUDIT_CLASS_READ, read_class);
-	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
-	audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
-	audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
-	return 0;
-}
-
-__initcall(audit_classes_init);
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
deleted file mode 100644
index 55c6ccda0a85..000000000000
--- a/arch/powerpc/kernel/compat_audit.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#undef __powerpc64__
-#include <asm/unistd.h>
-
-unsigned ppc32_dir_class[] = {
-#include <asm-generic/audit_dir_write.h>
-~0U
-};
-
-unsigned ppc32_chattr_class[] = {
-#include <asm-generic/audit_change_attr.h>
-~0U
-};
-
-unsigned ppc32_write_class[] = {
-#include <asm-generic/audit_write.h>
-~0U
-};
-
-unsigned ppc32_read_class[] = {
-#include <asm-generic/audit_read.h>
-~0U
-};
-
-unsigned ppc32_signal_class[] = {
-#include <asm-generic/audit_signal.h>
-~0U
-};
-
-int ppc32_classify_syscall(unsigned syscall)
-{
-	switch(syscall) {
-	case __NR_open:
-		return 2;
-	case __NR_openat:
-		return 3;
-	case __NR_socketcall:
-		return 4;
-	case __NR_execve:
-		return 5;
-	default:
-		return 1;
-	}
-}
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH v8 2/3] tty: hvc: pass DMA capable memory to put_chars()
From: Xianting TIan @ 2021-08-20  8:43 UTC (permalink / raw)
  To: Daniel Axtens, gregkh, jirislaby, amit, arnd, osandov
  Cc: shile.zhang, linuxppc-dev, linux-kernel, virtualization
In-Reply-To: <87pmu8ehkk.fsf@linkitivity.dja.id.au>


在 2021/8/20 下午2:49, Daniel Axtens 写道:
> Xianting Tian <xianting.tian@linux.alibaba.com> writes:
>
>> As well known, hvc backend driver(eg, virtio-console) can register its
>> operations to hvc framework. The operations can contain put_chars(),
>> get_chars() and so on.
>>
>> Some hvc backend may do dma in its operations. eg, put_chars() of
>> virtio-console. But in the code of hvc framework, it may pass DMA
>> incapable memory to put_chars() under a specific configuration, which
>> is explained in commit c4baad5029(virtio-console: avoid DMA from stack):
> We could also run into issues on powerpc where Andrew is working on
> adding vmap-stack but the opal hvc driver assumes that it is passed a
> buffer which is not in vmalloc space but in the linear mapping. So it
> would be good to fix this (or more clearly document what drivers can
> expect).
>
>> 1, c[] is on stack,
>>     hvc_console_print():
>> 	char c[N_OUTBUF] __ALIGNED__;
>> 	cons_ops[index]->put_chars(vtermnos[index], c, i);
>> 2, ch is on stack,
>>     static void hvc_poll_put_char(,,char ch)
>>     {
>> 	struct tty_struct *tty = driver->ttys[0];
>> 	struct hvc_struct *hp = tty->driver_data;
>> 	int n;
>>
>> 	do {
>> 		n = hp->ops->put_chars(hp->vtermno, &ch, 1);
>> 	} while (n <= 0);
>>     }
>>
>> Commit c4baad5029 is just the fix to avoid DMA from stack memory, which
>> is passed to virtio-console by hvc framework in above code. But I think
>> the fix is aggressive, it directly uses kmemdup() to alloc new buffer
>> from kmalloc area and do memcpy no matter the memory is in kmalloc area
>> or not. But most importantly, it should better be fixed in the hvc
>> framework, by changing it to never pass stack memory to the put_chars()
>> function in the first place. Otherwise, we still face the same issue if
>> a new hvc backend using dma added in the future.
>>
>> In this patch, we make 'char out_buf[N_OUTBUF]' and 'chat out_ch' part
>> of 'struct hvc_struct', so both two buf are no longer the stack memory.
>> we can use it in above two cases separately.
>>
>> Introduce another array(cons_outbufs[]) for buffer pointers next to
>> the cons_ops[] and vtermnos[] arrays. With the array, we can easily find
>> the buffer, instead of traversing hp list.
>>
>> With the patch, we can remove the fix c4baad5029.
>>
>> Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
>> Reviewed-by: Shile Zhang <shile.zhang@linux.alibaba.com>
>>   struct hvc_struct {
>>   	struct tty_port port;
>>   	spinlock_t lock;
>>   	int index;
>>   	int do_wakeup;
>> -	char *outbuf;
>> -	int outbuf_size;
>>   	int n_outbuf;
>>   	uint32_t vtermno;
>>   	const struct hv_ops *ops;
>> @@ -48,6 +56,10 @@ struct hvc_struct {
>>   	struct work_struct tty_resize;
>>   	struct list_head next;
>>   	unsigned long flags;
>> +	char out_ch;
>> +	char out_buf[N_OUTBUF] __ALIGNED__;
>> +	int outbuf_size;
>> +	char outbuf[0] __ALIGNED__;
> I'm trying to understand this patch but I am finding it very difficult
> to understand what the difference between `out_buf` and `outbuf`
> (without the underscore) is supposed to be. `out_buf` is statically
> sized and the size of `outbuf` is supposed to depend on the arguments to
> hvc_alloc(), but I can't quite figure out what the roles of each one are
> and their names are confusingly similiar!
>
> I looked briefly at the older revisions of the series but it didn't make
> things much clearer.
>
> Could you give them clearer names?

thanks for the comments,

It is indeed not easy to understand by the name. I will change it to a 
proper name if we have next version patch.

Jiri Slaby is worring about the performance, because we need add two 
locks to protect 'out_ch' and 'out_buf' separately, the origin on-stack 
buffer is lockless.

I don't know whether this solution can be accepted, just waiting for 
Jiri's further commtents.

>
> Also, looking at Documentation/process/deprecated.rst, it looks like
> maybe we want to use a 'flexible array member' instead:
>
> .. note:: If you are using struct_size() on a structure containing a zero-length
>          or a one-element array as a trailing array member, please refactor such
>          array usage and switch to a `flexible array member
>          <#zero-length-and-one-element-arrays>`_ instead.
>
> I think we want:
thanks, we should use [], not [0].
>
>> +	char outbuf[] __ALIGNED__;
> Kind regards,
> Daniel

^ permalink raw reply

* Re: [PATCH v2 57/63] powerpc/signal32: Use struct_group() to zero spe regs
From: Christophe Leroy @ 2021-08-20  7:53 UTC (permalink / raw)
  To: Michael Ellerman, Kees Cook, linux-kernel
  Cc: kernel test robot, Rasmus Villemoes, Greg Kroah-Hartman,
	linux-staging, linux-wireless, Gustavo A. R. Silva, dri-devel,
	linux-block, clang-built-linux, netdev, Paul Mackerras,
	linux-hardening, Sudeep Holla, Andrew Morton, linuxppc-dev,
	linux-kbuild
In-Reply-To: <877dggeesw.fsf@mpe.ellerman.id.au>



Le 20/08/2021 à 09:49, Michael Ellerman a écrit :
> Kees Cook <keescook@chromium.org> writes:
>> In preparation for FORTIFY_SOURCE performing compile-time and run-time
>> field bounds checking for memset(), avoid intentionally writing across
>> neighboring fields.
>>
>> Add a struct_group() for the spe registers so that memset() can correctly reason
>> about the size:
>>
>>     In function 'fortify_memset_chk',
>>         inlined from 'restore_user_regs.part.0' at arch/powerpc/kernel/signal_32.c:539:3:
>>>> include/linux/fortify-string.h:195:4: error: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror=attribute-warning]
>>       195 |    __write_overflow_field();
>>           |    ^~~~~~~~~~~~~~~~~~~~~~~~
>>
>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> Cc: Paul Mackerras <paulus@samba.org>
>> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
>> Cc: Sudeep Holla <sudeep.holla@arm.com>
>> Cc: linuxppc-dev@lists.ozlabs.org
>> Reported-by: kernel test robot <lkp@intel.com>
>> Signed-off-by: Kees Cook <keescook@chromium.org>
>> ---
>>   arch/powerpc/include/asm/processor.h | 6 ++++--
>>   arch/powerpc/kernel/signal_32.c      | 6 +++---
>>   2 files changed, 7 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
>> index f348e564f7dd..05dc567cb9a8 100644
>> --- a/arch/powerpc/include/asm/processor.h
>> +++ b/arch/powerpc/include/asm/processor.h
>> @@ -191,8 +191,10 @@ struct thread_struct {
>>   	int		used_vsr;	/* set if process has used VSX */
>>   #endif /* CONFIG_VSX */
>>   #ifdef CONFIG_SPE
>> -	unsigned long	evr[32];	/* upper 32-bits of SPE regs */
>> -	u64		acc;		/* Accumulator */
>> +	struct_group(spe,
>> +		unsigned long	evr[32];	/* upper 32-bits of SPE regs */
>> +		u64		acc;		/* Accumulator */
>> +	);
>>   	unsigned long	spefscr;	/* SPE & eFP status */
>>   	unsigned long	spefscr_last;	/* SPEFSCR value on last prctl
>>   					   call or trap return */
>> diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
>> index 0608581967f0..77b86caf5c51 100644
>> --- a/arch/powerpc/kernel/signal_32.c
>> +++ b/arch/powerpc/kernel/signal_32.c
>> @@ -532,11 +532,11 @@ static long restore_user_regs(struct pt_regs *regs,
>>   	regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
>>   	if (msr & MSR_SPE) {
>>   		/* restore spe registers from the stack */
>> -		unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
>> -				      ELF_NEVRREG * sizeof(u32), failed);
>> +		unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
>> +				      sizeof(current->thread.spe), failed);
> 
> This makes me nervous, because the ABI is that we copy ELF_NEVRREG *
> sizeof(u32) bytes, not whatever sizeof(current->thread.spe) happens to
> be.
> 
> ie. if we use sizeof an inadvertent change to the fields in
> thread_struct could change how many bytes we copy out to userspace,
> which would be an ABI break.
> 
> And that's not that hard to do, because it's not at all obvious that the
> size and layout of fields in thread_struct affects the user ABI.
> 
> At the same time we don't want to copy the right number of bytes but
> the wrong content, so from that point of view using sizeof is good :)
> 
> The way we handle it in ptrace is to have BUILD_BUG_ON()s to verify that
> things match up, so maybe we should do that here too.
> 
> ie. add:
> 
> 	BUILD_BUG_ON(sizeof(current->thread.spe) == ELF_NEVRREG * sizeof(u32));

You mean != I guess ?


> 
> 
> Not sure if you are happy doing that as part of this patch. I can always
> do it later if not.
> 
> cheers
> 

^ permalink raw reply

* Re: [PATCH v2 57/63] powerpc/signal32: Use struct_group() to zero spe regs
From: Michael Ellerman @ 2021-08-20  7:49 UTC (permalink / raw)
  To: Kees Cook, linux-kernel
  Cc: Kees Cook, Rasmus Villemoes, Greg Kroah-Hartman, linux-staging,
	linux-wireless, Gustavo A. R. Silva, linux-hardening, linux-block,
	clang-built-linux, netdev, Paul Mackerras, dri-devel,
	Sudeep Holla, Andrew Morton, linuxppc-dev, linux-kbuild,
	kernel test robot
In-Reply-To: <20210818060533.3569517-58-keescook@chromium.org>

Kees Cook <keescook@chromium.org> writes:
> In preparation for FORTIFY_SOURCE performing compile-time and run-time
> field bounds checking for memset(), avoid intentionally writing across
> neighboring fields.
>
> Add a struct_group() for the spe registers so that memset() can correctly reason
> about the size:
>
>    In function 'fortify_memset_chk',
>        inlined from 'restore_user_regs.part.0' at arch/powerpc/kernel/signal_32.c:539:3:
>>> include/linux/fortify-string.h:195:4: error: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror=attribute-warning]
>      195 |    __write_overflow_field();
>          |    ^~~~~~~~~~~~~~~~~~~~~~~~
>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> Cc: Sudeep Holla <sudeep.holla@arm.com>
> Cc: linuxppc-dev@lists.ozlabs.org
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
>  arch/powerpc/include/asm/processor.h | 6 ++++--
>  arch/powerpc/kernel/signal_32.c      | 6 +++---
>  2 files changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
> index f348e564f7dd..05dc567cb9a8 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -191,8 +191,10 @@ struct thread_struct {
>  	int		used_vsr;	/* set if process has used VSX */
>  #endif /* CONFIG_VSX */
>  #ifdef CONFIG_SPE
> -	unsigned long	evr[32];	/* upper 32-bits of SPE regs */
> -	u64		acc;		/* Accumulator */
> +	struct_group(spe,
> +		unsigned long	evr[32];	/* upper 32-bits of SPE regs */
> +		u64		acc;		/* Accumulator */
> +	);
>  	unsigned long	spefscr;	/* SPE & eFP status */
>  	unsigned long	spefscr_last;	/* SPEFSCR value on last prctl
>  					   call or trap return */
> diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
> index 0608581967f0..77b86caf5c51 100644
> --- a/arch/powerpc/kernel/signal_32.c
> +++ b/arch/powerpc/kernel/signal_32.c
> @@ -532,11 +532,11 @@ static long restore_user_regs(struct pt_regs *regs,
>  	regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
>  	if (msr & MSR_SPE) {
>  		/* restore spe registers from the stack */
> -		unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
> -				      ELF_NEVRREG * sizeof(u32), failed);
> +		unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
> +				      sizeof(current->thread.spe), failed);

This makes me nervous, because the ABI is that we copy ELF_NEVRREG *
sizeof(u32) bytes, not whatever sizeof(current->thread.spe) happens to
be.

ie. if we use sizeof an inadvertent change to the fields in
thread_struct could change how many bytes we copy out to userspace,
which would be an ABI break.

And that's not that hard to do, because it's not at all obvious that the
size and layout of fields in thread_struct affects the user ABI.

At the same time we don't want to copy the right number of bytes but
the wrong content, so from that point of view using sizeof is good :)

The way we handle it in ptrace is to have BUILD_BUG_ON()s to verify that
things match up, so maybe we should do that here too.

ie. add:

	BUILD_BUG_ON(sizeof(current->thread.spe) == ELF_NEVRREG * sizeof(u32));


Not sure if you are happy doing that as part of this patch. I can always
do it later if not.

cheers

^ permalink raw reply

* Re: [PATCH v8 2/3] tty: hvc: pass DMA capable memory to put_chars()
From: Daniel Axtens @ 2021-08-20  6:49 UTC (permalink / raw)
  To: Xianting Tian, gregkh, jirislaby, amit, arnd, osandov
  Cc: Xianting Tian, shile.zhang, linuxppc-dev, linux-kernel,
	virtualization
In-Reply-To: <20210818082122.166881-3-xianting.tian@linux.alibaba.com>

Xianting Tian <xianting.tian@linux.alibaba.com> writes:

> As well known, hvc backend driver(eg, virtio-console) can register its
> operations to hvc framework. The operations can contain put_chars(),
> get_chars() and so on.
>
> Some hvc backend may do dma in its operations. eg, put_chars() of
> virtio-console. But in the code of hvc framework, it may pass DMA
> incapable memory to put_chars() under a specific configuration, which
> is explained in commit c4baad5029(virtio-console: avoid DMA from stack):

We could also run into issues on powerpc where Andrew is working on
adding vmap-stack but the opal hvc driver assumes that it is passed a
buffer which is not in vmalloc space but in the linear mapping. So it
would be good to fix this (or more clearly document what drivers can
expect).

> 1, c[] is on stack,
>    hvc_console_print():
> 	char c[N_OUTBUF] __ALIGNED__;
> 	cons_ops[index]->put_chars(vtermnos[index], c, i);
> 2, ch is on stack,
>    static void hvc_poll_put_char(,,char ch)
>    {
> 	struct tty_struct *tty = driver->ttys[0];
> 	struct hvc_struct *hp = tty->driver_data;
> 	int n;
>
> 	do {
> 		n = hp->ops->put_chars(hp->vtermno, &ch, 1);
> 	} while (n <= 0);
>    }
>
> Commit c4baad5029 is just the fix to avoid DMA from stack memory, which
> is passed to virtio-console by hvc framework in above code. But I think
> the fix is aggressive, it directly uses kmemdup() to alloc new buffer
> from kmalloc area and do memcpy no matter the memory is in kmalloc area
> or not. But most importantly, it should better be fixed in the hvc
> framework, by changing it to never pass stack memory to the put_chars()
> function in the first place. Otherwise, we still face the same issue if
> a new hvc backend using dma added in the future.
>
> In this patch, we make 'char out_buf[N_OUTBUF]' and 'chat out_ch' part
> of 'struct hvc_struct', so both two buf are no longer the stack memory.
> we can use it in above two cases separately.
>
> Introduce another array(cons_outbufs[]) for buffer pointers next to
> the cons_ops[] and vtermnos[] arrays. With the array, we can easily find
> the buffer, instead of traversing hp list.
>
> With the patch, we can remove the fix c4baad5029.
>
> Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
> Reviewed-by: Shile Zhang <shile.zhang@linux.alibaba.com>

>  struct hvc_struct {
>  	struct tty_port port;
>  	spinlock_t lock;
>  	int index;
>  	int do_wakeup;
> -	char *outbuf;
> -	int outbuf_size;
>  	int n_outbuf;
>  	uint32_t vtermno;
>  	const struct hv_ops *ops;
> @@ -48,6 +56,10 @@ struct hvc_struct {
>  	struct work_struct tty_resize;
>  	struct list_head next;
>  	unsigned long flags;
> +	char out_ch;
> +	char out_buf[N_OUTBUF] __ALIGNED__;
> +	int outbuf_size;
> +	char outbuf[0] __ALIGNED__;

I'm trying to understand this patch but I am finding it very difficult
to understand what the difference between `out_buf` and `outbuf`
(without the underscore) is supposed to be. `out_buf` is statically
sized and the size of `outbuf` is supposed to depend on the arguments to
hvc_alloc(), but I can't quite figure out what the roles of each one are
and their names are confusingly similiar!

I looked briefly at the older revisions of the series but it didn't make
things much clearer.

Could you give them clearer names?

Also, looking at Documentation/process/deprecated.rst, it looks like
maybe we want to use a 'flexible array member' instead:

.. note:: If you are using struct_size() on a structure containing a zero-length
        or a one-element array as a trailing array member, please refactor such
        array usage and switch to a `flexible array member
        <#zero-length-and-one-element-arrays>`_ instead.

I think we want:

> +	char outbuf[] __ALIGNED__;

Kind regards,
Daniel

^ permalink raw reply

* [PATCH] powerpc: Avoid link stack corruption in {add_}reloc_offset
From: Christophe Leroy @ 2021-08-20  5:16 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

reloc_offset() / add_reloc_offset() used bl;mflr to get code
position.

Use bcl 20,31,+4 instead of bl in order to preserve link stack.

See commit c974809a26a1 ("powerpc/vdso: Avoid link stack corruption
in __get_datapage()") for details.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/misc.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 5be96feccb55..acd557637a3c 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -29,7 +29,7 @@ _GLOBAL(reloc_offset)
 	li	r3, 0
 _GLOBAL(add_reloc_offset)
 	mflr	r0
-	bl	1f
+	bcl	20,31,1f
 1:	mflr	r5
 	PPC_LL	r4,(2f-1b)(r5)
 	subf	r5,r4,r5
-- 
2.25.0


^ permalink raw reply related

* [PATCH] powerpc/32: indirect function call use bctrl rather than blrl in ret_from_kernel_thread
From: Christophe Leroy @ 2021-08-20  5:16 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Copied from commit 89bbe4c798bc ("powerpc/64: indirect function call
use bctrl rather than blrl in ret_from_kernel_thread")

blrl is not recommended to use as an indirect function call, as it may
corrupt the link stack predictor.

This is not a performance critical path but this should be fixed for
consistency.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/entry_32.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0273a1349006..61fdd53cdd9a 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -161,10 +161,10 @@ ret_from_fork:
 ret_from_kernel_thread:
 	REST_NVGPRS(r1)
 	bl	schedule_tail
-	mtlr	r14
+	mtctr	r14
 	mr	r3,r15
 	PPC440EP_ERR42
-	blrl
+	bctrl
 	li	r3,0
 	b	ret_from_syscall
 
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH v2 2/2] powerpc: rectify selection to ARCH_ENABLE_SPLIT_PMD_PTLOCK
From: Michael Ellerman @ 2021-08-20  4:25 UTC (permalink / raw)
  To: Daniel Axtens, Lukas Bulwahn, Paul Mackerras,
	Benjamin Herrenschmidt, Michael Neuling, Anshuman Khandual,
	kvm-ppc, linuxppc-dev
  Cc: Lukas Bulwahn, kernel-janitors, linux-kernel, stable
In-Reply-To: <87pmu99e4j.fsf@dja-thinkpad.axtens.net>

Daniel Axtens <dja@axtens.net> writes:
> Lukas Bulwahn <lukas.bulwahn@gmail.com> writes:
>
>> Commit 66f24fa766e3 ("mm: drop redundant ARCH_ENABLE_SPLIT_PMD_PTLOCK")
>> selects the non-existing config ARCH_ENABLE_PMD_SPLIT_PTLOCK in
>> ./arch/powerpc/platforms/Kconfig.cputype, but clearly it intends to select
>> ARCH_ENABLE_SPLIT_PMD_PTLOCK here (notice the word swapping!), as this
>> commit does select that for all other architectures.
>>
>> Rectify selection to ARCH_ENABLE_SPLIT_PMD_PTLOCK instead.
>>
>
> Yikes, yes, 66f24fa766e3 does seem to have got that wrong. It looks like
> that went into 5.13.
>
> I think we want to specifically target this for stable so that we don't
> lose the perfomance and scalability benefits of split pmd ptlocks:
>
> Cc: stable@vger.kernel.org # v5.13+
>
> (I don't think you need to do another revision for this, I think mpe
> could add it when merging.)

Yeah. I rewrote the change log a bit to make it clear this is a bug fix,
not a harmless cleanup.

cheers


  powerpc: Re-enable ARCH_ENABLE_SPLIT_PMD_PTLOCK
  
  Commit 66f24fa766e3 ("mm: drop redundant ARCH_ENABLE_SPLIT_PMD_PTLOCK")
  broke PMD split page table lock for powerpc.
  
  It selects the non-existent config ARCH_ENABLE_PMD_SPLIT_PTLOCK in
  arch/powerpc/platforms/Kconfig.cputype, but clearly intended to
  select ARCH_ENABLE_SPLIT_PMD_PTLOCK (notice the word swapping!), as
  that commit did for all other architectures.
  
  Fix it by selecting the correct symbol ARCH_ENABLE_SPLIT_PMD_PTLOCK.
  
  Fixes: 66f24fa766e3 ("mm: drop redundant ARCH_ENABLE_SPLIT_PMD_PTLOCK")
  Cc: stable@vger.kernel.org # v5.13+
  Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
  Reviewed-by: Daniel Axtens <dja@axtens.net>
  [mpe: Reword change log to make it clear this is a bug fix]
  Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  Link: https://lore.kernel.org/r/20210819113954.17515-3-lukas.bulwahn@gmail.com

^ permalink raw reply

* [PATCH linux-next] ps3: remove unneeded semicolon
From: jing yangyang @ 2021-08-20  2:49 UTC (permalink / raw)
  To: Geoff Levand
  Cc: jing yangyang, Zeal Robot, linux-kernel, Paul Mackerras,
	linuxppc-dev

Eliminate the following coccicheck warning:

./arch/powerpc/platforms/ps3/system-bus.c:606:2-3: Unneeded semicolon
./arch/powerpc/platforms/ps3/system-bus.c:765:2-3: Unneeded semicolon

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: jing yangyang <jing.yangyang@zte.com.cn>
---
 arch/powerpc/platforms/ps3/system-bus.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index c8b50fe..b637bf2 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -603,7 +603,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page,
 	default:
 		/* not happned */
 		BUG();
-	};
+	}
 	result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
 			     &bus_addr, iopte_flag);
 
@@ -762,7 +762,7 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev)
 		break;
 	default:
 		BUG();
-	};
+	}
 
 	dev->core.of_node = NULL;
 	set_dev_node(&dev->core, 0);
-- 
1.8.3.1



^ permalink raw reply related

* [PATCH linux-next] macintosh: fix warning comparing pointer to 0
From: jing yangyang @ 2021-08-20  2:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Zeal Robot, linuxppc-dev, linux-kernel, jing yangyang

Fix the following coccicheck warning:

./drivers/macintosh/windfarm_pm91.c:152:12-13:WARNING comparing pointer to 0

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: jing yangyang <jing.yangyang@zte.com.cn>
---
 drivers/macintosh/windfarm_pm91.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c
index 3f346af..568f8a2 100644
--- a/drivers/macintosh/windfarm_pm91.c
+++ b/drivers/macintosh/windfarm_pm91.c
@@ -149,7 +149,7 @@ static void wf_smu_create_cpu_fans(void)
 
 	/* First, locate the PID params in SMU SBD */
 	hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL);
-	if (hdr == 0) {
+	if (!hdr) {
 		printk(KERN_WARNING "windfarm: CPU PID fan config not found "
 		       "max fan speed\n");
 		goto fail;
-- 
1.8.3.1



^ permalink raw reply related

* [powerpc:fixes-test] BUILD SUCCESS 9f7853d7609d59172eecfc5e7ccf503bc1b690bd
From: kernel test robot @ 2021-08-20  0:18 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: 9f7853d7609d59172eecfc5e7ccf503bc1b690bd  powerpc/mm: Fix set_memory_*() against concurrent accesses

elapsed time: 1357m

configs tested: 129
configs skipped: 123

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
i386                 randconfig-c001-20210819
i386                 randconfig-c001-20210818
powerpc                      pcm030_defconfig
arm                         socfpga_defconfig
m68k                       m5275evb_defconfig
sh                           se7619_defconfig
powerpc                  iss476-smp_defconfig
mips                         rt305x_defconfig
arc                              allyesconfig
powerpc                 linkstation_defconfig
arm                       netwinder_defconfig
alpha                               defconfig
arm                         hackkit_defconfig
mips                  maltasmvp_eva_defconfig
mips                            ar7_defconfig
x86_64                              defconfig
arm                          ixp4xx_defconfig
powerpc                 mpc834x_mds_defconfig
powerpc                         ps3_defconfig
sh                           se7724_defconfig
parisc                generic-32bit_defconfig
powerpc                      pmac32_defconfig
sh                          rsk7203_defconfig
arm                           stm32_defconfig
powerpc                       ppc64_defconfig
mips                 decstation_r4k_defconfig
powerpc                    sam440ep_defconfig
powerpc                     pq2fads_defconfig
m68k                        m5407c3_defconfig
sh                               j2_defconfig
m68k                         amcore_defconfig
arm                           viper_defconfig
riscv                    nommu_k210_defconfig
powerpc                      ep88xc_defconfig
arm                        cerfcube_defconfig
arm                       aspeed_g4_defconfig
nios2                         10m50_defconfig
m68k                       m5249evb_defconfig
m68k                       bvme6000_defconfig
microblaze                          defconfig
um                                  defconfig
x86_64                           allyesconfig
mips                          rm200_defconfig
sh                   sh7770_generic_defconfig
arm                          iop32x_defconfig
powerpc                        icon_defconfig
x86_64                            allnoconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
nds32                             allnoconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
s390                             allmodconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a004-20210818
x86_64               randconfig-a006-20210818
x86_64               randconfig-a003-20210818
x86_64               randconfig-a005-20210818
x86_64               randconfig-a002-20210818
x86_64               randconfig-a001-20210818
i386                 randconfig-a004-20210818
i386                 randconfig-a006-20210818
i386                 randconfig-a002-20210818
i386                 randconfig-a001-20210818
i386                 randconfig-a003-20210818
i386                 randconfig-a005-20210818
i386                 randconfig-a015-20210819
i386                 randconfig-a011-20210819
i386                 randconfig-a014-20210819
i386                 randconfig-a013-20210819
i386                 randconfig-a016-20210819
i386                 randconfig-a012-20210819
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                    rhel-8.3-kselftests
um                           x86_64_defconfig
um                             i386_defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
i386                 randconfig-c001-20210818
x86_64               randconfig-a004-20210819
x86_64               randconfig-a006-20210819
x86_64               randconfig-a003-20210819
x86_64               randconfig-a002-20210819
x86_64               randconfig-a005-20210819
x86_64               randconfig-a001-20210819
i386                 randconfig-a004-20210819
i386                 randconfig-a006-20210819
i386                 randconfig-a001-20210819
i386                 randconfig-a002-20210819
i386                 randconfig-a003-20210819
i386                 randconfig-a005-20210819
x86_64               randconfig-a013-20210818
x86_64               randconfig-a011-20210818
x86_64               randconfig-a012-20210818
x86_64               randconfig-a016-20210818
x86_64               randconfig-a014-20210818
x86_64               randconfig-a015-20210818

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [Bug 213079] [bisected] IRQ problems and crashes on a PowerMac G5 with 5.12.3
From: bugzilla-daemon @ 2021-08-20  0:13 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-213079-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=213079

Erhard F. (erhard_f@mailbox.org) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
 Attachment #297439|0                           |1
        is obsolete|                            |

--- Comment #17 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 298373
  --> https://bugzilla.kernel.org/attachment.cgi?id=298373&action=edit
kernel .config (5.14-rc6, PowerMac G5 11,2)

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [Bug 213079] [bisected] IRQ problems and crashes on a PowerMac G5 with 5.12.3
From: bugzilla-daemon @ 2021-08-20  0:12 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-213079-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=213079

Erhard F. (erhard_f@mailbox.org) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
 Attachment #297473|0                           |1
        is obsolete|                            |

--- Comment #16 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 298371
  --> https://bugzilla.kernel.org/attachment.cgi?id=298371&action=edit
dmesg (5.14-rc6, PowerMac G5 11,2)

As there is a fix now for bug #213803 I was able to build v5.14-rc6 and gave it
a testride. Looks like the issue persists:

[...]
irq 63: nobody cared (try booting with the "irqpoll" option)
CPU: 0 PID: 10732 Comm: emerge Tainted: G        W        
5.14.0-rc6-PowerMacG5+ #2
Call Trace:
[c00000000fff7af0] [c00000000054de24] .dump_stack_lvl+0x98/0xe0 (unreliable)
[c00000000fff7b80] [c0000000000e1724] .__report_bad_irq+0x34/0xf0
[c00000000fff7c20] [c0000000000e160c] .note_interrupt+0x258/0x300
[c00000000fff7ce0] [c0000000000dd840] .handle_irq_event_percpu+0x5c/0x88
[c00000000fff7d70] [c0000000000dd8b0] .handle_irq_event+0x44/0x70
[c00000000fff7e00] [c0000000000e2d34] .handle_fasteoi_irq+0xac/0x158
[c00000000fff7ea0] [c0000000000dc8bc] .handle_irq_desc+0x34/0x54
[c00000000fff7f10] [c000000000012058] .__do_irq+0x15c/0x238
[c00000000fff7f90] [c000000000012978] .__do_IRQ+0xac/0xb4
[c00000001e9cfcf0] [c00000001e9cfd90] 0xc00000001e9cfd90
[c00000001e9cfd90] [c000000000012ac4] .do_IRQ+0x144/0x194
[c00000001e9cfe10] [c000000000008050]
hardware_interrupt_common_virt+0x210/0x220
--- interrupt: 500 at 0x3fffb9b25d9c
NIP:  00003fffb9b25d9c LR: 00003fffb9b2811c CTR: 00003fffb9b25d9c
REGS: c00000001e9cfe80 TRAP: 0500   Tainted: G        W         
(5.14.0-rc6-PowerMacG5+)
MSR:  900000000000f032 <SF,HV,EE,PR,FP,ME,IR,DR,RI>  CR: 22482822  XER:
20000000
IRQMASK: 0 
GPR00: 00003fffb9b28100 00003ffffd4e7550 00003fffb9ef6200 00003fffb7977790 
GPR04: 00003fffb7977790 00003fffb55e8b80 0000000000000000 00003fffb9eccac0 
GPR08: 00003fffb9b25d9c 0000000000000000 000000000000000f 0000000000000000 
GPR12: 00003fffb9b7eeb0 00003fffb9fc8890 00003ffffd4e7658 00003fffb395c548 
GPR16: 00003ffffd4e7670 ffffffffffffffff 00003fffb7902480 ffffffffffffffff 
GPR20: 0000000000000000 00003fffb395c528 000000014b8f7878 0000000000000000 
GPR24: 00003fffb7969a80 000000014b8f7830 00003fffb7a750d0 000000000000000a 
GPR28: 00003fffb7a750dc 000000000000007c 000000014b8f9420 00003fffb395c3c0 
NIP [00003fffb9b25d9c] 0x3fffb9b25d9c
LR [00003fffb9b2811c] 0x3fffb9b2811c
--- interrupt: 500
handlers:
[<c0000000015a6568>] .nvme_irq
[<c0000000015a6568>] .nvme_irq
Disabling IRQ #63

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* Re: [PATCH v2 2/2] powerpc: rectify selection to ARCH_ENABLE_SPLIT_PMD_PTLOCK
From: Daniel Axtens @ 2021-08-20  0:03 UTC (permalink / raw)
  To: Lukas Bulwahn, Paul Mackerras, Michael Ellerman,
	Benjamin Herrenschmidt, Michael Neuling, Anshuman Khandual,
	kvm-ppc, linuxppc-dev
  Cc: Lukas Bulwahn, kernel-janitors, linux-kernel, stable
In-Reply-To: <20210819113954.17515-3-lukas.bulwahn@gmail.com>

Lukas Bulwahn <lukas.bulwahn@gmail.com> writes:

> Commit 66f24fa766e3 ("mm: drop redundant ARCH_ENABLE_SPLIT_PMD_PTLOCK")
> selects the non-existing config ARCH_ENABLE_PMD_SPLIT_PTLOCK in
> ./arch/powerpc/platforms/Kconfig.cputype, but clearly it intends to select
> ARCH_ENABLE_SPLIT_PMD_PTLOCK here (notice the word swapping!), as this
> commit does select that for all other architectures.
>
> Rectify selection to ARCH_ENABLE_SPLIT_PMD_PTLOCK instead.
>

Yikes, yes, 66f24fa766e3 does seem to have got that wrong. It looks like
that went into 5.13.

I think we want to specifically target this for stable so that we don't
lose the perfomance and scalability benefits of split pmd ptlocks:

Cc: stable@vger.kernel.org # v5.13+

(I don't think you need to do another revision for this, I think mpe
could add it when merging.)

I tried to check whether we accidentally broke SPLIT_PMD_PTLOCKs while
they were disabled:

 - There hasn't been any change to the pgtable_pmd_page_ctor or _dtor
   prototypes, and we haven't made any relevant changes to any of the
   files in arch/powerpc that called it.

 - I checked out v5.13 and powerpc/merge, applied this patch, built a
   pseries_le_defconfig and boot tested it in qemu. It didn't crash on
   boot or with /bin/sh and some shell commands, but I didn't exactly
   stress test the VM subsystem either.

This gives me some confidence it's both good for powerpc and stable-worthy.

Overall:
Reviewed-by: Daniel Axtens <dja@axtens.net>

Kind regards,
Daniel

> Fixes: 66f24fa766e3 ("mm: drop redundant ARCH_ENABLE_SPLIT_PMD_PTLOCK")
> Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
> ---
>  arch/powerpc/platforms/Kconfig.cputype | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
> index 6794145603de..a208997ade88 100644
> --- a/arch/powerpc/platforms/Kconfig.cputype
> +++ b/arch/powerpc/platforms/Kconfig.cputype
> @@ -98,7 +98,7 @@ config PPC_BOOK3S_64
>  	select PPC_HAVE_PMU_SUPPORT
>  	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
>  	select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
> -	select ARCH_ENABLE_PMD_SPLIT_PTLOCK
> +	select ARCH_ENABLE_SPLIT_PMD_PTLOCK
>  	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
>  	select ARCH_SUPPORTS_HUGETLBFS
>  	select ARCH_SUPPORTS_NUMA_BALANCING
> -- 
> 2.26.2

^ permalink raw reply

* Re: [PATCH 1/5] KVM: rseq: Update rseq when processing NOTIFY_RESUME on xfer to KVM guest
From: Sean Christopherson @ 2021-08-19 23:48 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: KVM list, Peter Zijlstra, linux-kernel, Will Deacon, Guo Ren,
	linux-kselftest, Ben Gardon, shuah, Paul Mackerras, linux-s390,
	gor, Russell King, ARM Linux, linux-csky, Christian Borntraeger,
	Ingo Molnar, Catalin Marinas, linux-mips, Boqun Feng, paulmck,
	Heiko Carstens, rostedt, Shakeel Butt, Andy Lutomirski,
	Thomas Gleixner, Peter Foley, linux-arm-kernel,
	Thomas Bogendoerfer, Oleg Nesterov, Paolo Bonzini, linuxppc-dev
In-Reply-To: <1673583543.19718.1629409152244.JavaMail.zimbra@efficios.com>

On Thu, Aug 19, 2021, Mathieu Desnoyers wrote:
> ----- On Aug 17, 2021, at 8:12 PM, Sean Christopherson seanjc@google.com wrote:
> > @@ -250,7 +250,7 @@ static int rseq_ip_fixup(struct pt_regs *regs)
> > 	 * If not nested over a rseq critical section, restart is useless.
> > 	 * Clear the rseq_cs pointer and return.
> > 	 */
> > -	if (!in_rseq_cs(ip, &rseq_cs))
> > +	if (!regs || !in_rseq_cs(ip, &rseq_cs))
> 
> I think clearing the thread's rseq_cs unconditionally here when regs is NULL
> is not the behavior we want when this is called from xfer_to_guest_mode_work.
> 
> If we have a scenario where userspace ends up calling this ioctl(KVM_RUN)
> from within a rseq c.s., we really want a CONFIG_DEBUG_RSEQ=y kernel to
> kill this application in the rseq_syscall handler when exiting back to usermode
> when the ioctl eventually returns.
> 
> However, clearing the thread's rseq_cs will prevent this from happening.
> 
> So I would favor an approach where we simply do:
> 
> if (!regs)
>      return 0;
> 
> Immediately at the beginning of rseq_ip_fixup, before getting the instruction
> pointer, so effectively skip all side-effects of the ip fixup code. Indeed, it
> is not relevant to do any fixup here, because it is nested in a ioctl system
> call.
> 
> Effectively, this would preserve the SIGSEGV behavior when this ioctl is
> erroneously called by user-space from a rseq critical section.

Ha, that's effectively what I implemented first, but I changed it because of the
comment in clear_rseq_cs() that says:

  The rseq_cs field is set to NULL on preemption or signal delivery ... as well
  as well as on top of code outside of the rseq assembly block.

Which makes it sound like something might rely on clearing rseq_cs?

Ah, or is it the case that rseq_cs is non-NULL if and only if userspace is in an
rseq critical section, and because syscalls in critical sections are illegal, by
definition clearing rseq_cs is a nop unless userspace is misbehaving.

If that's true, what about explicitly checking that at NOTIFY_RESUME?  Or is it
not worth the extra code to detect an error that will likely be caught anyways?

diff --git a/kernel/rseq.c b/kernel/rseq.c
index 35f7bd0fced0..28b8342290b0 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -282,6 +282,13 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)

        if (unlikely(t->flags & PF_EXITING))
                return;
+       if (!regs) {
+#ifdef CONFIG_DEBUG_RSEQ
+               if (t->rseq && rseq_get_rseq_cs(t, &rseq_cs))
+                       goto error;
+#endif
+               return;
+       }
        ret = rseq_ip_fixup(regs);
        if (unlikely(ret < 0))
                goto error;

> Thanks for looking into this !
> 
> Mathieu
> 
> > 		return clear_rseq_cs(t);
> > 	ret = rseq_need_restart(t, rseq_cs.flags);
> > 	if (ret <= 0)
> > --
> > 2.33.0.rc1.237.g0d66db33f3-goog
> 
> -- 
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com

^ permalink raw reply related

* Re: [PATCH 4/5] KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration bugs
From: Sean Christopherson @ 2021-08-19 23:33 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: KVM list, Peter Zijlstra, linux-kernel, Will Deacon, Guo Ren,
	linux-kselftest, Ben Gardon, shuah, Paul Mackerras, linux-s390,
	gor, Russell King, ARM Linux, linux-csky, Christian Borntraeger,
	Ingo Molnar, Catalin Marinas, linux-mips, Boqun Feng, paulmck,
	Heiko Carstens, rostedt, Shakeel Butt, Andy Lutomirski,
	Thomas Gleixner, Peter Foley, linux-arm-kernel,
	Thomas Bogendoerfer, Oleg Nesterov, Paolo Bonzini, linuxppc-dev
In-Reply-To: <1540548616.19739.1629409956315.JavaMail.zimbra@efficios.com>

On Thu, Aug 19, 2021, Mathieu Desnoyers wrote:
> ----- On Aug 17, 2021, at 8:12 PM, Sean Christopherson seanjc@google.com wrote:
> 
> > Add a test to verify an rseq's CPU ID is updated correctly if the task is
> > migrated while the kernel is handling KVM_RUN.  This is a regression test
> > for a bug introduced by commit 72c3c0fe54a3 ("x86/kvm: Use generic xfer
> > to guest work function"), where TIF_NOTIFY_RESUME would be cleared by KVM
> > without updating rseq, leading to a stale CPU ID and other badness.
> > 
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> 
> [...]
> 
> > +	while (!done) {
> > +		vcpu_run(vm, VCPU_ID);
> > +		TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
> > +			    "Guest failed?");
> > +
> > +		cpu = sched_getcpu();
> > +		rseq_cpu = READ_ONCE(__rseq.cpu_id);
> > +
> > +		/*
> > +		 * Verify rseq's CPU matches sched's CPU, and that sched's CPU
> > +		 * is stable.  This doesn't handle the case where the task is
> > +		 * migrated between sched_getcpu() and reading rseq, and again
> > +		 * between reading rseq and sched_getcpu(), but in practice no
> > +		 * false positives have been observed, while on the other hand
> > +		 * blocking migration while this thread reads CPUs messes with
> > +		 * the timing and prevents hitting failures on a buggy kernel.
> > +		 */
> 
> I think you could get a stable cpu id between sched_getcpu and __rseq_abi.cpu_id
> if you add a pthread mutex to protect:
> 
> sched_getcpu and __rseq_abi.cpu_id  reads
> 
> vs
> 
> sched_setaffinity calls within the migration thread.
> 
> Thoughts ?

I tried that and couldn't reproduce the bug.  That's what I attempted to call out
in the blurb "blocking migration while this thread reads CPUs ... prevents hitting
failures on a buggy kernel".

I considered adding arbitrary delays around the mutex to try and hit the bug, but
I was worried that even if I got it "working" for this bug, the test would be too
tailored to this bug and potentially miss future regression.  Letting the two
threads run wild seemed like it would provide the best coverage, at the cost of
potentially causing to false failures.

^ permalink raw reply

* Re: [PATCH v2 1/2] powerpc: kvm: remove obsolete and unneeded select
From: Daniel Axtens @ 2021-08-19 22:39 UTC (permalink / raw)
  To: Lukas Bulwahn, Paul Mackerras, Michael Ellerman,
	Benjamin Herrenschmidt, Michael Neuling, Anshuman Khandual,
	kvm-ppc, linuxppc-dev
  Cc: Lukas Bulwahn, kernel-janitors, linux-kernel, stable
In-Reply-To: <20210819113954.17515-2-lukas.bulwahn@gmail.com>

Hi Lukas,

> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
> index e45644657d49..ff581d70f20c 100644
> --- a/arch/powerpc/kvm/Kconfig
> +++ b/arch/powerpc/kvm/Kconfig
> @@ -38,7 +38,6 @@ config KVM_BOOK3S_32_HANDLER
>  config KVM_BOOK3S_64_HANDLER
>  	bool
>  	select KVM_BOOK3S_HANDLER
> -	select PPC_DAWR_FORCE_ENABLE

I looked at some of the history here. It looks like this select was left
over from an earlier version of the patch series that added PPC_DAWR: v2
of the series has a new symbol PPC_DAWR_FORCE_ENABLE but by version 4
that new symbol had disappeared but the select had not.

v2: https://lore.kernel.org/linuxppc-dev/20190513071703.25243-1-mikey@neuling.org/
v5: https://lore.kernel.org/linuxppc-dev/20190604030037.9424-2-mikey@neuling.org/

The rest of the patch reasoning makes sense to me: DAWR support will be
selected anyway by virtue of PPC64->PPC_DAWR so there's no need to try
to select it again anyway.

Reviewed-by: Daniel Axtens <dja@axtens.net>

Kind regards,
Daniel

>  
>  config KVM_BOOK3S_PR_POSSIBLE
>  	bool
> -- 
> 2.26.2

^ permalink raw reply

* Re: [PATCH 4/5] KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration bugs
From: Mathieu Desnoyers @ 2021-08-19 21:52 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: KVM list, Peter Zijlstra, linux-kernel, Will Deacon, Guo Ren,
	linux-kselftest, Ben Gardon, shuah, Paul Mackerras, linux-s390,
	gor, Russell King, ARM Linux, linux-csky, Christian Borntraeger,
	Ingo Molnar, Catalin Marinas, linux-mips, Boqun Feng, paulmck,
	Heiko Carstens, rostedt, Shakeel Butt, Andy Lutomirski,
	Thomas Gleixner, Peter Foley, linux-arm-kernel,
	Thomas Bogendoerfer, Oleg Nesterov, Paolo Bonzini, linuxppc-dev
In-Reply-To: <20210818001210.4073390-5-seanjc@google.com>

----- On Aug 17, 2021, at 8:12 PM, Sean Christopherson seanjc@google.com wrote:

> Add a test to verify an rseq's CPU ID is updated correctly if the task is
> migrated while the kernel is handling KVM_RUN.  This is a regression test
> for a bug introduced by commit 72c3c0fe54a3 ("x86/kvm: Use generic xfer
> to guest work function"), where TIF_NOTIFY_RESUME would be cleared by KVM
> without updating rseq, leading to a stale CPU ID and other badness.
> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---

[...]

> +
> +static void *migration_worker(void *ign)
> +{
> +	cpu_set_t allowed_mask;
> +	int r, i, nr_cpus, cpu;
> +
> +	CPU_ZERO(&allowed_mask);
> +
> +	nr_cpus = CPU_COUNT(&possible_mask);
> +
> +	for (i = 0; i < 20000; i++) {
> +		cpu = i % nr_cpus;
> +		if (!CPU_ISSET(cpu, &possible_mask))
> +			continue;
> +
> +		CPU_SET(cpu, &allowed_mask);
> +
> +		r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
> +		TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)", errno,
> +			    strerror(errno));
> +
> +		CPU_CLR(cpu, &allowed_mask);
> +
> +		usleep(10);
> +	}
> +	done = true;
> +	return NULL;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	struct kvm_vm *vm;
> +	u32 cpu, rseq_cpu;
> +	int r;
> +
> +	/* Tell stdout not to buffer its content */
> +	setbuf(stdout, NULL);
> +
> +	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
> +	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
> +		    strerror(errno));
> +
> +	if (CPU_COUNT(&possible_mask) < 2) {
> +		print_skip("Only one CPU, task migration not possible\n");
> +		exit(KSFT_SKIP);
> +	}
> +
> +	sys_rseq(0);
> +
> +	/*
> +	 * Create and run a dummy VM that immediately exits to userspace via
> +	 * GUEST_SYNC, while concurrently migrating the process by setting its
> +	 * CPU affinity.
> +	 */
> +	vm = vm_create_default(VCPU_ID, 0, guest_code);
> +
> +	pthread_create(&migration_thread, NULL, migration_worker, 0);
> +
> +	while (!done) {
> +		vcpu_run(vm, VCPU_ID);
> +		TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
> +			    "Guest failed?");
> +
> +		cpu = sched_getcpu();
> +		rseq_cpu = READ_ONCE(__rseq.cpu_id);
> +
> +		/*
> +		 * Verify rseq's CPU matches sched's CPU, and that sched's CPU
> +		 * is stable.  This doesn't handle the case where the task is
> +		 * migrated between sched_getcpu() and reading rseq, and again
> +		 * between reading rseq and sched_getcpu(), but in practice no
> +		 * false positives have been observed, while on the other hand
> +		 * blocking migration while this thread reads CPUs messes with
> +		 * the timing and prevents hitting failures on a buggy kernel.
> +		 */

I think you could get a stable cpu id between sched_getcpu and __rseq_abi.cpu_id
if you add a pthread mutex to protect:

sched_getcpu and __rseq_abi.cpu_id  reads

vs

sched_setaffinity calls within the migration thread.

Thoughts ?

Thanks,

Mathieu

> +		TEST_ASSERT(rseq_cpu == cpu || cpu != sched_getcpu(),
> +			    "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
> +	}
> +
> +	pthread_join(migration_thread, NULL);
> +
> +	kvm_vm_free(vm);
> +
> +	sys_rseq(RSEQ_FLAG_UNREGISTER);
> +
> +	return 0;
> +}
> --
> 2.33.0.rc1.237.g0d66db33f3-goog

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply

* Re: [PATCH 1/5] KVM: rseq: Update rseq when processing NOTIFY_RESUME on xfer to KVM guest
From: Mathieu Desnoyers @ 2021-08-19 21:39 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: KVM list, Peter Zijlstra, linux-kernel, Will Deacon, Guo Ren,
	linux-kselftest, Ben Gardon, shuah, Paul Mackerras, linux-s390,
	gor, Russell King, ARM Linux, linux-csky, Christian Borntraeger,
	Ingo Molnar, Catalin Marinas, linux-mips, Boqun Feng, paulmck,
	Heiko Carstens, rostedt, Shakeel Butt, Andy Lutomirski,
	Thomas Gleixner, Peter Foley, linux-arm-kernel,
	Thomas Bogendoerfer, Oleg Nesterov, Paolo Bonzini, linuxppc-dev
In-Reply-To: <20210818001210.4073390-2-seanjc@google.com>

----- On Aug 17, 2021, at 8:12 PM, Sean Christopherson seanjc@google.com wrote:

> Invoke rseq's NOTIFY_RESUME handler when processing the flag prior to
> transferring to a KVM guest, which is roughly equivalent to an exit to
> userspace and processes many of the same pending actions.  While the task
> cannot be in an rseq critical section as the KVM path is reachable only
> via ioctl(KVM_RUN), the side effects that apply to rseq outside of a
> critical section still apply, e.g. the CPU ID needs to be updated if the
> task is migrated.
> 
> Clearing TIF_NOTIFY_RESUME without informing rseq can lead to segfaults
> and other badness in userspace VMMs that use rseq in combination with KVM,
> e.g. due to the CPU ID being stale after task migration.

I agree with the problem assessment, but I would recommend a small change
to this fix.

> 
> Fixes: 72c3c0fe54a3 ("x86/kvm: Use generic xfer to guest work function")
> Reported-by: Peter Foley <pefoley@google.com>
> Bisected-by: Doug Evans <dje@google.com>
> Cc: Shakeel Butt <shakeelb@google.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: stable@vger.kernel.org
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> kernel/entry/kvm.c | 4 +++-
> kernel/rseq.c      | 4 ++--
> 2 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
> index 49972ee99aff..049fd06b4c3d 100644
> --- a/kernel/entry/kvm.c
> +++ b/kernel/entry/kvm.c
> @@ -19,8 +19,10 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu,
> unsigned long ti_work)
> 		if (ti_work & _TIF_NEED_RESCHED)
> 			schedule();
> 
> -		if (ti_work & _TIF_NOTIFY_RESUME)
> +		if (ti_work & _TIF_NOTIFY_RESUME) {
> 			tracehook_notify_resume(NULL);
> +			rseq_handle_notify_resume(NULL, NULL);
> +		}
> 
> 		ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work);
> 		if (ret)
> diff --git a/kernel/rseq.c b/kernel/rseq.c
> index 35f7bd0fced0..58c79a7918cd 100644
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -236,7 +236,7 @@ static bool in_rseq_cs(unsigned long ip, struct rseq_cs
> *rseq_cs)
> 
> static int rseq_ip_fixup(struct pt_regs *regs)
> {
> -	unsigned long ip = instruction_pointer(regs);
> +	unsigned long ip = regs ? instruction_pointer(regs) : 0;
> 	struct task_struct *t = current;
> 	struct rseq_cs rseq_cs;
> 	int ret;
> @@ -250,7 +250,7 @@ static int rseq_ip_fixup(struct pt_regs *regs)
> 	 * If not nested over a rseq critical section, restart is useless.
> 	 * Clear the rseq_cs pointer and return.
> 	 */
> -	if (!in_rseq_cs(ip, &rseq_cs))
> +	if (!regs || !in_rseq_cs(ip, &rseq_cs))

I think clearing the thread's rseq_cs unconditionally here when regs is NULL
is not the behavior we want when this is called from xfer_to_guest_mode_work.

If we have a scenario where userspace ends up calling this ioctl(KVM_RUN)
from within a rseq c.s., we really want a CONFIG_DEBUG_RSEQ=y kernel to
kill this application in the rseq_syscall handler when exiting back to usermode
when the ioctl eventually returns.

However, clearing the thread's rseq_cs will prevent this from happening.

So I would favor an approach where we simply do:

if (!regs)
     return 0;

Immediately at the beginning of rseq_ip_fixup, before getting the instruction
pointer, so effectively skip all side-effects of the ip fixup code. Indeed, it
is not relevant to do any fixup here, because it is nested in a ioctl system
call.

Effectively, this would preserve the SIGSEGV behavior when this ioctl is
erroneously called by user-space from a rseq critical section.

Thanks for looking into this !

Mathieu

> 		return clear_rseq_cs(t);
> 	ret = rseq_need_restart(t, rseq_cs.flags);
> 	if (ret <= 0)
> --
> 2.33.0.rc1.237.g0d66db33f3-goog

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply

* Re: [PATCH 2/5] entry: rseq: Call rseq_handle_notify_resume() in tracehook_notify_resume()
From: Mathieu Desnoyers @ 2021-08-19 21:41 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: KVM list, Peter Zijlstra, linux-kernel, Will Deacon, Guo Ren,
	linux-kselftest, Ben Gardon, shuah, Paul Mackerras, linux-s390,
	gor, Russell King, ARM Linux, linux-csky, Christian Borntraeger,
	Ingo Molnar, Catalin Marinas, linux-mips, Boqun Feng, paulmck,
	Heiko Carstens, rostedt, Shakeel Butt, Andy Lutomirski,
	Thomas Gleixner, Peter Foley, linux-arm-kernel,
	Thomas Bogendoerfer, Oleg Nesterov, Paolo Bonzini, linuxppc-dev
In-Reply-To: <20210818001210.4073390-3-seanjc@google.com>

----- On Aug 17, 2021, at 8:12 PM, Sean Christopherson seanjc@google.com wrote:

> Invoke rseq_handle_notify_resume() from tracehook_notify_resume() now
> that the two function are always called back-to-back by architectures
> that have rseq.  The rseq helper is stubbed out for architectures that
> don't support rseq, i.e. this is a nop across the board.
> 
> Note, tracehook_notify_resume() is horribly named and arguably does not
> belong in tracehook.h as literally every line of code in it has nothing
> to do with tracing.  But, that's been true since commit a42c6ded827d
> ("move key_repace_session_keyring() into tracehook_notify_resume()")
> first usurped tracehook_notify_resume() back in 2012.  Punt cleaning that
> mess up to future patches.
> 
> No functional change intended.

This will make it harder to introduce new code paths which consume the
NOTIFY_RESUME without calling the rseq callback, which introduces issues.
Agreed.

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/arm/kernel/signal.c     | 1 -
> arch/arm64/kernel/signal.c   | 1 -
> arch/csky/kernel/signal.c    | 4 +---
> arch/mips/kernel/signal.c    | 4 +---
> arch/powerpc/kernel/signal.c | 4 +---
> arch/s390/kernel/signal.c    | 1 -
> include/linux/tracehook.h    | 2 ++
> kernel/entry/common.c        | 4 +---
> kernel/entry/kvm.c           | 4 +---
> 9 files changed, 7 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
> index a3a38d0a4c85..9df68d139965 100644
> --- a/arch/arm/kernel/signal.c
> +++ b/arch/arm/kernel/signal.c
> @@ -670,7 +670,6 @@ do_work_pending(struct pt_regs *regs, unsigned int
> thread_flags, int syscall)
> 				uprobe_notify_resume(regs);
> 			} else {
> 				tracehook_notify_resume(regs);
> -				rseq_handle_notify_resume(NULL, regs);
> 			}
> 		}
> 		local_irq_disable();
> diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> index 23036334f4dc..22b55db13da6 100644
> --- a/arch/arm64/kernel/signal.c
> +++ b/arch/arm64/kernel/signal.c
> @@ -951,7 +951,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
> 
> 			if (thread_flags & _TIF_NOTIFY_RESUME) {
> 				tracehook_notify_resume(regs);
> -				rseq_handle_notify_resume(NULL, regs);
> 
> 				/*
> 				 * If we reschedule after checking the affinity
> diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
> index 312f046d452d..bc4238b9f709 100644
> --- a/arch/csky/kernel/signal.c
> +++ b/arch/csky/kernel/signal.c
> @@ -260,8 +260,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
> 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
> 		do_signal(regs);
> 
> -	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
> +	if (thread_info_flags & _TIF_NOTIFY_RESUME)
> 		tracehook_notify_resume(regs);
> -		rseq_handle_notify_resume(NULL, regs);
> -	}
> }
> diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
> index f1e985109da0..c9b2a75563e1 100644
> --- a/arch/mips/kernel/signal.c
> +++ b/arch/mips/kernel/signal.c
> @@ -906,10 +906,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void
> *unused,
> 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
> 		do_signal(regs);
> 
> -	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
> +	if (thread_info_flags & _TIF_NOTIFY_RESUME)
> 		tracehook_notify_resume(regs);
> -		rseq_handle_notify_resume(NULL, regs);
> -	}
> 
> 	user_enter();
> }
> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> index e600764a926c..b93b87df499d 100644
> --- a/arch/powerpc/kernel/signal.c
> +++ b/arch/powerpc/kernel/signal.c
> @@ -293,10 +293,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long
> thread_info_flags)
> 		do_signal(current);
> 	}
> 
> -	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
> +	if (thread_info_flags & _TIF_NOTIFY_RESUME)
> 		tracehook_notify_resume(regs);
> -		rseq_handle_notify_resume(NULL, regs);
> -	}
> }
> 
> static unsigned long get_tm_stackpointer(struct task_struct *tsk)
> diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
> index 78ef53b29958..b307db26bf2d 100644
> --- a/arch/s390/kernel/signal.c
> +++ b/arch/s390/kernel/signal.c
> @@ -537,5 +537,4 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool
> has_signal)
> void do_notify_resume(struct pt_regs *regs)
> {
> 	tracehook_notify_resume(regs);
> -	rseq_handle_notify_resume(NULL, regs);
> }
> diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
> index 3e80c4bc66f7..2564b7434b4d 100644
> --- a/include/linux/tracehook.h
> +++ b/include/linux/tracehook.h
> @@ -197,6 +197,8 @@ static inline void tracehook_notify_resume(struct pt_regs
> *regs)
> 
> 	mem_cgroup_handle_over_high();
> 	blkcg_maybe_throttle_current();
> +
> +	rseq_handle_notify_resume(NULL, regs);
> }
> 
> /*
> diff --git a/kernel/entry/common.c b/kernel/entry/common.c
> index bf16395b9e13..d5a61d565ad5 100644
> --- a/kernel/entry/common.c
> +++ b/kernel/entry/common.c
> @@ -171,10 +171,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs
> *regs,
> 		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
> 			handle_signal_work(regs, ti_work);
> 
> -		if (ti_work & _TIF_NOTIFY_RESUME) {
> +		if (ti_work & _TIF_NOTIFY_RESUME)
> 			tracehook_notify_resume(regs);
> -			rseq_handle_notify_resume(NULL, regs);
> -		}
> 
> 		/* Architecture specific TIF work */
> 		arch_exit_to_user_mode_work(regs, ti_work);
> diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
> index 049fd06b4c3d..49972ee99aff 100644
> --- a/kernel/entry/kvm.c
> +++ b/kernel/entry/kvm.c
> @@ -19,10 +19,8 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu,
> unsigned long ti_work)
> 		if (ti_work & _TIF_NEED_RESCHED)
> 			schedule();
> 
> -		if (ti_work & _TIF_NOTIFY_RESUME) {
> +		if (ti_work & _TIF_NOTIFY_RESUME)
> 			tracehook_notify_resume(NULL);
> -			rseq_handle_notify_resume(NULL, NULL);
> -		}
> 
> 		ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work);
> 		if (ret)
> --
> 2.33.0.rc1.237.g0d66db33f3-goog

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply

* Re: [PATCH 3/7] powerpc: replace cc-option-yn uses with cc-option
From: Masahiro Yamada @ 2021-08-19 20:04 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Linux Kbuild mailing list, Nick Desaulniers, clang-built-linux,
	Paul Mackerras, linuxppc-dev
In-Reply-To: <87a6lghkdj.fsf@mpe.ellerman.id.au>

On Tue, Aug 17, 2021 at 11:31 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Nick Desaulniers <ndesaulniers@google.com> writes:
> > cc-option-yn can be replaced with cc-option. ie.
> > Checking for support:
> > ifeq ($(call cc-option-yn,$(FLAG)),y)
> > becomes:
> > ifneq ($(call cc-option,$(FLAG)),)
> >
> > Checking for lack of support:
> > ifeq ($(call cc-option-yn,$(FLAG)),n)
> > becomes:
> > ifeq ($(call cc-option,$(FLAG)),)
> >
> > This allows us to pursue removing cc-option-yn.
> >
> > Cc: Michael Ellerman <mpe@ellerman.id.au>
> > Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > Cc: Paul Mackerras <paulus@samba.org>
> > Cc: linuxppc-dev@lists.ozlabs.org
> > Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
> > ---
> >  arch/powerpc/Makefile      | 12 ++++++------
> >  arch/powerpc/boot/Makefile |  5 +----
> >  2 files changed, 7 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
> > index 9aaf1abbc641..85e224536cf7 100644
> > --- a/arch/powerpc/Makefile
> > +++ b/arch/powerpc/Makefile
> > @@ -12,12 +12,12 @@
> >  # Rewritten by Cort Dougan and Paul Mackerras
> >  #
> >
> > -HAS_BIARCH   := $(call cc-option-yn, -m32)
> > +HAS_BIARCH   := $(call cc-option,-m32)
> >
> >  # Set default 32 bits cross compilers for vdso and boot wrapper
> >  CROSS32_COMPILE ?=
> >
> > -ifeq ($(HAS_BIARCH),y)
> > +ifeq ($(HAS_BIARCH),-m32)
>
> I don't love that we have to repeat "-m32" in each check.
>
> I'm pretty sure you can use ifdef here, because HAS_BIARCH is a simple
> variable (assigned with ":=").
>
> ie, this can be:
>
>   ifdef HAS_BIARCH
>
>
> And that avoids having to spell out "-m32" everywhere.
>
> cheers


Yes.

Comments from Nathan and Michael
both sound good.

-- 
Best Regards
Masahiro Yamada

^ permalink raw reply

* Re: [PATCH v2 03/12] x86/sev: Add an x86 version of prot_guest_has()
From: Kuppuswamy, Sathyanarayanan @ 2021-08-19 19:57 UTC (permalink / raw)
  To: Tom Lendacky, Christoph Hellwig
  Cc: linux-efi, Brijesh Singh, kvm, Peter Zijlstra, Dave Hansen,
	dri-devel, platform-driver-x86, linux-s390, Andi Kleen,
	Joerg Roedel, x86, amd-gfx, Ingo Molnar,
	linux-graphics-maintainer, Joerg Roedel, Tianyu Lan,
	Borislav Petkov, Andy Lutomirski, Thomas Gleixner, kexec,
	linux-kernel, iommu, linux-fsdevel, linuxppc-dev
In-Reply-To: <4272eaf5-b654-2669-62ac-ba768acd6b91@amd.com>



On 8/19/21 11:33 AM, Tom Lendacky wrote:
> There was some talk about this on the mailing list where TDX and SEV may
> need to be differentiated, so we wanted to reserve a range of values per
> technology. I guess I can remove them until they are actually needed.

In TDX also we have similar requirements and we need some flags for
TDX specific checks. So I think it is fine to leave some space for vendor
flags.

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

^ permalink raw reply

* Re: [PATCH 5/6] audit: Declare ppc32_classify_syscall()
From: Christophe Leroy @ 2021-08-19 18:36 UTC (permalink / raw)
  To: Cédric Le Goater, linuxppc-dev; +Cc: Christophe Leroy
In-Reply-To: <d268f141-4ec3-eb1d-a6c1-4cd5f535ea49@csgroup.eu>



Le 19/08/2021 à 16:56, Christophe Leroy a écrit :
> 
> 
> Le 19/08/2021 à 14:56, Cédric Le Goater a écrit :
>> This fixes a compile error with W=1.
>>
>> Cc: Christophe Leroy <christophe.leroy@c-s.fr>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>
>>   I don't think this is correct. Which file could we use ?
> 
> I think you can completely remove ppc32_classify_syscall(), and instead add the following in the 
> default case in audit_classify_syscall():
> 
>       default:
> +        if (IS_ENABLED(CONFIG_PPC64) && abi == AUDIT_ARCH_PPC)
> +            return 1;
>           return 0;
> 

After looking more in details, in fact I think we should convert powerpc to 
CONFIG_AUDIT_ARCH_COMPAT_GENERIC

> 
>>
>>   arch/powerpc/include/asm/unistd.h | 3 +++
>>   arch/powerpc/kernel/audit.c       | 1 -
>>   2 files changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
>> index b541c690a31c..d9025a7e973c 100644
>> --- a/arch/powerpc/include/asm/unistd.h
>> +++ b/arch/powerpc/include/asm/unistd.h
>> @@ -47,6 +47,9 @@
>>   #define __ARCH_WANT_SYS_UTIME
>>   #define __ARCH_WANT_SYS_NEWFSTATAT
>>   #define __ARCH_WANT_COMPAT_SYS_SENDFILE
>> +#ifdef CONFIG_AUDIT
>> +extern int ppc32_classify_syscall(unsigned int syscall);
>> +#endif
>>   #endif
>>   #define __ARCH_WANT_SYS_FORK
>>   #define __ARCH_WANT_SYS_VFORK
>> diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
>> index a2dddd7f3d09..c3c6c6a1069b 100644
>> --- a/arch/powerpc/kernel/audit.c
>> +++ b/arch/powerpc/kernel/audit.c
>> @@ -41,7 +41,6 @@ int audit_classify_arch(int arch)
>>   int audit_classify_syscall(int abi, unsigned syscall)
>>   {
>>   #ifdef CONFIG_PPC64
>> -    extern int ppc32_classify_syscall(unsigned);
>>       if (abi == AUDIT_ARCH_PPC)
>>           return ppc32_classify_syscall(syscall);
>>   #endif
>>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox