LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] powerpc/32: Add support for out-of-line static calls
From: Christophe Leroy @ 2021-08-31 13:12 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: Peter Zijlstra, linux-kernel, Steven Rostedt, Jason Baron,
	Josh Poimboeuf, linuxppc-dev, Ard Biesheuvel

Add support for out-of-line static calls on PPC32. This change
improve performance of calls to global function pointers by
using direct calls instead of indirect calls.

The trampoline is initialy populated with a 'blr' or branch to target,
followed by an unreachable long jump sequence.

In order to cater with parallele execution, the trampoline needs to
be updated in a way that ensures it remains consistent at all time.
This means we can't use the traditional lis/addi to load r12 with
the target address, otherwise there would be a window during which
the first instruction contains the upper part of the new target
address while the second instruction still contains the lower part of
the old target address. To avoid that the target address is stored
just after the 'bctr' and loaded from there with a single instruction.

Then, depending on the target distance, arch_static_call_transform()
will either replace the first instruction by a direct 'bl <target>' or
'nop' in order to have the trampoline fall through the long jump
sequence.

Performancewise the long jump sequence is probably not better than
the indirect calls set by GCC when we don't use static calls, but
such calls are unlikely to be required on powerpc32: With most
configurations the kernel size is far below 32 Mbytes so only
modules may happen to be too far. And even modules are likely to
be close enough as they are allocated below the kernel core and
as close as possible of the kernel text.

static_call selftest is running successfully with this change.

With this patch, __do_irq() has the following sequence to trace
irq entries:

	c0004a00 <__SCT__tp_func_irq_entry>:
	c0004a00:	48 00 00 e0 	b       c0004ae0 <__traceiter_irq_entry>
	c0004a04:	3d 80 c0 00 	lis     r12,-16384
	c0004a08:	81 8c 4a 14 	lwz     r12,18964(r12)
	c0004a0c:	7d 89 03 a6 	mtctr   r12
	c0004a10:	4e 80 04 20 	bctr
	c0004a14:	00 00 00 00 	.long 0x0
	c0004a18:	60 00 00 00 	nop
	c0004a1c:	60 00 00 00 	nop
...
	c0005654 <__do_irq>:
...
	c0005664:	7c 7f 1b 78 	mr      r31,r3
...
	c00056a0:	81 22 00 00 	lwz     r9,0(r2)
	c00056a4:	39 29 00 01 	addi    r9,r9,1
	c00056a8:	91 22 00 00 	stw     r9,0(r2)
	c00056ac:	3d 20 c0 af 	lis     r9,-16209
	c00056b0:	81 29 74 cc 	lwz     r9,29900(r9)
	c00056b4:	2c 09 00 00 	cmpwi   r9,0
	c00056b8:	41 82 00 10 	beq     c00056c8 <__do_irq+0x74>
	c00056bc:	80 69 00 04 	lwz     r3,4(r9)
	c00056c0:	7f e4 fb 78 	mr      r4,r31
	c00056c4:	4b ff f3 3d 	bl      c0004a00 <__SCT__tp_func_irq_entry>

Before this patch, __do_irq() was doing the following to trace irq
entries:

	c0005700 <__do_irq>:
...
	c0005710:	7c 7e 1b 78 	mr      r30,r3
...
	c000574c:	93 e1 00 0c 	stw     r31,12(r1)
	c0005750:	81 22 00 00 	lwz     r9,0(r2)
	c0005754:	39 29 00 01 	addi    r9,r9,1
	c0005758:	91 22 00 00 	stw     r9,0(r2)
	c000575c:	3d 20 c0 af 	lis     r9,-16209
	c0005760:	83 e9 f4 cc 	lwz     r31,-2868(r9)
	c0005764:	2c 1f 00 00 	cmpwi   r31,0
	c0005768:	41 82 00 24 	beq     c000578c <__do_irq+0x8c>
	c000576c:	81 3f 00 00 	lwz     r9,0(r31)
	c0005770:	80 7f 00 04 	lwz     r3,4(r31)
	c0005774:	7d 29 03 a6 	mtctr   r9
	c0005778:	7f c4 f3 78 	mr      r4,r30
	c000577c:	4e 80 04 21 	bctrl
	c0005780:	85 3f 00 0c 	lwzu    r9,12(r31)
	c0005784:	2c 09 00 00 	cmpwi   r9,0
	c0005788:	40 82 ff e4 	bne     c000576c <__do_irq+0x6c>

Behind the fact of now using a direct 'bl' instead of a
'load/mtctr/bctr' sequence, we can also see that we get one less
register on the stack.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v2: Use indirect load in long jump sequence to cater with parallele execution and preemption.
---
 arch/powerpc/Kconfig                   |  1 +
 arch/powerpc/include/asm/static_call.h | 25 ++++++++++++++++++
 arch/powerpc/kernel/Makefile           |  2 +-
 arch/powerpc/kernel/static_call.c      | 36 ++++++++++++++++++++++++++
 4 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/static_call.h
 create mode 100644 arch/powerpc/kernel/static_call.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 36b72d972568..a0fe69d8ec83 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -247,6 +247,7 @@ config PPC
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
 	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
+	select HAVE_STATIC_CALL			if PPC32
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select HUGETLB_PAGE_SIZE_VARIABLE	if PPC_BOOK3S_64 && HUGETLB_PAGE
diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
new file mode 100644
index 000000000000..2402c6d32439
--- /dev/null
+++ b/arch/powerpc/include/asm/static_call.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_STATIC_CALL_H
+#define _ASM_POWERPC_STATIC_CALL_H
+
+#define __POWERPC_SCT(name, inst)					\
+	asm(".pushsection .text, \"ax\"				\n"	\
+	    ".align 5						\n"	\
+	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
+	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
+	    inst "						\n"	\
+	    "	lis	12,1f@ha				\n"	\
+	    "	lwz	12,1f@l(12)				\n"	\
+	    "	mtctr	12					\n"	\
+	    "	bctr						\n"	\
+	    "1:	.long 0						\n"	\
+	    "	nop						\n"	\
+	    "	nop						\n"	\
+	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
+	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+	    ".popsection					\n")
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)	__POWERPC_SCT(name, "b " #func)
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)	__POWERPC_SCT(name, "blr")
+
+#endif /* _ASM_POWERPC_STATIC_CALL_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 7be36c1e1db6..0e3640e14eb1 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -106,7 +106,7 @@ extra-y				+= vmlinux.lds
 
 obj-$(CONFIG_RELOCATABLE)	+= reloc_$(BITS).o
 
-obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o early_32.o
+obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o early_32.o static_call.o
 obj-$(CONFIG_PPC64)		+= dma-iommu.o iommu.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
new file mode 100644
index 000000000000..e5e78205ccb4
--- /dev/null
+++ b/arch/powerpc/kernel/static_call.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/memory.h>
+#include <linux/static_call.h>
+
+#include <asm/code-patching.h>
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+	int err;
+	unsigned long target = (long)func;
+	bool is_short = is_offset_in_branch_range((long)target - (long)tramp);
+
+	if (!tramp)
+		return;
+
+	mutex_lock(&text_mutex);
+
+	if (func && !is_short) {
+		err = patch_instruction(tramp + 20, ppc_inst(target));
+		if (err)
+			goto out;
+	}
+
+	if (!func)
+		err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
+	else if (is_short)
+		err = patch_branch(tramp, target, 0);
+	else
+		err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP()));
+out:
+	mutex_unlock(&text_mutex);
+
+	if (err)
+		panic("%s: patching failed %pS at %pS\n", __func__, func, tramp);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH] powerpc/64: Avoid link stack corruption in kexec_wait()
From: Daniel Axtens @ 2021-08-31 12:43 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <9f43118b-ef81-9a80-0e0b-5e74433e0b8c@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:

> Le 31/08/2021 à 08:17, Daniel Axtens a écrit :
>> Hi Christophe,
>> 
>>> Use bcl 20,31,+4 instead of bl in order to preserve link stack.
>>>
>>> See commit c974809a26a1 ("powerpc/vdso: Avoid link stack corruption
>>> in __get_datapage()") for details.
>> 
>>  From my understanding of that commit message, the change helps to keep
>> the link stack correctly balanced which is helpful for performance,
>> rather than for correctness. If I understand correctly, kexec_wait is
>> not in a hot path - rather it is where CPUs spin while waiting for
>> kexec. Is there any benefit in using the more complicated opcode in this
>> situation?
>
> AFAICS the main benefit is to keep things consistent over the kernel and not have to wonder "is it a 
> hot path or not ? If it is I use bcl 20,31, if it is not I use bl". The best way to keep things in 
> order is to always use the right instruction.

Yeah, Nick Piggin convinced me of this offline as well.

>
>> 
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>>>   arch/powerpc/kernel/misc_64.S | 2 +-
>>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
>>> index 4b761a18a74d..613509907166 100644
>>> --- a/arch/powerpc/kernel/misc_64.S
>>> +++ b/arch/powerpc/kernel/misc_64.S
>>> @@ -255,7 +255,7 @@ _GLOBAL(scom970_write)
>>>    * Physical (hardware) cpu id should be in r3.
>>>    */
>>>   _GLOBAL(kexec_wait)
>>> -	bl	1f
>>> +	bcl	20,31,1f
>>>   1:	mflr	r5
>> 
>> Would it be better to create a macro of some sort to wrap this unusual
>> special form so that the meaning is more clear?
>
> Not sure, I think people working with assembly will easily recognise that form whereas an obscure 
> macro is always puzzling.
>
> I like macros when they allow you to not repeat again and again the same sequence of several 
> instructions, but here it is a single quite simple instruction which is not worth a macro in my mind.
>


Sure - I was mostly thinking specifically of the bcl; mflr situation but
I agree that for the single instruction it's not needed.

In short, I am convinced, and so:
Reviewed-by: Daniel Axtens <dja@axtens.net>

Kind regards,
Daniel

> Christophe

^ permalink raw reply

* Re: [PATCH] powerpc/32: Add support for out-of-line static calls
From: Ard Biesheuvel @ 2021-08-31 10:22 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Josh Poimboeuf, linux-kernel, Jason Baron, Paul Mackerras,
	Steven Rostedt, linuxppc-dev
In-Reply-To: <YS3t/s9nojyCn9ev@hirez.programming.kicks-ass.net>

On Tue, 31 Aug 2021 at 10:53, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Aug 31, 2021 at 08:05:21AM +0000, Christophe Leroy wrote:
>
> > +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)                     \
> > +     asm(".pushsection .text, \"ax\"                         \n"     \
> > +         ".align 4                                           \n"     \
> > +         ".globl " STATIC_CALL_TRAMP_STR(name) "             \n"     \
> > +         STATIC_CALL_TRAMP_STR(name) ":                      \n"     \
> > +         "   blr                                             \n"     \
> > +         "   nop                                             \n"     \
> > +         "   nop                                             \n"     \
> > +         "   nop                                             \n"     \
> > +         ".type " STATIC_CALL_TRAMP_STR(name) ", @function   \n"     \
> > +         ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> > +         ".popsection                                        \n")
>
> > +static int patch_trampoline_32(u32 *addr, unsigned long target)
> > +{
> > +     int err;
> > +
> > +     err = patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(_R12, PPC_HA(target))));
> > +     err |= patch_instruction(addr++, ppc_inst(PPC_RAW_ADDI(_R12, _R12, PPC_LO(target))));
> > +     err |= patch_instruction(addr++, ppc_inst(PPC_RAW_MTCTR(_R12)));
> > +     err |= patch_instruction(addr, ppc_inst(PPC_RAW_BCTR()));
> > +
> > +     return err;
> > +}
>
> There can be concurrent execution and modification; the above doesn't
> look safe in that regard. What happens if you've say, done the first
> two, but not the latter two and execution happens (on a different
> CPU or through IRQ context, etc..)?
>
> > +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
> > +{
> > +     int err;
> > +     unsigned long target = (long)func;
> > +
> > +     if (!tramp)
> > +             return;
> > +
> > +     mutex_lock(&text_mutex);
> > +
> > +     if (!func)
> > +             err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
> > +     else if (is_offset_in_branch_range((long)target - (long)tramp))
> > +             err = patch_branch(tramp, target, 0);
>
> These two are single instruction modifications and I'm assuming the
> hardware is sane enough that execution sees either the old or the new
> instruction. So this should work.
>
> > +     else if (IS_ENABLED(CONFIG_PPC32))
> > +             err = patch_trampoline_32(tramp, target);
> > +     else
> > +             BUILD_BUG();
> > +
> > +     mutex_unlock(&text_mutex);
> > +
> > +     if (err)
> > +             panic("%s: patching failed %pS at %pS\n", __func__, func, tramp);
> > +}
> > +EXPORT_SYMBOL_GPL(arch_static_call_transform);
>
> One possible solution that we explored on ARM64, was having the
> trampoline be in 2 slots:
>
>
>         b 1f
>
> 1:      blr
>         nop
>         nop
>         nop
>
> 2:      blr
>         nop
>         nop
>         nop
>
> Where initially the first slot is active (per "b 1f"), then you write
> the second slot, and as a final act, re-write the initial branch to
> point to slot 2.
>
> Then you execute synchronize_rcu_tasks() under your text mutex
> (careful!) to ensure all users of your slot1 are gone and the next
> modification repeats the whole thing, except for using slot1 etc..
>
> Eventually I think Ard came up with the latest ARM64 proposal which puts
> a literal in a RO section (could be the text section I suppose) and
> loads and branches to that.
>

Yes. The main reason is simply that anything else is premature
optimization: we have a clear use case (CFI) where out-of-line static
calls are faster than compiler generated indirect calls, even if the
static call sequence is based on a literal load and an indirect
branch, but CFI is not upstream [yet].

Once other use cases emerge, we will revisit this.



> Anyway, the thing is, you can really only modify a single instruction at
> the time and need to ensure concurrent execution is correct.

^ permalink raw reply

* Re: [PATCH v3] lockdown,selinux: fix wrong subject in some SELinux lockdown checks
From: Ondrej Mosnacek @ 2021-08-31  9:09 UTC (permalink / raw)
  To: Dan Williams
  Cc: linux-efi, Linux PCI, linux-cxl, Steffen Klassert, Herbert Xu,
	X86 ML, James Morris, Linux ACPI, Ingo Molnar, linux-serial,
	Linux-pm mailing list, SElinux list, Steven Rostedt,
	Casey Schaufler, Paul Moore, Netdev, Stephen Smalley,
	Kexec Mailing List, Linux Kernel Mailing List,
	Linux Security Module list, linux-fsdevel, bpf, linuxppc-dev,
	David S . Miller
In-Reply-To: <CAPcyv4jvR8CT4rYODR5KUHNdiqMwQSwJZ+OkVf61kLT3JfjC_Q@mail.gmail.com>

On Sat, Jun 19, 2021 at 12:18 AM Dan Williams <dan.j.williams@intel.com> wrote:
> On Wed, Jun 16, 2021 at 1:51 AM Ondrej Mosnacek <omosnace@redhat.com> wrote:
> >
> > Commit 59438b46471a ("security,lockdown,selinux: implement SELinux
> > lockdown") added an implementation of the locked_down LSM hook to
> > SELinux, with the aim to restrict which domains are allowed to perform
> > operations that would breach lockdown.
> >
> > However, in several places the security_locked_down() hook is called in
> > situations where the current task isn't doing any action that would
> > directly breach lockdown, leading to SELinux checks that are basically
> > bogus.
> >
> > To fix this, add an explicit struct cred pointer argument to
> > security_lockdown() and define NULL as a special value to pass instead
> > of current_cred() in such situations. LSMs that take the subject
> > credentials into account can then fall back to some default or ignore
> > such calls altogether. In the SELinux lockdown hook implementation, use
> > SECINITSID_KERNEL in case the cred argument is NULL.
> >
> > Most of the callers are updated to pass current_cred() as the cred
> > pointer, thus maintaining the same behavior. The following callers are
> > modified to pass NULL as the cred pointer instead:
> > 1. arch/powerpc/xmon/xmon.c
> >      Seems to be some interactive debugging facility. It appears that
> >      the lockdown hook is called from interrupt context here, so it
> >      should be more appropriate to request a global lockdown decision.
> > 2. fs/tracefs/inode.c:tracefs_create_file()
> >      Here the call is used to prevent creating new tracefs entries when
> >      the kernel is locked down. Assumes that locking down is one-way -
> >      i.e. if the hook returns non-zero once, it will never return zero
> >      again, thus no point in creating these files. Also, the hook is
> >      often called by a module's init function when it is loaded by
> >      userspace, where it doesn't make much sense to do a check against
> >      the current task's creds, since the task itself doesn't actually
> >      use the tracing functionality (i.e. doesn't breach lockdown), just
> >      indirectly makes some new tracepoints available to whoever is
> >      authorized to use them.
> > 3. net/xfrm/xfrm_user.c:copy_to_user_*()
> >      Here a cryptographic secret is redacted based on the value returned
> >      from the hook. There are two possible actions that may lead here:
> >      a) A netlink message XFRM_MSG_GETSA with NLM_F_DUMP set - here the
> >         task context is relevant, since the dumped data is sent back to
> >         the current task.
> >      b) When adding/deleting/updating an SA via XFRM_MSG_xxxSA, the
> >         dumped SA is broadcasted to tasks subscribed to XFRM events -
> >         here the current task context is not relevant as it doesn't
> >         represent the tasks that could potentially see the secret.
> >      It doesn't seem worth it to try to keep using the current task's
> >      context in the a) case, since the eventual data leak can be
> >      circumvented anyway via b), plus there is no way for the task to
> >      indicate that it doesn't care about the actual key value, so the
> >      check could generate a lot of "false alert" denials with SELinux.
> >      Thus, let's pass NULL instead of current_cred() here faute de
> >      mieux.
> >
> > Improvements-suggested-by: Casey Schaufler <casey@schaufler-ca.com>
> > Improvements-suggested-by: Paul Moore <paul@paul-moore.com>
> > Fixes: 59438b46471a ("security,lockdown,selinux: implement SELinux lockdown")
> > Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
> [..]
> > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> > index 2acc6173da36..c1747b6555c7 100644
> > --- a/drivers/cxl/mem.c
> > +++ b/drivers/cxl/mem.c
> > @@ -568,7 +568,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode)
> >         if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
> >                 return false;
> >
> > -       if (security_locked_down(LOCKDOWN_NONE))
> > +       if (security_locked_down(current_cred(), LOCKDOWN_NONE))
>
> Acked-by: Dan Williams <dan.j.williams@intel.com>
>
> ...however that usage looks wrong. The expectation is that if kernel
> integrity protections are enabled then raw command access should be
> disabled. So I think that should be equivalent to LOCKDOWN_PCI_ACCESS
> in terms of the command capabilities to filter.

Yes, the LOCKDOWN_NONE seems wrong here... but it's a pre-existing bug
and I didn't want to go down yet another rabbit hole trying to fix it.
I'll look at this again once this patch is settled - it may indeed be
as simple as replacing LOCKDOWN_NONE with LOCKDOWN_PCI_ACCESS.

--
Ondrej Mosnacek
Software Engineer, Linux Security - SELinux kernel
Red Hat, Inc.


^ permalink raw reply

* Re: [PATCH v3] lockdown,selinux: fix wrong subject in some SELinux lockdown checks
From: Ondrej Mosnacek @ 2021-08-31  9:08 UTC (permalink / raw)
  To: Paul Moore
  Cc: linux-efi, linux-pci, linux-cxl, Steffen Klassert, Herbert Xu,
	x86, James Morris, linux-acpi, Ingo Molnar, linux-serial,
	linux-pm, SElinux list, Steven Rostedt, Casey Schaufler,
	network dev, Stephen Smalley, kexec, Linux kernel mailing list,
	Linux Security Module list, Linux FS Devel, bpf, linuxppc-dev,
	David S . Miller
In-Reply-To: <CAHC9VhSr2KpeBXuyoHR3_hs+qczFUaBx0oCSMfBBA5UNYU+0KA@mail.gmail.com>

On Fri, Jun 18, 2021 at 5:40 AM Paul Moore <paul@paul-moore.com> wrote:
> On Wed, Jun 16, 2021 at 4:51 AM Ondrej Mosnacek <omosnace@redhat.com> wrote:
> >
> > Commit 59438b46471a ("security,lockdown,selinux: implement SELinux
> > lockdown") added an implementation of the locked_down LSM hook to
> > SELinux, with the aim to restrict which domains are allowed to perform
> > operations that would breach lockdown.
> >
> > However, in several places the security_locked_down() hook is called in
> > situations where the current task isn't doing any action that would
> > directly breach lockdown, leading to SELinux checks that are basically
> > bogus.
> >
> > To fix this, add an explicit struct cred pointer argument to
> > security_lockdown() and define NULL as a special value to pass instead
> > of current_cred() in such situations. LSMs that take the subject
> > credentials into account can then fall back to some default or ignore
> > such calls altogether. In the SELinux lockdown hook implementation, use
> > SECINITSID_KERNEL in case the cred argument is NULL.
> >
> > Most of the callers are updated to pass current_cred() as the cred
> > pointer, thus maintaining the same behavior. The following callers are
> > modified to pass NULL as the cred pointer instead:
> > 1. arch/powerpc/xmon/xmon.c
> >      Seems to be some interactive debugging facility. It appears that
> >      the lockdown hook is called from interrupt context here, so it
> >      should be more appropriate to request a global lockdown decision.
> > 2. fs/tracefs/inode.c:tracefs_create_file()
> >      Here the call is used to prevent creating new tracefs entries when
> >      the kernel is locked down. Assumes that locking down is one-way -
> >      i.e. if the hook returns non-zero once, it will never return zero
> >      again, thus no point in creating these files. Also, the hook is
> >      often called by a module's init function when it is loaded by
> >      userspace, where it doesn't make much sense to do a check against
> >      the current task's creds, since the task itself doesn't actually
> >      use the tracing functionality (i.e. doesn't breach lockdown), just
> >      indirectly makes some new tracepoints available to whoever is
> >      authorized to use them.
> > 3. net/xfrm/xfrm_user.c:copy_to_user_*()
> >      Here a cryptographic secret is redacted based on the value returned
> >      from the hook. There are two possible actions that may lead here:
> >      a) A netlink message XFRM_MSG_GETSA with NLM_F_DUMP set - here the
> >         task context is relevant, since the dumped data is sent back to
> >         the current task.
> >      b) When adding/deleting/updating an SA via XFRM_MSG_xxxSA, the
> >         dumped SA is broadcasted to tasks subscribed to XFRM events -
> >         here the current task context is not relevant as it doesn't
> >         represent the tasks that could potentially see the secret.
> >      It doesn't seem worth it to try to keep using the current task's
> >      context in the a) case, since the eventual data leak can be
> >      circumvented anyway via b), plus there is no way for the task to
> >      indicate that it doesn't care about the actual key value, so the
> >      check could generate a lot of "false alert" denials with SELinux.
> >      Thus, let's pass NULL instead of current_cred() here faute de
> >      mieux.
> >
> > Improvements-suggested-by: Casey Schaufler <casey@schaufler-ca.com>
> > Improvements-suggested-by: Paul Moore <paul@paul-moore.com>
> > Fixes: 59438b46471a ("security,lockdown,selinux: implement SELinux lockdown")
> > Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
>
> This seems reasonable to me, but before I merge it into the SELinux
> tree I think it would be good to get some ACKs from the relevant
> subsystem folks.  I don't believe we ever saw a response to the last
> question for the PPC folks, did we?

Can we move this forward somehow, please?

Quoting the yet-unanswered question from the v2 thread for convenience:

> > > The callers migrated to the new hook, passing NULL as cred:
> > > 1. arch/powerpc/xmon/xmon.c
[...]
> >
> > This definitely sounds like kernel_t based on the description above.
>
> Here I'm a little concerned that the hook might be called from some
> unusual interrupt, which is not masked by spin_lock_irqsave()... We
> ran into this with PMI (Platform Management Interrupt) before, see
> commit 5ae5fbd21079 ("powerpc/perf: Fix handling of privilege level
> checks in perf interrupt context"). While I can't see anything that
> would suggest something like this happening here, the whole thing is
> so foreign to me that I'm wary of making assumptions :)
>
> @Michael/PPC devs, can you confirm to us that xmon_is_locked_down() is
> only called from normal syscall/interrupt context (as opposed to
> something tricky like PMI)?

I strongly suspect the answer will be just "Of course it is, why would
you even ask such a silly question?", but please let's have it on
record so we can finally get this patch merged...


> > ---
> >
> > v3:
> > - add the cred argument to security_locked_down() and adapt all callers
> > - keep using current_cred() in BPF, as the hook calls have been shifted
> >   to program load time (commit ff40e51043af ("bpf, lockdown, audit: Fix
> >   buggy SELinux lockdown permission checks"))
> > - in SELinux, don't ignore hook calls where cred == NULL, but use
> >   SECINITSID_KERNEL as the subject instead
> > - update explanations in the commit message
> >
> > v2: https://lore.kernel.org/lkml/20210517092006.803332-1-omosnace@redhat.com/
> > - change to a single hook based on suggestions by Casey Schaufler
> >
> > v1: https://lore.kernel.org/lkml/20210507114048.138933-1-omosnace@redhat.com/
> >
> >  arch/powerpc/xmon/xmon.c             |  4 ++--
> >  arch/x86/kernel/ioport.c             |  4 ++--
> >  arch/x86/kernel/msr.c                |  4 ++--
> >  arch/x86/mm/testmmiotrace.c          |  2 +-
> >  drivers/acpi/acpi_configfs.c         |  2 +-
> >  drivers/acpi/custom_method.c         |  2 +-
> >  drivers/acpi/osl.c                   |  3 ++-
> >  drivers/acpi/tables.c                |  2 +-
> >  drivers/char/mem.c                   |  2 +-
> >  drivers/cxl/mem.c                    |  2 +-
> >  drivers/firmware/efi/efi.c           |  2 +-
> >  drivers/firmware/efi/test/efi_test.c |  2 +-
> >  drivers/pci/pci-sysfs.c              |  6 +++---
> >  drivers/pci/proc.c                   |  6 +++---
> >  drivers/pci/syscall.c                |  2 +-
> >  drivers/pcmcia/cistpl.c              |  2 +-
> >  drivers/tty/serial/serial_core.c     |  2 +-
> >  fs/debugfs/file.c                    |  2 +-
> >  fs/debugfs/inode.c                   |  2 +-
> >  fs/proc/kcore.c                      |  2 +-
> >  fs/tracefs/inode.c                   |  2 +-
> >  include/linux/lsm_hook_defs.h        |  2 +-
> >  include/linux/lsm_hooks.h            |  1 +
> >  include/linux/security.h             |  4 ++--
> >  kernel/bpf/helpers.c                 | 10 ++++++----
> >  kernel/events/core.c                 |  2 +-
> >  kernel/kexec.c                       |  2 +-
> >  kernel/kexec_file.c                  |  2 +-
> >  kernel/module.c                      |  2 +-
> >  kernel/params.c                      |  2 +-
> >  kernel/power/hibernate.c             |  3 ++-
> >  kernel/trace/bpf_trace.c             | 20 ++++++++++++--------
> >  kernel/trace/ftrace.c                |  4 ++--
> >  kernel/trace/ring_buffer.c           |  2 +-
> >  kernel/trace/trace.c                 | 10 +++++-----
> >  kernel/trace/trace_events.c          |  2 +-
> >  kernel/trace/trace_events_hist.c     |  4 ++--
> >  kernel/trace/trace_events_synth.c    |  2 +-
> >  kernel/trace/trace_events_trigger.c  |  2 +-
> >  kernel/trace/trace_kprobe.c          |  6 +++---
> >  kernel/trace/trace_printk.c          |  2 +-
> >  kernel/trace/trace_stack.c           |  2 +-
> >  kernel/trace/trace_stat.c            |  2 +-
> >  kernel/trace/trace_uprobe.c          |  4 ++--
> >  net/xfrm/xfrm_user.c                 | 11 +++++++++--
> >  security/lockdown/lockdown.c         |  3 ++-
> >  security/security.c                  |  4 ++--
> >  security/selinux/hooks.c             |  7 +++++--
> >  48 files changed, 97 insertions(+), 77 deletions(-)
>
> --
> paul moore
> www.paul-moore.com
>

--
Ondrej Mosnacek
Software Engineer, Linux Security - SELinux kernel
Red Hat, Inc


^ permalink raw reply

* Re: [PATCH] powerpc/64: Avoid link stack corruption in kexec_wait()
From: Christophe Leroy @ 2021-08-31  8:54 UTC (permalink / raw)
  To: Daniel Axtens, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <87ilzm6str.fsf@dja-thinkpad.axtens.net>



Le 31/08/2021 à 08:17, Daniel Axtens a écrit :
> Hi Christophe,
> 
>> Use bcl 20,31,+4 instead of bl in order to preserve link stack.
>>
>> See commit c974809a26a1 ("powerpc/vdso: Avoid link stack corruption
>> in __get_datapage()") for details.
> 
>  From my understanding of that commit message, the change helps to keep
> the link stack correctly balanced which is helpful for performance,
> rather than for correctness. If I understand correctly, kexec_wait is
> not in a hot path - rather it is where CPUs spin while waiting for
> kexec. Is there any benefit in using the more complicated opcode in this
> situation?

AFAICS the main benefit is to keep things consistent over the kernel and not have to wonder "is it a 
hot path or not ? If it is I use bcl 20,31, if it is not I use bl". The best way to keep things in 
order is to always use the right instruction.

> 
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> ---
>>   arch/powerpc/kernel/misc_64.S | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
>> index 4b761a18a74d..613509907166 100644
>> --- a/arch/powerpc/kernel/misc_64.S
>> +++ b/arch/powerpc/kernel/misc_64.S
>> @@ -255,7 +255,7 @@ _GLOBAL(scom970_write)
>>    * Physical (hardware) cpu id should be in r3.
>>    */
>>   _GLOBAL(kexec_wait)
>> -	bl	1f
>> +	bcl	20,31,1f
>>   1:	mflr	r5
> 
> Would it be better to create a macro of some sort to wrap this unusual
> special form so that the meaning is more clear?

Not sure, I think people working with assembly will easily recognise that form whereas an obscure 
macro is always puzzling.

I like macros when they allow you to not repeat again and again the same sequence of several 
instructions, but here it is a single quite simple instruction which is not worth a macro in my mind.

Christophe

^ permalink raw reply

* Re: [PATCH] powerpc/32: Add support for out-of-line static calls
From: Peter Zijlstra @ 2021-08-31  8:53 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: linux-kernel, Steven Rostedt, Jason Baron, Paul Mackerras,
	Josh Poimboeuf, linuxppc-dev, Ard Biesheuvel
In-Reply-To: <cbfc0376461d02867c75cee72bb9167e16f4d0b0.1630396954.git.christophe.leroy@csgroup.eu>

On Tue, Aug 31, 2021 at 08:05:21AM +0000, Christophe Leroy wrote:

> +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)			\
> +	asm(".pushsection .text, \"ax\"				\n"	\
> +	    ".align 4						\n"	\
> +	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
> +	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
> +	    "	blr						\n"	\
> +	    "	nop						\n"	\
> +	    "	nop						\n"	\
> +	    "	nop						\n"	\
> +	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
> +	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> +	    ".popsection					\n")

> +static int patch_trampoline_32(u32 *addr, unsigned long target)
> +{
> +	int err;
> +
> +	err = patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(_R12, PPC_HA(target))));
> +	err |= patch_instruction(addr++, ppc_inst(PPC_RAW_ADDI(_R12, _R12, PPC_LO(target))));
> +	err |= patch_instruction(addr++, ppc_inst(PPC_RAW_MTCTR(_R12)));
> +	err |= patch_instruction(addr, ppc_inst(PPC_RAW_BCTR()));
> +
> +	return err;
> +}

There can be concurrent execution and modification; the above doesn't
look safe in that regard. What happens if you've say, done the first
two, but not the latter two and execution happens (on a different
CPU or through IRQ context, etc..)?

> +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
> +{
> +	int err;
> +	unsigned long target = (long)func;
> +
> +	if (!tramp)
> +		return;
> +
> +	mutex_lock(&text_mutex);
> +
> +	if (!func)
> +		err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
> +	else if (is_offset_in_branch_range((long)target - (long)tramp))
> +		err = patch_branch(tramp, target, 0);

These two are single instruction modifications and I'm assuming the
hardware is sane enough that execution sees either the old or the new
instruction. So this should work.

> +	else if (IS_ENABLED(CONFIG_PPC32))
> +		err = patch_trampoline_32(tramp, target);
> +	else
> +		BUILD_BUG();
> +
> +	mutex_unlock(&text_mutex);
> +
> +	if (err)
> +		panic("%s: patching failed %pS at %pS\n", __func__, func, tramp);
> +}
> +EXPORT_SYMBOL_GPL(arch_static_call_transform);

One possible solution that we explored on ARM64, was having the
trampoline be in 2 slots:


	b 1f

1:	blr
	nop
	nop
	nop

2:	blr
	nop
	nop
	nop

Where initially the first slot is active (per "b 1f"), then you write
the second slot, and as a final act, re-write the initial branch to
point to slot 2.

Then you execute synchronize_rcu_tasks() under your text mutex
(careful!) to ensure all users of your slot1 are gone and the next
modification repeats the whole thing, except for using slot1 etc..

Eventually I think Ard came up with the latest ARM64 proposal which puts
a literal in a RO section (could be the text section I suppose) and
loads and branches to that.

Anyway, the thing is, you can really only modify a single instruction at
the time and need to ensure concurrent execution is correct.

^ permalink raw reply

* [PATCH] powerpc/machdep: Remove stale functions from ppc_md structure
From: Christophe Leroy @ 2021-08-31  8:30 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

ppc_md.iommu_save() is not set anymore by any platform after
commit c40785ad305b ("powerpc/dart: Use a cachable DART").
So iommu_save() has become a nop and can be removed.

ppc_md.show_percpuinfo() is not set anymore by any platform after
commit 4350147a816b ("[PATCH] ppc64: SMU based macs cpufreq support").

Last users of ppc_md.rtc_read_val() and ppc_md.rtc_write_val() were
removed by commit 0f03a43b8f0f ("[POWERPC] Remove todc code from
ARCH=powerpc")

Last user of kgdb_map_scc() was removed by commit 17ce452f7ea3 ("kgdb,
powerpc: arch specific powerpc kgdb support").

ppc.machine_kexec_prepare() has not been used since
commit 8ee3e0d69623 ("powerpc: Remove the main legacy iSerie platform
code"). This allows the removal of machine_kexec_prepare() and the
rename of default_machine_kexec_prepare() into machine_kexec_prepare()

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/iommu.h   |  6 ------
 arch/powerpc/include/asm/machdep.h | 13 -------------
 arch/powerpc/kernel/setup-common.c |  3 ---
 arch/powerpc/kernel/swsusp_64.c    |  5 -----
 arch/powerpc/kernel/swsusp_asm64.S |  1 -
 arch/powerpc/kexec/core.c          | 13 -------------
 arch/powerpc/kexec/core_32.c       |  2 +-
 arch/powerpc/kexec/core_64.c       |  2 +-
 8 files changed, 2 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index bf3b84128525..c361212ac160 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -280,12 +280,6 @@ extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
 extern void iommu_init_early_pasemi(void);
 
 #if defined(CONFIG_PPC64) && defined(CONFIG_PM)
-static inline void iommu_save(void)
-{
-	if (ppc_md.iommu_save)
-		ppc_md.iommu_save();
-}
-
 static inline void iommu_restore(void)
 {
 	if (ppc_md.iommu_restore)
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 764f2732a821..a68311077d32 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -29,7 +29,6 @@ struct machdep_calls {
 	char		*name;
 #ifdef CONFIG_PPC64
 #ifdef CONFIG_PM
-	void		(*iommu_save)(void);
 	void		(*iommu_restore)(void);
 #endif
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
@@ -43,7 +42,6 @@ struct machdep_calls {
 	void		(*setup_arch)(void); /* Optional, may be NULL */
 	/* Optional, may be NULL. */
 	void		(*show_cpuinfo)(struct seq_file *m);
-	void		(*show_percpuinfo)(struct seq_file *m, int i);
 	/* Returns the current operating frequency of "cpu" in Hz */
 	unsigned long  	(*get_proc_freq)(unsigned int cpu);
 
@@ -74,8 +72,6 @@ struct machdep_calls {
 	int		(*set_rtc_time)(struct rtc_time *);
 	void		(*get_rtc_time)(struct rtc_time *);
 	time64_t	(*get_boot_time)(void);
-	unsigned char 	(*rtc_read_val)(int addr);
-	void		(*rtc_write_val)(int addr, unsigned char val);
 
 	void		(*calibrate_decr)(void);
 
@@ -141,8 +137,6 @@ struct machdep_calls {
 	   May be NULL. */
 	void		(*init)(void);
 
-	void		(*kgdb_map_scc)(void);
-
 	/*
 	 * optional PCI "hooks"
 	 */
@@ -187,13 +181,6 @@ struct machdep_calls {
 #ifdef CONFIG_KEXEC_CORE
 	void (*kexec_cpu_down)(int crash_shutdown, int secondary);
 
-	/* Called to do what every setup is needed on image and the
-	 * reboot code buffer. Returns 0 on success.
-	 * Provide your own (maybe dummy) implementation if your platform
-	 * claims to support kexec.
-	 */
-	int (*machine_kexec_prepare)(struct kimage *image);
-
 	/* Called to perform the _real_ kexec.
 	 * Do NOT allocate memory or fail here. We are past the point of
 	 * no return.
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b1e43b69a559..0b7894eed58d 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -278,9 +278,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
 			   proc_freq / 1000000, proc_freq % 1000000);
 
-	if (ppc_md.show_percpuinfo != NULL)
-		ppc_md.show_percpuinfo(m, cpu_id);
-
 	/* If we are a Freescale core do a simple check so
 	 * we dont have to keep adding cases in the future */
 	if (PVR_VER(pvr) & 0x8000) {
diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
index aeea97ad85cf..16ee3baaf09a 100644
--- a/arch/powerpc/kernel/swsusp_64.c
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -17,8 +17,3 @@ void do_after_copyback(void)
 	touch_softlockup_watchdog();
 	mb();
 }
-
-void _iommu_save(void)
-{
-	iommu_save();
-}
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 6d3189830dd3..96bb20715aa9 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -128,7 +128,6 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
 	 * stack pointer on the stack like a real stackframe */
 	addi	r1,r1,-128
 
-	bl _iommu_save
 	bl swsusp_save
 
 	/* restore LR */
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 48525e8b5730..a2242017e55f 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -48,19 +48,6 @@ void machine_crash_shutdown(struct pt_regs *regs)
 	default_machine_crash_shutdown(regs);
 }
 
-/*
- * Do what every setup is needed on image and the
- * reboot code buffer to allow us to avoid allocations
- * later.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
-	if (ppc_md.machine_kexec_prepare)
-		return ppc_md.machine_kexec_prepare(image);
-	else
-		return default_machine_kexec_prepare(image);
-}
-
 void machine_kexec_cleanup(struct kimage *image)
 {
 }
diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c
index bf9f1f906d64..b50aed48d09d 100644
--- a/arch/powerpc/kexec/core_32.c
+++ b/arch/powerpc/kexec/core_32.c
@@ -63,7 +63,7 @@ void default_machine_kexec(struct kimage *image)
 	(*rnk)(page_list, reboot_code_buffer_phys, image->start);
 }
 
-int default_machine_kexec_prepare(struct kimage *image)
+int machine_kexec_prepare(struct kimage *image)
 {
 	return 0;
 }
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 89c069d664a5..66678518b938 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -32,7 +32,7 @@
 #include <asm/svm.h>
 #include <asm/ultravisor.h>
 
-int default_machine_kexec_prepare(struct kimage *image)
+int machine_kexec_prepare(struct kimage *image)
 {
 	int i;
 	unsigned long begin, end;	/* limits of segment */
-- 
2.25.0


^ permalink raw reply related

* [PATCH] powerpc/machdep: Remove stale functions from ppc_md structure
From: Christophe Leroy @ 2021-08-31  8:30 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

ppc_md.iommu_save() is not set anymore by any platform after
commit c40785ad305b ("powerpc/dart: Use a cachable DART").
So iommu_save() has become a nop and can be removed.

ppc_md.show_percpuinfo() is not set anymore by any platform after
commit 4350147a816b ("[PATCH] ppc64: SMU based macs cpufreq support").

Last users of ppc_md.rtc_read_val() and ppc_md.rtc_write_val() were
removed by commit 0f03a43b8f0f ("[POWERPC] Remove todc code from
ARCH=powerpc")

Last user of kgdb_map_scc() was removed by commit 17ce452f7ea3 ("kgdb,
powerpc: arch specific powerpc kgdb support").

ppc.machine_kexec_prepare() has not been used since
commit 8ee3e0d69623 ("powerpc: Remove the main legacy iSerie platform
code"). This allows the removal of machine_kexec_prepare() and the
rename of default_machine_kexec_prepare() into machine_kexec_prepare()

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/iommu.h   |  6 ------
 arch/powerpc/include/asm/machdep.h | 13 -------------
 arch/powerpc/kernel/setup-common.c |  3 ---
 arch/powerpc/kernel/swsusp_64.c    |  5 -----
 arch/powerpc/kernel/swsusp_asm64.S |  1 -
 arch/powerpc/kexec/core.c          | 13 -------------
 arch/powerpc/kexec/core_32.c       |  2 +-
 arch/powerpc/kexec/core_64.c       |  2 +-
 8 files changed, 2 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index bf3b84128525..c361212ac160 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -280,12 +280,6 @@ extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
 extern void iommu_init_early_pasemi(void);
 
 #if defined(CONFIG_PPC64) && defined(CONFIG_PM)
-static inline void iommu_save(void)
-{
-	if (ppc_md.iommu_save)
-		ppc_md.iommu_save();
-}
-
 static inline void iommu_restore(void)
 {
 	if (ppc_md.iommu_restore)
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 764f2732a821..a68311077d32 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -29,7 +29,6 @@ struct machdep_calls {
 	char		*name;
 #ifdef CONFIG_PPC64
 #ifdef CONFIG_PM
-	void		(*iommu_save)(void);
 	void		(*iommu_restore)(void);
 #endif
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
@@ -43,7 +42,6 @@ struct machdep_calls {
 	void		(*setup_arch)(void); /* Optional, may be NULL */
 	/* Optional, may be NULL. */
 	void		(*show_cpuinfo)(struct seq_file *m);
-	void		(*show_percpuinfo)(struct seq_file *m, int i);
 	/* Returns the current operating frequency of "cpu" in Hz */
 	unsigned long  	(*get_proc_freq)(unsigned int cpu);
 
@@ -74,8 +72,6 @@ struct machdep_calls {
 	int		(*set_rtc_time)(struct rtc_time *);
 	void		(*get_rtc_time)(struct rtc_time *);
 	time64_t	(*get_boot_time)(void);
-	unsigned char 	(*rtc_read_val)(int addr);
-	void		(*rtc_write_val)(int addr, unsigned char val);
 
 	void		(*calibrate_decr)(void);
 
@@ -141,8 +137,6 @@ struct machdep_calls {
 	   May be NULL. */
 	void		(*init)(void);
 
-	void		(*kgdb_map_scc)(void);
-
 	/*
 	 * optional PCI "hooks"
 	 */
@@ -187,13 +181,6 @@ struct machdep_calls {
 #ifdef CONFIG_KEXEC_CORE
 	void (*kexec_cpu_down)(int crash_shutdown, int secondary);
 
-	/* Called to do what every setup is needed on image and the
-	 * reboot code buffer. Returns 0 on success.
-	 * Provide your own (maybe dummy) implementation if your platform
-	 * claims to support kexec.
-	 */
-	int (*machine_kexec_prepare)(struct kimage *image);
-
 	/* Called to perform the _real_ kexec.
 	 * Do NOT allocate memory or fail here. We are past the point of
 	 * no return.
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b1e43b69a559..0b7894eed58d 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -278,9 +278,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
 			   proc_freq / 1000000, proc_freq % 1000000);
 
-	if (ppc_md.show_percpuinfo != NULL)
-		ppc_md.show_percpuinfo(m, cpu_id);
-
 	/* If we are a Freescale core do a simple check so
 	 * we dont have to keep adding cases in the future */
 	if (PVR_VER(pvr) & 0x8000) {
diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
index aeea97ad85cf..16ee3baaf09a 100644
--- a/arch/powerpc/kernel/swsusp_64.c
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -17,8 +17,3 @@ void do_after_copyback(void)
 	touch_softlockup_watchdog();
 	mb();
 }
-
-void _iommu_save(void)
-{
-	iommu_save();
-}
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 6d3189830dd3..96bb20715aa9 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -128,7 +128,6 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
 	 * stack pointer on the stack like a real stackframe */
 	addi	r1,r1,-128
 
-	bl _iommu_save
 	bl swsusp_save
 
 	/* restore LR */
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 48525e8b5730..a2242017e55f 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -48,19 +48,6 @@ void machine_crash_shutdown(struct pt_regs *regs)
 	default_machine_crash_shutdown(regs);
 }
 
-/*
- * Do what every setup is needed on image and the
- * reboot code buffer to allow us to avoid allocations
- * later.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
-	if (ppc_md.machine_kexec_prepare)
-		return ppc_md.machine_kexec_prepare(image);
-	else
-		return default_machine_kexec_prepare(image);
-}
-
 void machine_kexec_cleanup(struct kimage *image)
 {
 }
diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c
index bf9f1f906d64..b50aed48d09d 100644
--- a/arch/powerpc/kexec/core_32.c
+++ b/arch/powerpc/kexec/core_32.c
@@ -63,7 +63,7 @@ void default_machine_kexec(struct kimage *image)
 	(*rnk)(page_list, reboot_code_buffer_phys, image->start);
 }
 
-int default_machine_kexec_prepare(struct kimage *image)
+int machine_kexec_prepare(struct kimage *image)
 {
 	return 0;
 }
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 89c069d664a5..66678518b938 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -32,7 +32,7 @@
 #include <asm/svm.h>
 #include <asm/ultravisor.h>
 
-int default_machine_kexec_prepare(struct kimage *image)
+int machine_kexec_prepare(struct kimage *image)
 {
 	int i;
 	unsigned long begin, end;	/* limits of segment */
-- 
2.25.0


^ permalink raw reply related

* [PATCH] powerpc/time: Remove generic_suspend_{dis/en}able_irqs()
From: Christophe Leroy @ 2021-08-31  8:29 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Commit d75d68cfef49 ("powerpc: Clean up obsolete code relating to
decrementer and timebase") made generic_suspend_enable_irqs() and
generic_suspend_disable_irqs() static.

Fold them into their only caller.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/time.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 934d8ae66cc6..cae8f03a44fe 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -631,8 +631,12 @@ void timer_broadcast_interrupt(void)
 #endif
 
 #ifdef CONFIG_SUSPEND
-static void generic_suspend_disable_irqs(void)
+/* Overrides the weak version in kernel/power/main.c */
+void arch_suspend_disable_irqs(void)
 {
+	if (ppc_md.suspend_disable_irqs)
+		ppc_md.suspend_disable_irqs();
+
 	/* Disable the decrementer, so that it doesn't interfere
 	 * with suspending.
 	 */
@@ -642,23 +646,11 @@ static void generic_suspend_disable_irqs(void)
 	set_dec(decrementer_max);
 }
 
-static void generic_suspend_enable_irqs(void)
-{
-	local_irq_enable();
-}
-
-/* Overrides the weak version in kernel/power/main.c */
-void arch_suspend_disable_irqs(void)
-{
-	if (ppc_md.suspend_disable_irqs)
-		ppc_md.suspend_disable_irqs();
-	generic_suspend_disable_irqs();
-}
-
 /* Overrides the weak version in kernel/power/main.c */
 void arch_suspend_enable_irqs(void)
 {
-	generic_suspend_enable_irqs();
+	local_irq_enable();
+
 	if (ppc_md.suspend_enable_irqs)
 		ppc_md.suspend_enable_irqs();
 }
-- 
2.25.0


^ permalink raw reply related

* [PATCH] powerpc/32: Add support for out-of-line static calls
From: Christophe Leroy @ 2021-08-31  8:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: Peter Zijlstra, linux-kernel, Steven Rostedt, Jason Baron,
	Josh Poimboeuf, linuxppc-dev, Ard Biesheuvel

Add support for out-of-line static calls on PPC32. This change
improve performance of calls to global function pointers by
using direct calls instead of indirect calls.

The trampoline is initialy populated with a 'blr' and 3 'nop'.
Then, depending on the target distance, arch_static_call_transform()
will either replace the 'blr' by a direct 'bl <target>' or an
indirect jump throught CTR register via a lis/addi/mtctr/bctr sequence.

static_call selftest is running successfully with this change.

With this patch, __do_irq() has the following sequence to trace
irq entries:

	c00049c0 <__SCT__tp_func_irq_entry>:
	c00049c0:	48 00 00 70 	b       c0004a30 <__traceiter_irq_entry>
	c00049c4:	60 00 00 00 	nop
	c00049c8:	60 00 00 00 	nop
	c00049cc:	60 00 00 00 	nop
...
	c00055a4 <__do_irq>:
...
	c00055b4:	7c 7f 1b 78 	mr      r31,r3
...
	c00055f0:	81 22 00 00 	lwz     r9,0(r2)
	c00055f4:	39 29 00 01 	addi    r9,r9,1
	c00055f8:	91 22 00 00 	stw     r9,0(r2)
	c00055fc:	3d 20 c0 af 	lis     r9,-16209
	c0005600:	81 29 74 cc 	lwz     r9,29900(r9)
	c0005604:	2c 09 00 00 	cmpwi   r9,0
	c0005608:	41 82 00 10 	beq     c0005618 <__do_irq+0x74>
	c000560c:	80 69 00 04 	lwz     r3,4(r9)
	c0005610:	7f e4 fb 78 	mr      r4,r31
	c0005614:	4b ff f3 ad 	bl      c00049c0 <__SCT__tp_func_irq_entry>

Before this patch, __do_irq() was doing the following to trace irq
entries:

	c0005700 <__do_irq>:
...
	c0005710:	7c 7e 1b 78 	mr      r30,r3
...
	c000574c:	93 e1 00 0c 	stw     r31,12(r1)
	c0005750:	81 22 00 00 	lwz     r9,0(r2)
	c0005754:	39 29 00 01 	addi    r9,r9,1
	c0005758:	91 22 00 00 	stw     r9,0(r2)
	c000575c:	3d 20 c0 af 	lis     r9,-16209
	c0005760:	83 e9 f4 cc 	lwz     r31,-2868(r9)
	c0005764:	2c 1f 00 00 	cmpwi   r31,0
	c0005768:	41 82 00 24 	beq     c000578c <__do_irq+0x8c>
	c000576c:	81 3f 00 00 	lwz     r9,0(r31)
	c0005770:	80 7f 00 04 	lwz     r3,4(r31)
	c0005774:	7d 29 03 a6 	mtctr   r9
	c0005778:	7f c4 f3 78 	mr      r4,r30
	c000577c:	4e 80 04 21 	bctrl
	c0005780:	85 3f 00 0c 	lwzu    r9,12(r31)
	c0005784:	2c 09 00 00 	cmpwi   r9,0
	c0005788:	40 82 ff e4 	bne     c000576c <__do_irq+0x6c>

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/Kconfig                   |  1 +
 arch/powerpc/include/asm/static_call.h | 31 +++++++++++++++++++
 arch/powerpc/kernel/Makefile           |  2 +-
 arch/powerpc/kernel/static_call.c      | 43 ++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/static_call.h
 create mode 100644 arch/powerpc/kernel/static_call.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 36b72d972568..a0fe69d8ec83 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -247,6 +247,7 @@ config PPC
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
 	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
+	select HAVE_STATIC_CALL			if PPC32
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select HUGETLB_PAGE_SIZE_VARIABLE	if PPC_BOOK3S_64 && HUGETLB_PAGE
diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
new file mode 100644
index 000000000000..7cbefd845afc
--- /dev/null
+++ b/arch/powerpc/include/asm/static_call.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_STATIC_CALL_H
+#define _ASM_POWERPC_STATIC_CALL_H
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)			\
+	asm(".pushsection .text, \"ax\"				\n"	\
+	    ".align 4						\n"	\
+	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
+	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
+	    "	b " #func "					\n"	\
+	    "	nop						\n"	\
+	    "	nop						\n"	\
+	    "	nop						\n"	\
+	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
+	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+	    ".popsection					\n")
+
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)			\
+	asm(".pushsection .text, \"ax\"				\n"	\
+	    ".align 4						\n"	\
+	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
+	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
+	    "	blr						\n"	\
+	    "	nop						\n"	\
+	    "	nop						\n"	\
+	    "	nop						\n"	\
+	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
+	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+	    ".popsection					\n")
+
+#endif /* _ASM_POWERPC_STATIC_CALL_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 7be36c1e1db6..0e3640e14eb1 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -106,7 +106,7 @@ extra-y				+= vmlinux.lds
 
 obj-$(CONFIG_RELOCATABLE)	+= reloc_$(BITS).o
 
-obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o early_32.o
+obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o early_32.o static_call.o
 obj-$(CONFIG_PPC64)		+= dma-iommu.o iommu.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
new file mode 100644
index 000000000000..72754bcaf758
--- /dev/null
+++ b/arch/powerpc/kernel/static_call.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/memory.h>
+#include <linux/static_call.h>
+
+#include <asm/code-patching.h>
+
+static int patch_trampoline_32(u32 *addr, unsigned long target)
+{
+	int err;
+
+	err = patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(_R12, PPC_HA(target))));
+	err |= patch_instruction(addr++, ppc_inst(PPC_RAW_ADDI(_R12, _R12, PPC_LO(target))));
+	err |= patch_instruction(addr++, ppc_inst(PPC_RAW_MTCTR(_R12)));
+	err |= patch_instruction(addr, ppc_inst(PPC_RAW_BCTR()));
+
+	return err;
+}
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+	int err;
+	unsigned long target = (long)func;
+
+	if (!tramp)
+		return;
+
+	mutex_lock(&text_mutex);
+
+	if (!func)
+		err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
+	else if (is_offset_in_branch_range((long)target - (long)tramp))
+		err = patch_branch(tramp, target, 0);
+	else if (IS_ENABLED(CONFIG_PPC32))
+		err = patch_trampoline_32(tramp, target);
+	else
+		BUILD_BUG();
+
+	mutex_unlock(&text_mutex);
+
+	if (err)
+		panic("%s: patching failed %pS at %pS\n", __func__, func, tramp);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH v2 1/3] powerpc/smp: Fix a crash while booting kvm guest with nr_cpus=2
From: Gautham R Shenoy @ 2021-08-31  7:08 UTC (permalink / raw)
  To: Srikar Dronamraju
  Cc: Nathan Lynch, Gautham R Shenoy, Vincent Guittot, Peter Zijlstra,
	Valentin Schneider, Aneesh Kumar K . V, linuxppc-dev, Ingo Molnar
In-Reply-To: <20210826100401.412519-2-srikar@linux.vnet.ibm.com>

On Thu, Aug 26, 2021 at 03:33:59PM +0530, Srikar Dronamraju wrote:
> Aneesh reported a crash with a fairly recent upstream kernel when
> booting kernel whose commandline was appended with nr_cpus=2
> 
> 1:mon> e
> cpu 0x1: Vector: 300 (Data Access) at [c000000008a67bd0]
>     pc: c00000000002557c: cpu_to_chip_id+0x3c/0x100
>     lr: c000000000058380: start_secondary+0x460/0xb00
>     sp: c000000008a67e70
>    msr: 8000000000001033
>    dar: 10
>  dsisr: 80000
>   current = 0xc00000000891bb00
>   paca    = 0xc0000018ff981f80   irqmask: 0x03   irq_happened: 0x01
>     pid   = 0, comm = swapper/1
> Linux version 5.13.0-rc3-15704-ga050a6d2b7e8 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #433 SMP Tue May 25 02:38:49 CDT 2021
> 1:mon> t
> [link register   ] c000000000058380 start_secondary+0x460/0xb00
> [c000000008a67e70] c000000008a67eb0 (unreliable)
> [c000000008a67eb0] c0000000000589d4 start_secondary+0xab4/0xb00
> [c000000008a67f90] c00000000000c654 start_secondary_prolog+0x10/0x14
> 
> Current code assumes that num_possible_cpus() is always greater than
> threads_per_core. However this may not be true when using nr_cpus=2 or
> similar options. Handle the case where num_possible_cpus() is not an
> exact multiple of  threads_per_core.
> 
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Cc: Nathan Lynch <nathanl@linux.ibm.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Valentin Schneider <valentin.schneider@arm.com>
> Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
> Cc: Vincent Guittot <vincent.guittot@linaro.org>
> Fixes: c1e53367dab1 ("powerpc/smp: Cache CPU to chip lookup")
> Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Debugged-by: Michael Ellerman <mpe@ellerman.id.au>
> Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>


This looks good to me.
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

> ---
> Changelog: v1 -> v2:
> v1: - https://lore.kernel.org/linuxppc-dev/20210821092419.167454-2-srikar@linux.vnet.ibm.com/t/#u
> Handled comment from Gautham Shenoy
> [ Updated to use DIV_ROUND_UP instead of max to handle more situations ]
> 
>  arch/powerpc/kernel/smp.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 6c6e4d934d86..bf11b3c4eb28 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -1074,7 +1074,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
>  	}
> 
>  	if (cpu_to_chip_id(boot_cpuid) != -1) {
> -		int idx = num_possible_cpus() / threads_per_core;
> +		int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
> 
>  		/*
>  		 * All threads of a core will all belong to the same core,
> -- 
> 2.18.2
> 

^ permalink raw reply

* [powerpc:next] BUILD SUCCESS 57dbbe590f152e5e8a3ff8bf5ba163df34eeae0b
From: kernel test robot @ 2021-08-31  6:31 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
branch HEAD: 57dbbe590f152e5e8a3ff8bf5ba163df34eeae0b  powerpc/pseries/iommu: Rename "direct window" to "dma window"

elapsed time: 6605m

configs tested: 158
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                               defconfig
arm                              allyesconfig
arm64                            allyesconfig
arm                              allmodconfig
s390                       zfcpdump_defconfig
mips                         tb0287_defconfig
m68k                       m5475evb_defconfig
powerpc                     sequoia_defconfig
arm                              alldefconfig
powerpc                     tqm8541_defconfig
mips                          ath25_defconfig
arc                     nsimosci_hs_defconfig
arm                         nhk8815_defconfig
powerpc                      cm5200_defconfig
powerpc                      acadia_defconfig
sh                   sh7770_generic_defconfig
arm                       multi_v4t_defconfig
sh                            migor_defconfig
arm                          pxa168_defconfig
um                           x86_64_defconfig
openrisc                            defconfig
arm                         vf610m4_defconfig
xtensa                          iss_defconfig
sh                  sh7785lcr_32bit_defconfig
sh                   rts7751r2dplus_defconfig
mips                     loongson2k_defconfig
powerpc                     redwood_defconfig
sh                           se7724_defconfig
arm                           stm32_defconfig
s390                             alldefconfig
arm                       aspeed_g4_defconfig
sh                          r7780mp_defconfig
arm                            mmp2_defconfig
arm                        mvebu_v7_defconfig
xtensa                           alldefconfig
arm                          imote2_defconfig
sh                          kfr2r09_defconfig
arm                           sama5_defconfig
sparc                            alldefconfig
parisc                generic-32bit_defconfig
arm                        keystone_defconfig
arm                       imx_v4_v5_defconfig
h8300                     edosk2674_defconfig
alpha                            alldefconfig
powerpc                      walnut_defconfig
mips                          ath79_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
x86_64                            allnoconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
nds32                               defconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
nios2                            allyesconfig
sh                               allmodconfig
h8300                            allyesconfig
arc                                 defconfig
xtensa                           allyesconfig
parisc                              defconfig
s390                             allyesconfig
s390                             allmodconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a014-20210827
x86_64               randconfig-a015-20210827
x86_64               randconfig-a016-20210827
x86_64               randconfig-a013-20210827
x86_64               randconfig-a012-20210827
x86_64               randconfig-a011-20210827
x86_64               randconfig-a014-20210829
x86_64               randconfig-a016-20210829
x86_64               randconfig-a015-20210829
x86_64               randconfig-a012-20210829
x86_64               randconfig-a013-20210829
x86_64               randconfig-a011-20210829
i386                 randconfig-a016-20210830
i386                 randconfig-a011-20210830
i386                 randconfig-a015-20210830
i386                 randconfig-a014-20210830
i386                 randconfig-a012-20210830
i386                 randconfig-a013-20210830
i386                 randconfig-a011-20210827
i386                 randconfig-a016-20210827
i386                 randconfig-a012-20210827
i386                 randconfig-a014-20210827
i386                 randconfig-a013-20210827
i386                 randconfig-a015-20210827
i386                 randconfig-a011-20210829
i386                 randconfig-a016-20210829
i386                 randconfig-a012-20210829
i386                 randconfig-a014-20210829
i386                 randconfig-a013-20210829
i386                 randconfig-a015-20210829
arc                  randconfig-r043-20210827
riscv                randconfig-r042-20210827
s390                 randconfig-r044-20210827
arc                  randconfig-r043-20210829
riscv                randconfig-r042-20210829
s390                 randconfig-r044-20210829
arc                  randconfig-r043-20210826
riscv                    nommu_k210_defconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allyesconfig
riscv                            allmodconfig
um                             i386_defconfig
x86_64                           allyesconfig
x86_64                    rhel-8.3-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a001-20210829
x86_64               randconfig-a006-20210829
x86_64               randconfig-a005-20210829
x86_64               randconfig-a003-20210829
x86_64               randconfig-a004-20210829
x86_64               randconfig-a002-20210829
i386                 randconfig-a001-20210829
i386                 randconfig-a006-20210829
i386                 randconfig-a005-20210829
i386                 randconfig-a004-20210829
i386                 randconfig-a003-20210829
x86_64               randconfig-a016-20210828
x86_64               randconfig-a015-20210828
x86_64               randconfig-a012-20210828
x86_64               randconfig-a013-20210828
x86_64               randconfig-a011-20210828
i386                 randconfig-a011-20210828
i386                 randconfig-a016-20210828
i386                 randconfig-a012-20210828
i386                 randconfig-a014-20210828
i386                 randconfig-a013-20210828
i386                 randconfig-a015-20210828
hexagon              randconfig-r041-20210829
hexagon              randconfig-r045-20210829
hexagon              randconfig-r041-20210826
hexagon              randconfig-r045-20210826
riscv                randconfig-r042-20210826
s390                 randconfig-r044-20210826

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH] powerpc/64: Avoid link stack corruption in kexec_wait()
From: Daniel Axtens @ 2021-08-31  6:17 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <3ffe7775f3fcda8e5fca6a7bc7db0b8251153c67.1629705147.git.christophe.leroy@csgroup.eu>

Hi Christophe,

> Use bcl 20,31,+4 instead of bl in order to preserve link stack.
>
> See commit c974809a26a1 ("powerpc/vdso: Avoid link stack corruption
> in __get_datapage()") for details.

From my understanding of that commit message, the change helps to keep
the link stack correctly balanced which is helpful for performance,
rather than for correctness. If I understand correctly, kexec_wait is
not in a hot path - rather it is where CPUs spin while waiting for
kexec. Is there any benefit in using the more complicated opcode in this
situation?

> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  arch/powerpc/kernel/misc_64.S | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
> index 4b761a18a74d..613509907166 100644
> --- a/arch/powerpc/kernel/misc_64.S
> +++ b/arch/powerpc/kernel/misc_64.S
> @@ -255,7 +255,7 @@ _GLOBAL(scom970_write)
>   * Physical (hardware) cpu id should be in r3.
>   */
>  _GLOBAL(kexec_wait)
> -	bl	1f
> +	bcl	20,31,1f
>  1:	mflr	r5

Would it be better to create a macro of some sort to wrap this unusual
special form so that the meaning is more clear?

Kind regards,
Daniel

>  	addi	r5,r5,kexec_flag-1b
>  
> -- 
> 2.25.0

^ permalink raw reply

* Re: [PATCH linux-next] power:pkeys: fix bugon.cocci warnings
From: Daniel Axtens @ 2021-08-31  6:01 UTC (permalink / raw)
  To: CGEL, Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Sandipan Das, linuxppc-dev, linux-kernel
  Cc: Zeal Robot, Jing Yangyang
In-Reply-To: <20210825064228.70487-1-deng.changcheng@zte.com.cn>

Hi Jing,

Thanks for your patch.

The patch looks good, but looking at the output of `make coccicheck
M=arch/powerpc MODE=report`, it looks like there might be a few other
things that we might want to fix. Would it be worth trying to make the
arch/powerpc directory free from coccinelle warnings in one big patch
series, and then we could add coccicheck to our automatic patch testing?
(see
e.g. https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20210825064228.70487-1-deng.changcheng@zte.com.cn/ )

For this patch, I think we should try to fix all of arch/powerpc at the
same time. The check points out the following other possible uses of
BUG_ON:

arch/powerpc/include/asm/book3s/64/pgtable-64k.h:68:2-5: WARNING: Use BUG_ON instead of if condition followed by BUG.
Please make sure the condition has no side effects (see conditional BUG_ON definition in include/asm-generic/bug.h)
arch/powerpc/platforms/cell/spufs/sched.c:908:2-5: WARNING: Use BUG_ON instead of if condition followed by BUG.
Please make sure the condition has no side effects (see conditional BUG_ON definition in include/asm-generic/bug.h)
arch/powerpc/platforms/powernv/idle.c:968:3-6: WARNING: Use BUG_ON instead of if condition followed by BUG.
Please make sure the condition has no side effects (see conditional BUG_ON definition in include/asm-generic/bug.h)
arch/powerpc/platforms/powernv/idle.c:456:2-5: WARNING: Use BUG_ON instead of if condition followed by BUG.
Please make sure the condition has no side effects (see conditional BUG_ON definition in include/asm-generic/bug.h)

Kind regards,
Daniel


> Use BUG_ON instead of a if condition followed by BUG.
>
> ./arch/powerpc/include/asm/book3s/64/pkeys.h:21:2-5:WARNING
> Use BUG_ON instead of if condition followed by BUG.
> ./arch/powerpc/include/asm/book3s/64/pkeys.h:14:2-5:WARNING
> Use BUG_ON instead of if condition followed by BUG.
>
> Generated by: scripts/coccinelle/misc/bugon.cocci
>
> Reported-by: Zeal Robot <zealci@zte.com.cn>
> Signed-off-by: Jing Yangyang <jing.yangyang@zte.com.cn>
> ---
>  arch/powerpc/include/asm/book3s/64/pkeys.h | 6 ++----
>  1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h
> index 5b17813..5f74f0c 100644
> --- a/arch/powerpc/include/asm/book3s/64/pkeys.h
> +++ b/arch/powerpc/include/asm/book3s/64/pkeys.h
> @@ -10,15 +10,13 @@ static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags)
>  	if (!mmu_has_feature(MMU_FTR_PKEY))
>  		return 0x0UL;
>  
> -	if (radix_enabled())
> -		BUG();
> +	BUG_ON(radix_enabled());
>  	return hash__vmflag_to_pte_pkey_bits(vm_flags);
>  }
>  
>  static inline u16 pte_to_pkey_bits(u64 pteflags)
>  {
> -	if (radix_enabled())
> -		BUG();
> +	BUG_ON(radix_enabled());
>  	return hash__pte_to_pkey_bits(pteflags);
>  }
>  
> -- 
> 1.8.3.1

^ permalink raw reply

* Re: [PATCH printk v1 00/10] printk: introduce atomic consoles and sync mode
From: Sergey Senozhatsky @ 2021-08-31  0:33 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Gautham R. Shenoy, Gustavo A. R. Silva, Srikar Dronamraju,
	Jiri Slaby, Peter Zijlstra, Al Cooper, Douglas Anderson,
	Paul Cercueil, Matthias Brugger, Paul Mackerras, H. Peter Anvin,
	Cengiz Can, Chengyang Fan, Daniel Thompson, Eddie Huang,
	Bhaskar Chowdhury, John Ogness, Changbin Du, Masahiro Yamada, x86,
	linux-mediatek, Tetsuo Handa, Ingo Molnar, linux-serial,
	kgdb-bugreport, linux-mips, Wang Qing, Sergey Senozhatsky,
	Paul E. McKenney, Johan Hovold, Steven Rostedt, Borislav Petkov,
	Nicholas Piggin, Hsin-Yi Wang, Sedat Dilek, Claire Chang,
	Thomas Gleixner, Andy Shevchenko, Andrij Abyzov, linux-arm-kernel,
	Sumit Garg, kuldip dwivedi, Andrew Jeffery, Greg Kroah-Hartman,
	Zhang Qilong, Nick Desaulniers, linux-kernel, Sergey Senozhatsky,
	Guenter Roeck, Jason Wessel, Christophe JAILLET, Andrew Morton,
	Maciej W. Rozycki, linuxppc-dev, Vitor Massaru Iha,
	Cédric Le Goater
In-Reply-To: <YQwHwT2wYM1dJfVk@alley>

On (21/08/05 17:47), Petr Mladek wrote:
[..]
> 3. After introducing console kthread(s):
> 
> 	int printk(...)
> 	{
> 		vprintk_store();
> 		wake_consoles_via_irqwork();
> 	}
> 
> 	+ in panic:
> 
> 	    + with atomic console like after this patchset?
> 	    + without atomic consoles?
> 
> 	+ during early boot?

I guess I'd also add netconsole to the list.

^ permalink raw reply

* Re: [PATCH v6 00/11] DDW + Indirect Mapping
From: David Christensen @ 2021-08-30 17:48 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210817063929.38701-1-leobras.c@gmail.com>



On 8/16/21 11:39 PM, Leonardo Bras wrote:
> So far it's assumed possible to map the guest RAM 1:1 to the bus, which
> works with a small number of devices. SRIOV changes it as the user can
> configure hundreds VFs and since phyp preallocates TCEs and does not
> allow IOMMU pages bigger than 64K, it has to limit the number of TCEs
> per a PE to limit waste of physical pages.
> 
> As of today, if the assumed direct mapping is not possible, DDW creation
> is skipped and the default DMA window "ibm,dma-window" is used instead.
> 
> Using the DDW instead of the default DMA window may allow to expand the
> amount of memory that can be DMA-mapped, given the number of pages (TCEs)
> may stay the same (or increase) and the default DMA window offers only
> 4k-pages while DDW may offer larger pages (4k, 64k, 16M ...).

So if I'm reading this correctly, VFIO applications requiring hugepage 
DMA mappings (e.g. 16M or 2GB) can be supported on an LPAR or DLPAR 
after this change, is that correct?  Any limitations based on processor 
or pHyp revision levels?

Dave

^ permalink raw reply

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
From: Christophe Leroy @ 2021-08-30 13:16 UTC (permalink / raw)
  To: Michael Ellerman, Nathan Chancellor
  Cc: linuxppc-dev, Paul Mackerras, linux-kernel
In-Reply-To: <87r1ebdu4t.fsf@mpe.ellerman.id.au>



Le 30/08/2021 à 13:55, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>>
>>>>>
>>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>>> config [1] when booting up in QEMU with [2]:
>>>>>
>>>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>>> [    1.625015] Modules linked in:
>>>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>>> [    1.635755] Call Trace:
>>>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>>> [    1.640597] Instruction dump:
>>>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>>> [    1.643220]
>>>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>
>>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>>> binutils 2.37. If there is any additional information I can provide,
>>>>> please let me know.
>>>>
>>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>>>
>>> It seems to be walking of the end of the pgd.
>>>
>>> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>>> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
>>> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>>
>> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)
> 
> But the page table doesn't span that far? 0_o
> 
>> static struct ptdump_range ptdump_range[] __ro_after_init = {
>> 	{TASK_SIZE_MAX, ~0UL},
>> 	{0, 0}
>> };
>>
>> Ok, well, ppc32 go up to 0xffffffff
>>
>> What's the top address to be used for ppc64 ?
> 
> It's different for (hash | radix) x page size.
> 
> The below works, and matches what we used to do.
> 
> Possibly we can come up with something cleaner, not sure.
> 
> cheers
> 
> 
> diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
> index 2d80d775d15e..3d3778a74969 100644
> --- a/arch/powerpc/mm/ptdump/ptdump.c
> +++ b/arch/powerpc/mm/ptdump/ptdump.c
> @@ -359,6 +359,8 @@ static int __init ptdump_init(void)
>   		ptdump_range[0].start = KERN_VIRT_START;
>   	else
>   		ptdump_range[0].start = PAGE_OFFSET;
> +
> +	ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);

Hum ...

It was:

	for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {

And there is

#define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))


Do we have the following ?

	pgd_index(KERN_VIRT_START) == 0


Shouldn't it be something like

	ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);


Christophe

^ permalink raw reply

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
From: Michael Ellerman @ 2021-08-30 11:55 UTC (permalink / raw)
  To: Christophe Leroy, Nathan Chancellor
  Cc: linuxppc-dev, Paul Mackerras, linux-kernel
In-Reply-To: <2bd9fa19-07b0-c187-c7dd-c6d544e34739@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>
>>>>
>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>> config [1] when booting up in QEMU with [2]:
>>>>
>>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>> [    1.625015] Modules linked in:
>>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>> [    1.635755] Call Trace:
>>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>> [    1.640597] Instruction dump:
>>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>> [    1.643220]
>>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>
>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>> binutils 2.37. If there is any additional information I can provide,
>>>> please let me know.
>>>
>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>> 
>> It seems to be walking of the end of the pgd.
>> 
>> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
>> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>
> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)

But the page table doesn't span that far? 0_o

> static struct ptdump_range ptdump_range[] __ro_after_init = {
> 	{TASK_SIZE_MAX, ~0UL},
> 	{0, 0}
> };
>
> Ok, well, ppc32 go up to 0xffffffff
>
> What's the top address to be used for ppc64 ?

It's different for (hash | radix) x page size.

The below works, and matches what we used to do.

Possibly we can come up with something cleaner, not sure.

cheers


diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 2d80d775d15e..3d3778a74969 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -359,6 +359,8 @@ static int __init ptdump_init(void)
 		ptdump_range[0].start = KERN_VIRT_START;
 	else
 		ptdump_range[0].start = PAGE_OFFSET;
+
+	ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);
 #endif
 
 	populate_markers();

^ permalink raw reply related

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
From: Christophe Leroy @ 2021-08-30  8:16 UTC (permalink / raw)
  To: Michael Ellerman, Nathan Chancellor
  Cc: linuxppc-dev, Paul Mackerras, linux-kernel
In-Reply-To: <87tuj7e5e5.fsf@mpe.ellerman.id.au>



Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Hi Nathan,
>>
>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>> Hi Christophe,
>>>
>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>
>>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>>
>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>> config [1] when booting up in QEMU with [2]:
>>>
>>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>> [    1.625015] Modules linked in:
>>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>> [    1.635755] Call Trace:
>>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>> [    1.640597] Instruction dump:
>>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>> [    1.643220]
>>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>
>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>> binutils 2.37. If there is any additional information I can provide,
>>> please let me know.
>>
>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
> 
> It seems to be walking of the end of the pgd.
> 
> [    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
> [    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
> [    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000

Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)

static struct ptdump_range ptdump_range[] __ro_after_init = {
	{TASK_SIZE_MAX, ~0UL},
	{0, 0}
};


Ok, well, ppc32 go up to 0xffffffff

What's the top address to be used for ppc64 ?

^ permalink raw reply

* Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
From: Michael Ellerman @ 2021-08-30  7:52 UTC (permalink / raw)
  To: Christophe Leroy, Nathan Chancellor
  Cc: linuxppc-dev, Paul Mackerras, linux-kernel
In-Reply-To: <5c479866-f31a-3579-9d71-357c85b777d0@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Hi Nathan,
>
> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>> Hi Christophe,
>> 
>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> 
>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>> config [1] when booting up in QEMU with [2]:
>> 
>> [    1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>> [    1.623058] Faulting instruction address: 0xc00000000045e5fc
>> [    1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>> [    1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>> [    1.625015] Modules linked in:
>> [    1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>> [    1.626237] NIP:  c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>> [    1.626839] REGS: c00000000752b820 TRAP: 0380   Not tainted  (5.14.0-rc7-next-20210827)
>> [    1.627528] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84002482  XER: 20000000
>> [    1.628449] CFAR: c000000000518300 IRQMASK: 0
>> [    1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>> [    1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>> [    1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>> [    1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>> [    1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>> [    1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>> [    1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>> [    1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>> [    1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>> [    1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>> [    1.635755] Call Trace:
>> [    1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>> [    1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>> [    1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>> [    1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>> [    1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>> [    1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>> [    1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>> [    1.640597] Instruction dump:
>> [    1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>> [    1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>> [    1.642771] ---[ end trace 6cf72b085097ad52 ]---
>> [    1.643220]
>> [    2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>> [    2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>> 
>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>> binutils 2.37. If there is any additional information I can provide,
>> please let me know.
>
> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.

It seems to be walking of the end of the pgd.

[    3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
[    3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000	<- end of pgd at PAGE_OFFSET + 4PB
[    3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
[    3.373957] walk_p4d_range: addr c010008000000000 end c010010000000000
[    3.374009] walk_p4d_range: addr c010010000000000 end c010018000000000
[    3.374060] walk_p4d_range: addr c010018000000000 end c010020000000000
[    3.376727] walk_p4d_range: addr c010020000000000 end c010028000000000
[    3.376780] walk_p4d_range: addr c010028000000000 end c010030000000000
[    3.376831] walk_p4d_range: addr c010030000000000 end c010038000000000
[    3.376883] walk_p4d_range: addr c010038000000000 end c010040000000000
[    3.376935] walk_p4d_range: addr c010040000000000 end c010048000000000
[    3.376988] walk_p4d_range: addr c010048000000000 end c010050000000000
[    3.377039] walk_p4d_range: addr c010050000000000 end c010058000000000
[    3.377091] walk_p4d_range: addr c010058000000000 end c010060000000000
[    3.377143] walk_p4d_range: addr c010060000000000 end c010068000000000
[    3.377244] walk_pud_range: addr c010060000000000 end c010068000000000
[    3.377374] walk_pmd_range: addr c010060100000000 end c010060140000000
[    3.377817] BUG: Unable to handle kernel data access on read at 0xf906a038d8ba8400
[    3.378247] Faulting instruction address: 0xc00000000045b4a4
[    3.378725] Oops: Kernel access of bad area, sig: 11 [#1]
[    3.378843] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
[    3.379118] Modules linked in:
[    3.379422] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc2+ #75
[    3.379751] NIP:  c00000000045b4a4 LR: c00000000045b430 CTR: c000000000b4b580
[    3.379833] REGS: c0000000085637c0 TRAP: 0300   Not tainted  (5.14.0-rc2+)
[    3.379940] MSR:  8000000002009033 <SF,VEC,EE,ME,IR,DR,RI,LE>  CR: 8800228f  XER: 20040000
[    3.380284] CFAR: c0000000001f5744 DAR: f906a038d8ba8400 DSISR: 40000000 IRQMASK: 0
[    3.380284] GPR00: c00000000045b430 c000000008563a60 c0000000028a7d00 000000000000003a
[    3.380284] GPR04: 00000000ffffe14d c000000008563748 ffffffffffffffff 00000000000001ff
[    3.380284] GPR08: f906a038d8ba8400 c0100601001fffff c01006013fffffff fffffffffffc51e0
[    3.380284] GPR12: 0000000000002000 c0000000ffffee80 0000000000000001 c000000008563be0
[    3.380284] GPR16: c000000001198118 c0000000028daef8 c000000002971a60 c0000000014bc868
[    3.380284] GPR20: c010060100200000 c000000002971a58 c000000002971a68 c010060100000000
[    3.380284] GPR24: 0000000000000003 c000000001198118 c000000001198118 c000000001000020
[    3.380284] GPR28: 0000000000000000 c010060140000000 c010068000000000 f906a038d8ba8400
[    3.381235] NIP [c00000000045b4a4] __walk_page_range+0x7f4/0xbd0
[    3.381906] LR [c00000000045b430] __walk_page_range+0x780/0xbd0
[    3.382120] Call Trace:
[    3.382240] [c000000008563a60] [c00000000045b430] __walk_page_range+0x780/0xbd0 (unreliable)
[    3.382445] [c000000008563bc0] [c00000000045ba94] walk_page_range_novma+0x74/0xb0
[    3.382548] [c000000008563c10] [c000000000514cd8] ptdump_walk_pgd+0x98/0x170
[    3.382630] [c000000008563c60] [c0000000000aaf70] ptdump_check_wx+0xb0/0x100
[    3.382774] [c000000008563d40] [c00000000008db18] mark_rodata_ro+0x48/0x80
[    3.382849] [c000000008563da0] [c000000000012a18] kernel_init+0x78/0x1c0
[    3.382926] [c000000008563e10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
[    3.383092] Instruction dump:
[    3.383341] 78c8f00e 7fe84a14 e9360000 e94100a8 39290010 7dc94836 7e89ba14 7d2900d0
[    3.383516] 7e944838 3934ffff 7c295040 40800098 <e93f0000> 2c290000 40820094 e9990028
[    3.384126] ---[ end trace d8e6479034d7a9d1 ]---

cheers

^ permalink raw reply

* Re: [PATCH v2 1/4] powerpc/64: handle MSR EE and RI in interrupt entry wrapper
From: Nicholas Piggin @ 2021-08-30  7:32 UTC (permalink / raw)
  To: Daniel Axtens, linuxppc-dev
In-Reply-To: <87mtp3e43c.fsf@linkitivity.dja.id.au>

Excerpts from Daniel Axtens's message of August 27, 2021 5:31 pm:
> Hi,
> 
>> Similarly to the system call change in the previous patch, the mtmsrd to
> 
> I don't actually know what patch this was - I assume it's from a series
> that has since been applied?

Good catch yes that used to be in the same series. Now merged, it's 
dd152f, I'll update the changelog.

>> enable RI can be combined with the mtmsrd to enable EE for interrupts
>> which enable the latter, which tends to be the important synchronous
>> interrupts (i.e., page faults).
>>
>> Do this by enabling EE and RI together at the beginning of the entry
>> wrapper if PACA_IRQ_HARD_DIS is clear, and just enabling RI if it is set
>> (which means something wanted EE=0).
> 
> 
>> diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
>> index 6b800d3e2681..e3228a911b35 100644
>> --- a/arch/powerpc/include/asm/interrupt.h
>> +++ b/arch/powerpc/include/asm/interrupt.h
>> @@ -148,9 +148,21 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
>>  #endif
>>  
>>  #ifdef CONFIG_PPC64
>> -	if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
>> +	bool trace_enable = false;
>> +
>> +	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) {
>> +		if (irq_soft_mask_set_return(IRQS_DISABLED) == IRQS_ENABLED)
> 
> In the previous code we had IRQS_ALL_DISABLED, now we just have
> IRQS_DISABLED. Is that intentional?

Hmm, no it should be IRQS_ALL_DISABLED. It doesn't matter much in
practice I think because MSR[EE] is disabled, but obviously shouldn't
be changed by this patch (and shouldn't be changed at all IMO having
ALL_DISABLED).

> 
>> +			trace_enable = true;
>> +	} else {
>> +		irq_soft_mask_set(IRQS_DISABLED);
>> +	}
>> +	/* If the interrupt was taken with HARD_DIS set, don't enable MSR[EE] */
>> +	if (local_paca->irq_happened & PACA_IRQ_HARD_DIS)
>> +		__hard_RI_enable();
>> +	else
>> +		__hard_irq_enable();
>> +	if (trace_enable)
>>  		trace_hardirqs_off();
>> -	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
>>  
>>  	if (user_mode(regs)) {
>>  		CT_WARN_ON(ct_state() != CONTEXT_USER);
> 
> 
>> @@ -901,11 +892,6 @@ INT_DEFINE_BEGIN(system_reset)
>>  	IVEC=0x100
>>  	IAREA=PACA_EXNMI
>>  	IVIRT=0 /* no virt entry point */
>> -	/*
>> -	 * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
>> -	 * being used, so a nested NMI exception would corrupt it.
>> -	 */
>> -	ISET_RI=0
>>  	ISTACK=0
>>  	IKVM_REAL=1
>>  INT_DEFINE_END(system_reset)
> 
>> @@ -986,8 +972,6 @@ EXC_COMMON_BEGIN(system_reset_common)
> 
> Right before this change, there's a comment that reads:
> 
> 	/*
> 	 * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
> 	 * to recover, but nested NMI will notice in_nmi and not recover
>     ...
> 
> You've taken out the bit that enables MSR_RI, which means the comment is
> no longer accurate.

Ah yep, that should be okay because we moved the RI enable to the NMI 
entry wrapper. Comment just needs a tweak.

> 
> Beyond that, I'm still trying to follow all the various changes in code
> flows. It seems to make at least vague sense: we move the setting of
> MSR_RI from the early asm to interrupt*enter_prepare. I'm just
> struggling to convince myself that when we hit the underlying handler
> that the RI states all line up.

Thanks,
Nick

^ permalink raw reply

* Re: [linux-next:master 2837/10638] powerpc64-linux-ld: warning: orphan section `.stubs' from `drivers/platform/chrome/cros_ec_trace.o' being placed in section `.stubs'
From: Nicholas Piggin @ 2021-08-30  7:19 UTC (permalink / raw)
  To: Gwendal Grignou, kernel test robot
  Cc: Enric Balletbo i Serra, Linux Memory Management List, kbuild-all,
	linuxppc-dev
In-Reply-To: <202108271745.fAmJos3o-lkp@intel.com>

Excerpts from kernel test robot's message of August 27, 2021 7:29 pm:
> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
> head:   88fac11862d38306dd0d2398015744877140390d
> commit: d453ceb6549af8798913de6a20444cb7200fdb69 [2837/10638] platform/chrome: sensorhub: Add trace events for sample
> config: powerpc64-randconfig-r021-20210827 (attached as .config)
> compiler: powerpc64-linux-gcc (GCC) 11.2.0
> reproduce (this is a W=1 build):
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=d453ceb6549af8798913de6a20444cb7200fdb69
>         git remote add linux-next https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
>         git fetch --no-tags linux-next master
>         git checkout d453ceb6549af8798913de6a20444cb7200fdb69
>         # save the attached .config to linux build tree
>         mkdir build_dir
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=powerpc SHELL=/bin/bash
> 
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
> 
> All warnings (new ones prefixed by >>):
> 
>>> powerpc64-linux-ld: warning: orphan section `.stubs' from `drivers/platform/chrome/cros_ec_trace.o' being placed in section `.stubs'
>>> powerpc64-linux-ld: warning: orphan section `.stubs' from `drivers/platform/chrome/cros_ec_trace.o' being placed in section `.stubs'
>>> powerpc64-linux-ld: warning: orphan section `.stubs' from `drivers/platform/chrome/cros_ec_trace.o' being placed in section `.stubs'

What creates .stubs? We have something in modules code which does but 
not vmlinux code. I can't see anything in modern binutils linker that
creates .stubs. And why is this particular file setting it off? Strange.

Thanks,
Nick

^ permalink raw reply

* Re: [RFC PATCH 4/6] powerpc/64s: Make hash MMU code build configurable
From: Christophe Leroy @ 2021-08-30  7:12 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev
In-Reply-To: <1630306319.j6p7gkgn6s.astroid@bobo.none>



Le 30/08/2021 à 08:55, Nicholas Piggin a écrit :
> Excerpts from Christophe Leroy's message of August 28, 2021 7:59 pm:
>>
>>
>> Le 27/08/2021 à 18:34, Nicholas Piggin a écrit :
>>> Introduce a new option CONFIG_PPC_64S_HASH_MMU which allows the 64s hash
>>> MMU code to be compiled out if radix is selected and the minimum
>>> supported CPU type is POWER9 or higher, and KVM is not selected.
>>>
>>> This saves 128kB kernel image size (90kB text) on powernv_defconfig
>>> minus KVM, 350kB on pseries_defconfig minus KVM, 40kB on a tiny config.
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>>    arch/powerpc/Kconfig                          |  1 +
>>>    arch/powerpc/include/asm/book3s/64/mmu.h      | 22 +++++-
>>>    .../include/asm/book3s/64/tlbflush-hash.h     |  7 ++
>>>    arch/powerpc/include/asm/book3s/pgtable.h     |  4 ++
>>>    arch/powerpc/include/asm/mmu.h                | 38 +++++++++--
>>>    arch/powerpc/include/asm/mmu_context.h        |  2 +
>>>    arch/powerpc/include/asm/paca.h               |  8 +++
>>>    arch/powerpc/kernel/asm-offsets.c             |  2 +
>>>    arch/powerpc/kernel/dt_cpu_ftrs.c             | 10 ++-
>>>    arch/powerpc/kernel/entry_64.S                |  4 +-
>>>    arch/powerpc/kernel/exceptions-64s.S          | 16 +++++
>>>    arch/powerpc/kernel/mce.c                     |  2 +-
>>>    arch/powerpc/kernel/mce_power.c               | 10 ++-
>>>    arch/powerpc/kernel/paca.c                    | 18 ++---
>>>    arch/powerpc/kernel/process.c                 | 13 ++--
>>>    arch/powerpc/kernel/prom.c                    |  2 +
>>>    arch/powerpc/kernel/setup_64.c                |  4 ++
>>>    arch/powerpc/kexec/core_64.c                  |  4 +-
>>>    arch/powerpc/kexec/ranges.c                   |  4 ++
>>>    arch/powerpc/kvm/Kconfig                      |  1 +
>>>    arch/powerpc/mm/book3s64/Makefile             | 17 +++--
>>>    arch/powerpc/mm/book3s64/hash_pgtable.c       |  1 -
>>>    arch/powerpc/mm/book3s64/hash_utils.c         | 10 ---
>>>    .../{hash_hugetlbpage.c => hugetlbpage.c}     |  6 ++
>>>    arch/powerpc/mm/book3s64/mmu_context.c        | 16 +++++
>>>    arch/powerpc/mm/book3s64/pgtable.c            | 22 +++++-
>>>    arch/powerpc/mm/book3s64/radix_pgtable.c      |  4 ++
>>>    arch/powerpc/mm/book3s64/slb.c                | 16 -----
>>>    arch/powerpc/mm/copro_fault.c                 |  2 +
>>>    arch/powerpc/mm/fault.c                       | 17 +++++
>>>    arch/powerpc/mm/pgtable.c                     | 10 ++-
>>>    arch/powerpc/platforms/Kconfig.cputype        | 20 +++++-
>>>    arch/powerpc/platforms/cell/Kconfig           |  1 +
>>>    arch/powerpc/platforms/maple/Kconfig          |  1 +
>>>    arch/powerpc/platforms/microwatt/Kconfig      |  2 +-
>>>    arch/powerpc/platforms/pasemi/Kconfig         |  1 +
>>>    arch/powerpc/platforms/powermac/Kconfig       |  1 +
>>>    arch/powerpc/platforms/powernv/Kconfig        |  2 +-
>>>    arch/powerpc/platforms/powernv/idle.c         |  2 +
>>>    arch/powerpc/platforms/powernv/setup.c        |  2 +
>>>    arch/powerpc/platforms/pseries/lpar.c         | 68 ++++++++++---------
>>>    arch/powerpc/platforms/pseries/lparcfg.c      |  2 +-
>>>    arch/powerpc/platforms/pseries/mobility.c     |  6 ++
>>>    arch/powerpc/platforms/pseries/ras.c          |  4 ++
>>>    arch/powerpc/platforms/pseries/reconfig.c     |  2 +
>>>    arch/powerpc/platforms/pseries/setup.c        |  6 +-
>>>    arch/powerpc/xmon/xmon.c                      |  8 ++-
>>>    47 files changed, 310 insertions(+), 111 deletions(-)
>>>    rename arch/powerpc/mm/book3s64/{hash_hugetlbpage.c => hugetlbpage.c} (95%)
>>
>> Whaou ! Huge patch.
>>
>> Several places you should be able to use IS_ENABLED() or simply radix_enabled() instead of #ifdefs
>> and rely on GCC to opt out stuff when radix_enabled() folds into 'true'.
> 
> A lot of it couldn't be done because of data structures but I'm sure I
> missed a lot. I will go over it again.
> 
>> I may do more detailed comments later when I have time.
> 
> Very much appreciated, but let me send out another version before you
> get the fine toothed comb out so I don't waste too much of your time.

One thing that would probably help reduce the size of the patch and help focus on the real changes 
would be to put pure code moves in a previous patch I think.

> If there are no objections to the idea from a high level.

I think the idea is good, I always wondered why we kept HASH and RADIX at the same time.

I did similar thing on book3s/32 when I separated 603 and 604+ so that you could build one or the 
other or both.

Christophe

^ permalink raw reply

* Re: [RFC PATCH 6/6] powerpc/microwatt: Stop building the hash MMU code
From: Nicholas Piggin @ 2021-08-30  6:58 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev, Michael Ellerman
In-Reply-To: <87y28jehrt.fsf@mpe.ellerman.id.au>

Excerpts from Michael Ellerman's message of August 30, 2021 1:24 pm:
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Le 27/08/2021 à 18:34, Nicholas Piggin a écrit :
>>> Microwatt is radix-only, so stop selecting the hash MMU code.
>>> 
>>> This saves 20kB compressed dtbImage and 56kB vmlinux size.
>>> 
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>>   arch/powerpc/configs/microwatt_defconfig | 1 -
>>>   arch/powerpc/platforms/Kconfig.cputype   | 1 +
>>>   arch/powerpc/platforms/microwatt/Kconfig | 1 -
>>>   3 files changed, 1 insertion(+), 2 deletions(-)
>>> 
>>> diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
>>> index bf5f2e5905eb..6fbad42f9e3d 100644
>>> --- a/arch/powerpc/configs/microwatt_defconfig
>>> +++ b/arch/powerpc/configs/microwatt_defconfig
>>> @@ -26,7 +26,6 @@ CONFIG_PPC_MICROWATT=y
>>>   # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
>>>   CONFIG_CPU_FREQ=y
>>>   CONFIG_HZ_100=y
>>> -# CONFIG_PPC_MEM_KEYS is not set
>>>   # CONFIG_SECCOMP is not set
>>>   # CONFIG_MQ_IOSCHED_KYBER is not set
>>>   # CONFIG_COREDUMP is not set
>>> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
>>> index 9b90fc501ed4..b9acd6c68c81 100644
>>> --- a/arch/powerpc/platforms/Kconfig.cputype
>>> +++ b/arch/powerpc/platforms/Kconfig.cputype
>>> @@ -371,6 +371,7 @@ config SPE
>>>   config PPC_64S_HASH_MMU
>>>   	bool "Hash MMU Support"
>>>   	depends on PPC_BOOK3S_64
>>> +	depends on !PPC_MICROWATT
>>
>> Do we really want to start nit picking which platforms can select such or such option ?
>> Isn't it enough to get it off through the defconfig ?
>>
>> Is PPC_MICROWATT mutually exclusive with other book3s64 configs ? Don't we build multiplatform kernels ?
> 
> Yeah that belongs in the microwatt defconfig, not as a forced exclusion
> in Kconfig.

Okay I'll fix that up. In that case the select POWER9 should not be in 
Kconfig but just defconfig too. Which is fine, it just means oldconfig
won't change it.

Thanks,
Nick

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox