LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* Re: [RFC PATCH 12/12] powerpc/64s: power4 nap fixup in C
From: Nicholas Piggin @ 2020-09-07  4:02 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: linuxppc-dev
In-Reply-To: <53f5fa9b-03d4-150e-199b-7ffa75d91666@csgroup.eu>

Excerpts from Christophe Leroy's message of September 6, 2020 5:32 pm:
> 
> 
> Le 05/09/2020 à 19:43, Nicholas Piggin a écrit :
>> There is no need for this to be in asm, use the new intrrupt entry wrapper.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/include/asm/interrupt.h   | 14 ++++++++
>>   arch/powerpc/include/asm/processor.h   |  1 +
>>   arch/powerpc/include/asm/thread_info.h |  6 ++++
>>   arch/powerpc/kernel/exceptions-64s.S   | 45 --------------------------
>>   arch/powerpc/kernel/idle_book3s.S      |  4 +++
>>   5 files changed, 25 insertions(+), 45 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
>> index 3ae3d2f93b61..acfcc7d5779b 100644
>> --- a/arch/powerpc/include/asm/interrupt.h
>> +++ b/arch/powerpc/include/asm/interrupt.h
>> @@ -8,6 +8,16 @@
>>   #include <asm/ftrace.h>
>>   #include <asm/runlatch.h>
>>   
>> +static inline void nap_adjust_return(struct pt_regs *regs)
>> +{
>> +#ifdef CONFIG_PPC_970_NAP
> 
> Avoid #ifdef, you can use IS_ENABLED(CONFIG_PPC_970_NAP) in the 'if' below

Yeah I guess.

>> +	if (test_thread_local_flags(_TLF_NAPPING)) {
>> +		clear_thread_local_flags(_TLF_NAPPING);
>> +		regs->nip = (unsigned long)power4_idle_nap_return;
>> +	}
>> +#endif
>> +}
>> +
>>   #ifdef CONFIG_PPC_BOOK3S_64
>>   static inline void interrupt_enter_prepare(struct pt_regs *regs)
>>   {
>> @@ -33,6 +43,8 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs)
>>   	if (cpu_has_feature(CPU_FTR_CTRL) &&
>>   	    !test_thread_local_flags(_TLF_RUNLATCH))
>>   		__ppc64_runlatch_on();
>> +
>> +	nap_adjust_return(regs);
>>   }
>>   
>>   #else /* CONFIG_PPC_BOOK3S_64 */
>> @@ -72,6 +84,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
>>   
>>   	this_cpu_set_ftrace_enabled(0);
>>   
>> +	nap_adjust_return(regs);
>> +
>>   	nmi_enter();
>>   }
>>   
>> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
>> index ed0d633ab5aa..3da1dba91386 100644
>> --- a/arch/powerpc/include/asm/processor.h
>> +++ b/arch/powerpc/include/asm/processor.h
>> @@ -424,6 +424,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
>>   extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
>>   #ifdef CONFIG_PPC_970_NAP
>>   extern void power4_idle_nap(void);
>> +extern void power4_idle_nap_return(void);
> 
> Please please please, 'extern' keyword is pointless and deprecated for 
> function prototypes. Don't add new ones.
> 
> Also, put it outside the #ifdef, so that you can use IS_ENABLED() 
> instead of #ifdef when using it.

I just copy paste and forget to remove it. I expect someone will do a 
"cleanup" patch to get rid of them in one go, I find a random assortment
of extern and not extern to be even uglier :(

>>   #endif
>>   
>>   extern unsigned long cpuidle_disable;
>> diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
>> index ca6c97025704..9b15f7edb0cb 100644
>> --- a/arch/powerpc/include/asm/thread_info.h
>> +++ b/arch/powerpc/include/asm/thread_info.h
>> @@ -156,6 +156,12 @@ void arch_setup_new_exec(void);
>>   
>>   #ifndef __ASSEMBLY__
>>   
>> +static inline void clear_thread_local_flags(unsigned int flags)
>> +{
>> +	struct thread_info *ti = current_thread_info();
>> +	ti->local_flags &= ~flags;
>> +}
>> +
>>   static inline bool test_thread_local_flags(unsigned int flags)
>>   {
>>   	struct thread_info *ti = current_thread_info();
>> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>> index 227bad3a586d..1db6b3438c88 100644
>> --- a/arch/powerpc/kernel/exceptions-64s.S
>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>> @@ -692,25 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
>>   	ld	r1,GPR1(r1)
>>   .endm
>>   
>> -/*
>> - * When the idle code in power4_idle puts the CPU into NAP mode,
>> - * it has to do so in a loop, and relies on the external interrupt
>> - * and decrementer interrupt entry code to get it out of the loop.
>> - * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
>> - * to signal that it is in the loop and needs help to get out.
>> - */
>> -#ifdef CONFIG_PPC_970_NAP
>> -#define FINISH_NAP				\
>> -BEGIN_FTR_SECTION				\
>> -	ld	r11, PACA_THREAD_INFO(r13);	\
>> -	ld	r9,TI_LOCAL_FLAGS(r11);		\
>> -	andi.	r10,r9,_TLF_NAPPING;		\
>> -	bnel	power4_fixup_nap;		\
>> -END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
>> -#else
>> -#define FINISH_NAP
>> -#endif
>> -
>>   /*
>>    * There are a few constraints to be concerned with.
>>    * - Real mode exceptions code/data must be located at their physical location.
>> @@ -1250,7 +1231,6 @@ EXC_COMMON_BEGIN(machine_check_common)
>>   	 */
>>   	GEN_COMMON machine_check
>>   
>> -	FINISH_NAP
>>   	/* Enable MSR_RI when finished with PACA_EXMC */
>>   	li	r10,MSR_RI
>>   	mtmsrd 	r10,1
>> @@ -1572,7 +1552,6 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
>>   EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
>>   EXC_COMMON_BEGIN(hardware_interrupt_common)
>>   	GEN_COMMON hardware_interrupt
>> -	FINISH_NAP
>>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>>   	bl	do_IRQ
>>   	b	interrupt_return
>> @@ -1757,7 +1736,6 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
>>   EXC_VIRT_END(decrementer, 0x4900, 0x80)
>>   EXC_COMMON_BEGIN(decrementer_common)
>>   	GEN_COMMON decrementer
>> -	FINISH_NAP
>>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>>   	bl	timer_interrupt
>>   	b	interrupt_return
>> @@ -1842,7 +1820,6 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
>>   EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
>>   EXC_COMMON_BEGIN(doorbell_super_common)
>>   	GEN_COMMON doorbell_super
>> -	FINISH_NAP
>>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>>   #ifdef CONFIG_PPC_DOORBELL
>>   	bl	doorbell_exception
>> @@ -2196,7 +2173,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
>>   
>>   EXC_COMMON_BEGIN(hmi_exception_common)
>>   	GEN_COMMON hmi_exception
>> -	FINISH_NAP
>>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>>   	bl	handle_hmi_exception
>>   	b	interrupt_return
>> @@ -2225,7 +2201,6 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
>>   EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
>>   EXC_COMMON_BEGIN(h_doorbell_common)
>>   	GEN_COMMON h_doorbell
>> -	FINISH_NAP
>>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>>   #ifdef CONFIG_PPC_DOORBELL
>>   	bl	doorbell_exception
>> @@ -2258,7 +2233,6 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
>>   EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
>>   EXC_COMMON_BEGIN(h_virt_irq_common)
>>   	GEN_COMMON h_virt_irq
>> -	FINISH_NAP
>>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>>   	bl	do_IRQ
>>   	b	interrupt_return
>> @@ -2304,7 +2278,6 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
>>   EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
>>   EXC_COMMON_BEGIN(performance_monitor_common)
>>   	GEN_COMMON performance_monitor
>> -	FINISH_NAP
>>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>>   	bl	performance_monitor_exception
>>   	b	interrupt_return
>> @@ -3032,24 +3005,6 @@ USE_FIXED_SECTION(virt_trampolines)
>>   __end_interrupts:
>>   DEFINE_FIXED_SYMBOL(__end_interrupts)
>>   
>> -#ifdef CONFIG_PPC_970_NAP
>> -	/*
>> -	 * Called by exception entry code if _TLF_NAPPING was set, this clears
>> -	 * the NAPPING flag, and redirects the exception exit to
>> -	 * power4_fixup_nap_return.
>> -	 */
>> -	.globl power4_fixup_nap
>> -EXC_COMMON_BEGIN(power4_fixup_nap)
>> -	andc	r9,r9,r10
>> -	std	r9,TI_LOCAL_FLAGS(r11)
>> -	LOAD_REG_ADDR(r10, power4_idle_nap_return)
>> -	std	r10,_NIP(r1)
>> -	blr
>> -
>> -power4_idle_nap_return:
>> -	blr
>> -#endif
>> -
>>   CLOSE_FIXED_SECTION(real_vectors);
>>   CLOSE_FIXED_SECTION(real_trampolines);
>>   CLOSE_FIXED_SECTION(virt_vectors);
>> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
>> index 22f249b6f58d..27d2e6a72ec9 100644
>> --- a/arch/powerpc/kernel/idle_book3s.S
>> +++ b/arch/powerpc/kernel/idle_book3s.S
>> @@ -201,4 +201,8 @@ _GLOBAL(power4_idle_nap)
>>   	mtmsrd	r7
>>   	isync
>>   	b	1b
>> +
>> +	.globl power4_idle_nap_return
>> +power4_idle_nap_return:
>> +	blr
> 
> Can't this be written in C somewhere ?

Yes I think so if you did the entire power4_idle_nap function in C and 
used inline asm for the mtmsrd and fixup label (basically the same way
as copy user exceptions return to a fixup location).

You have to return to the same C function of course because you can't
control the stack otherwise. But I don't care too much about avoiding
an extra function call/return here, all the important stuff is in C now. 

Thanks,
Nick

^ permalink raw reply

* Re: [RFC PATCH 12/12] powerpc/64s: power4 nap fixup in C
From: Christophe Leroy @ 2020-09-07  4:48 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev
In-Reply-To: <1599450777.weoux16jk2.astroid@bobo.none>



Le 07/09/2020 à 06:02, Nicholas Piggin a écrit :
> Excerpts from Christophe Leroy's message of September 6, 2020 5:32 pm:
>>
>>
>> Le 05/09/2020 à 19:43, Nicholas Piggin a écrit :
>>> There is no need for this to be in asm, use the new intrrupt entry wrapper.
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>>    arch/powerpc/include/asm/interrupt.h   | 14 ++++++++
>>>    arch/powerpc/include/asm/processor.h   |  1 +
>>>    arch/powerpc/include/asm/thread_info.h |  6 ++++
>>>    arch/powerpc/kernel/exceptions-64s.S   | 45 --------------------------
>>>    arch/powerpc/kernel/idle_book3s.S      |  4 +++
>>>    5 files changed, 25 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
>>> index ed0d633ab5aa..3da1dba91386 100644
>>> --- a/arch/powerpc/include/asm/processor.h
>>> +++ b/arch/powerpc/include/asm/processor.h
>>> @@ -424,6 +424,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
>>>    extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
>>>    #ifdef CONFIG_PPC_970_NAP
>>>    extern void power4_idle_nap(void);
>>> +extern void power4_idle_nap_return(void);
>>
>> Please please please, 'extern' keyword is pointless and deprecated for
>> function prototypes. Don't add new ones.
>>
>> Also, put it outside the #ifdef, so that you can use IS_ENABLED()
>> instead of #ifdef when using it.
> 
> I just copy paste and forget to remove it. I expect someone will do a
> "cleanup" patch to get rid of them in one go, I find a random assortment
> of extern and not extern to be even uglier :(

If we don't want to make fixes backporting a huge headache, some 
transition with random assortment is the price to pay.

One day, when 'extern' have become the minority, we can get rid of the 
few last ones.

But if someone believe it is not such a problem with backporting, I can 
provide a cleanup patch now.

Christophe

^ permalink raw reply

* [Bug 209181] New: kernel BUG at arch/powerpc/mm/pgtable.c:304!
From: bugzilla-daemon @ 2020-09-07  5:44 UTC (permalink / raw)
  To: linuxppc-dev

https://bugzilla.kernel.org/show_bug.cgi?id=209181

            Bug ID: 209181
           Summary: kernel BUG at arch/powerpc/mm/pgtable.c:304!
           Product: Memory Management
           Version: 2.5
    Kernel Version: Linux 5.9-rc4
          Hardware: All
                OS: Linux
              Tree: Mainline
            Status: NEW
          Severity: normal
          Priority: P1
         Component: Page Allocator
          Assignee: akpm@linux-foundation.org
          Reporter: zlang@redhat.com
                CC: linuxppc-dev@lists.ozlabs.org
        Regression: No

Description of problem:
The latest upstream mainline kernel always panic on ppc64le machine (P9) as
below:

[    1.406462] Loading compiled-in X.509 certificates 
[    1.436966] Loaded X.509 cert 'Build time autogenerated kernel key:
834a47793f474746e698c2f3a32aa53ffded35db' 
[    1.437154] zswap: loaded using pool lzo/zbud 
[    1.437509] debug_vm_pgtable: [debug_vm_pgtable         ]: Validating
architecture page table helpers 
[    1.437571] ------------[ cut here ]------------ 
[    1.437584] WARNING: CPU: 0 PID: 1 at arch/powerpc/mm/pgtable.c:185
set_pte_at+0xd8/0x1c0 
[    1.437589] Modules linked in: 
[    1.437596] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc4 #1 
[    1.437602] NIP:  c00000000009bb28 LR: c00000000152e1c0 CTR:
0000000000000000 
[    1.437608] REGS: c0000001fb6eb7b0 TRAP: 0700   Not tainted  (5.9.0-rc4) 
[    1.437613] MSR:  8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE>  CR: 24002824 
XER: 0000000a 
[    1.437624] CFAR: c00000000009ba74 IRQMASK: 0  
[    1.437624] GPR00: c00000000152e1c0 c0000001fb6eba40 c000000002138900
c0000000120f4100  
[    1.437624] GPR04: 000b701718150000 c0000000122400a8 05014e0100000080
0000000000000000  
[    1.437624] GPR08: 0000000000000080 07000000000000c0 05000000000000c0
0000000000000001  
[    1.437624] GPR12: 0000000000002000 c000000004050000 0000000000000000
c000000001569d38  
[    1.437624] GPR16: c000000012210000 f0ffffffffffffff c0000001fb52f8a0
c000000002231cb8  
[    1.437624] GPR20: c0000000010d8de0 c000000001000000 c00000001220b2e8
c0000000122f8000  
[    1.437624] GPR24: 0000000000000100 000000000000014e c0000000122f8028
8000000000000105  
[    1.437624] GPR28: 000b701718150000 c0000000122118c0 c0000000120f4100
c0000000122400a8  
[    1.437668] NIP [c00000000009bb28] set_pte_at+0xd8/0x1c0 
[    1.437674] LR [c00000000152e1c0] debug_vm_pgtable+0x8f4/0x1e14 
[    1.437679] Call Trace: 
[    1.437685] [c0000001fb6eba40] [c000000001082f48] _raw_spin_lock+0x88/0x100
(unreliable) 
[    1.437693] [c0000001fb6eba80] [c00000000152dfd4]
debug_vm_pgtable+0x708/0x1e14 
[    1.437700] [c0000001fb6ebb90] [c00000000001208c] do_one_initcall+0xbc/0x5f0 
[    1.437707] [c0000001fb6ebc80] [c0000000014e4d04]
kernel_init_freeable+0x4bc/0x58c 
[    1.437714] [c0000001fb6ebdb0] [c000000000012de8] kernel_init+0x2c/0x164 
[    1.437721] [c0000001fb6ebe20] [c00000000000d5d0]
ret_from_kernel_thread+0x5c/0x6c 
[    1.437726] Instruction dump: 
[    1.437731] 41820068 e8010050 ebc10030 7c0803a6 4bffff8c 4bffff88 3d200700
792907c6  
[    1.437741] 612900c0 7d4a4838 2faa00c0 419eff54 <0fe00000> 4bffff4c 3fe0bfef
63ffffff  
[    1.437751] irq event stamp: 275292 
[    1.437757] hardirqs last  enabled at (275291): [<c0000000004ef4d0>]
inc_zone_page_state+0xa0/0xd0 
[    1.437764] hardirqs last disabled at (275292): [<c0000000000096fc>]
program_check_common_virt+0x2bc/0x310 
[    1.437771] softirqs last  enabled at (273036): [<c000000000f97044>]
inet6_register_protosw+0x154/0x2a0 
[    1.437778] softirqs last disabled at (273034): [<c000000000f96f34>]
inet6_register_protosw+0x44/0x2a0 
[    1.437784] ---[ end trace 39aeb34808a575d2 ]--- 
[    1.437790] ------------[ cut here ]------------ 
[    1.437795] kernel BUG at arch/powerpc/mm/pgtable.c:304! 
[    1.437801] Oops: Exception in kernel mode, sig: 5 [#1] 
[    1.437805] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries 
[    1.437807] Modules linked in: 
[    1.437811] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G        W        
5.9.0-rc4 #1 
[    1.437815] NIP:  c00000000009c1a8 LR: c0000000005f9de0 CTR:
0000000000000000 
[    1.437819] REGS: c0000001fb6eb720 TRAP: 0700   Tainted: G        W         
(5.9.0-rc4) 
[    1.437822] MSR:  8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE>  CR: 24002828 
XER: 0000000a 
[    1.437829] CFAR: c00000000009c148 IRQMASK: 0  
[    1.437829] GPR00: c0000000005f9de0 c0000001fb6eb9b0 c000000002138900
c0000000120f4100  
[    1.437829] GPR04: 000b701718150000 c0000000122400a8 00000000122f8000
0000000000802f12  
[    1.437829] GPR08: 0000000000000000 0000000000000001 0000000000000028
0000000000000001  
[    1.437829] GPR12: 0000000000002000 c000000004050000 0000000000000000
c000000001569d38  
[    1.437829] GPR16: c000000012210000 f0ffffffffffffff c0000001fb52f8a0
c000000002231cb8  
[    1.437829] GPR20: c0000000010d8de0 c000000001000000 c00000001220b2e8
c0000000122f8000  
[    1.437829] GPR24: 0000000000000100 0000000000000008 c000000002231ca8
000000000000000a  
[    1.437829] GPR28: c000000002231cb8 c000000002231cb0 000b701718150000
000000000002dc05  
[    1.437860] NIP [c00000000009c1a8] assert_pte_locked+0x218/0x360 
[    1.437864] LR [c0000000005f9de0] pte_update+0xc0/0x180 
[    1.437867] Call Trace: 
[    1.437870] [c0000001fb6eb9b0] [0000000000000100] 0x100 (unreliable) 
[    1.437875] [c0000001fb6eba20] [c0000000005f9de0] pte_update+0xc0/0x180 
[    1.437879] [c0000001fb6eba80] [c00000000152e1e0]
debug_vm_pgtable+0x914/0x1e14 
[    1.437884] [c0000001fb6ebb90] [c00000000001208c] do_one_initcall+0xbc/0x5f0 
[    1.437888] [c0000001fb6ebc80] [c0000000014e4d04]
kernel_init_freeable+0x4bc/0x58c 
[    1.437893] [c0000001fb6ebdb0] [c000000000012de8] kernel_init+0x2c/0x164 
[    1.437897] [c0000001fb6ebe20] [c00000000000d5d0]
ret_from_kernel_thread+0x5c/0x6c 
[    1.437900] Instruction dump: 
[    1.437903] 7c0803a6 60000000 39400001 7fdffc36 7d4ad830 394affff 7d4a07b4
7d4af838  
[    1.437909] 794a1f24 7d09502a 7d090074 7929d182 <0b090000> 79090022 550ac03e
ebfc0000  
[    1.437916] ---[ end trace 39aeb34808a575d3 ]--

How reproducible:
100% on our ppc64le machines

Steps to Reproduce:
1. git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
2. build and intall the kernel (I'll update the .config file later)
3. boot the kernel
<panic at here>

Additional info:
The HEAD of my test kernel is:
commit f4d51dffc6c01a9e94650d95ce0104964f8ae822
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sun Sep 6 17:11:40 2020 -0700

    Linux 5.9-rc4

-- 
You are receiving this mail because:
You are on the CC list for the bug.

^ permalink raw reply

* [Bug 209181] kernel BUG at arch/powerpc/mm/pgtable.c:304!
From: bugzilla-daemon @ 2020-09-07  5:52 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-209181-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=209181

Christophe Leroy (christophe.leroy@csgroup.eu) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |christophe.leroy@csgroup.eu

--- Comment #1 from Christophe Leroy (christophe.leroy@csgroup.eu) ---
See https://bugzilla.kernel.org/show_bug.cgi?id=209029

Patch at
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20200902040122.136414-1-aneesh.kumar@linux.ibm.com/
to deactivate CONFIG_DEBUG_VM_PGTABLE on powerpc until the issue is fixes.

-- 
You are receiving this mail because:
You are on the CC list for the bug.

^ permalink raw reply

* [Bug 209181] kernel BUG at arch/powerpc/mm/pgtable.c:304!
From: bugzilla-daemon @ 2020-09-07  6:18 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-209181-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=209181

--- Comment #2 from Zorro Lang (zlang@redhat.com) ---
(In reply to Christophe Leroy from comment #1)
> See https://bugzilla.kernel.org/show_bug.cgi?id=209029
> 
> Patch at
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20200902040122.
> 136414-1-aneesh.kumar@linux.ibm.com/ to deactivate CONFIG_DEBUG_VM_PGTABLE
> on powerpc until the issue is fixes.

Thanks for this info

-- 
You are receiving this mail because:
You are on the CC list for the bug.

^ permalink raw reply

* [Bug 209181] kernel BUG at arch/powerpc/mm/pgtable.c:304!
From: bugzilla-daemon @ 2020-09-07  6:18 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-209181-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=209181

Zorro Lang (zlang@redhat.com) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|---                         |DUPLICATE

--- Comment #3 from Zorro Lang (zlang@redhat.com) ---

*** This bug has been marked as a duplicate of bug 209029 ***

-- 
You are receiving this mail because:
You are on the CC list for the bug.

^ permalink raw reply

* [Bug 209029] kernel 5.9-rc2 fails to boot on a PowerMac G5 11,2 - BUG: Kernel NULL pointer dereference on read at 0x00000020
From: bugzilla-daemon @ 2020-09-07  6:18 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-209029-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=209029

Zorro Lang (zlang@redhat.com) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |zlang@redhat.com

--- Comment #5 from Zorro Lang (zlang@redhat.com) ---
*** Bug 209181 has been marked as a duplicate of this bug. ***

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [PATCH] powerepc/book3s64/hash: Align start/end address correctly with bolt mapping
From: Aneesh Kumar K.V @ 2020-09-07  7:25 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: Aneesh Kumar K.V

This ensures we don't do a partial mapping of memory. With nvdimm, when
creating namespaces with size not aligned to 16MB, the kernel ends up partially
mapping the pages. This can result in kernel adding multiple hash page table
entries for the same range. A new namespace will result in
create_section_mapping() with start and end overlapping an already existing
bolted hash page table entry.

commit: 6acd7d5ef264 ("libnvdimm/namespace: Enforce memremap_compat_align()")
made sure that we always create namespaces aligned to 16MB. But we can do
better by avoiding mapping pages that are not aligned. This helps to catch
access to these partially mapped pages early.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/book3s64/hash_utils.c    | 12 +++++++++---
 arch/powerpc/mm/book3s64/radix_pgtable.c |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index c663e7ba801f..7185bc43b24f 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -260,8 +260,12 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 	DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
 	    vstart, vend, pstart, prot, psize, ssize);
 
-	for (vaddr = vstart, paddr = pstart; vaddr < vend;
-	     vaddr += step, paddr += step) {
+	/* Carefully map only the possible range */
+	vaddr = ALIGN(vstart, step);
+	paddr = ALIGN(pstart, step);
+	vend  = ALIGN_DOWN(vend, step);
+
+	for (; vaddr < vend; vaddr += step, paddr += step) {
 		unsigned long hash, hpteg;
 		unsigned long vsid = get_kernel_vsid(vaddr, ssize);
 		unsigned long vpn  = hpt_vpn(vaddr, vsid, ssize);
@@ -343,7 +347,9 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 	if (!mmu_hash_ops.hpte_removebolted)
 		return -ENODEV;
 
-	for (vaddr = vstart; vaddr < vend; vaddr += step) {
+	/* Unmap the full range specificied */
+	vaddr = ALIGN_DOWN(vstart, step);
+	for (;vaddr < vend; vaddr += step) {
 		rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
 		if (rc == -ENOENT) {
 			ret = -ENOENT;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index d5f0c10d752a..5c8adeb8c955 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -276,6 +276,7 @@ static int __meminit create_physical_mapping(unsigned long start,
 	int psize;
 
 	start = ALIGN(start, PAGE_SIZE);
+	end   = ALIGN_DOWN(end, PAGE_SIZE);
 	for (addr = start; addr < end; addr += mapping_size) {
 		unsigned long gap, previous_size;
 		int rc;
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH -next] powerpc/book3s64: fix link error with CONFIG_PPC_RADIX_MMU=n
From: Christophe Leroy @ 2020-09-07  8:11 UTC (permalink / raw)
  To: Yang Yingliang, linuxppc-dev, linux-kernel
In-Reply-To: <af37c513-6232-c35c-33e3-f6d8d82c8175@huawei.com>



Le 07/09/2020 à 03:51, Yang Yingliang a écrit :
> 
> On 2020/9/6 14:50, Christophe Leroy wrote:
>>
>>
>> Le 05/09/2020 à 13:25, Yang Yingliang a écrit :
>>> Fix link error when CONFIG_PPC_RADIX_MMU is disabled:
>>> powerpc64-linux-gnu-ld: 
>>> arch/powerpc/platforms/pseries/lpar.o:(.toc+0x0): undefined reference 
>>> to `mmu_pid_bits'
>>>
>>> Reported-by: Hulk Robot <hulkci@huawei.com>
>>> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
>>> ---
>>>   arch/powerpc/mm/book3s64/mmu_context.c | 4 ++++
>>
>> In your commit log, you are just mentionning 
>> arch/powerpc/platforms/pseries/lpar.o, which is right.
>>
>> You shouldn't need to modify arch/powerpc/mm/book3s64/mmu_context.c at 
>> all, see below.
>>
>>>   arch/powerpc/platforms/pseries/lpar.c  | 2 ++
>>>   2 files changed, 6 insertions(+)
>>>
>>> diff --git a/arch/powerpc/mm/book3s64/mmu_context.c 
>>> b/arch/powerpc/mm/book3s64/mmu_context.c
>>> index 0ba30b8b935b..a8e292cd88f0 100644
>>> --- a/arch/powerpc/mm/book3s64/mmu_context.c
>>> +++ b/arch/powerpc/mm/book3s64/mmu_context.c
>>> @@ -152,6 +152,7 @@ void hash__setup_new_exec(void)
>>>     static int radix__init_new_context(struct mm_struct *mm)
>>>   {
>>> +#ifdef CONFIG_PPC_RADIX_MMU
>>
>> This shouldn't be required. radix__init_new_context() is only called 
>> when radix_enabled() returns true.
>> As it is a static function, when it is not called it gets optimised 
>> away, so you will never get an undefined reference to `mmu_pid_bits` 
>> there.
> powerpc64-linux-gnu-ld: 
> arch/powerpc/mm/book3s64/mmu_context.o:(.toc+0x0): undefined reference 
> to `mmu_pid_bits'
> powerpc64-linux-gnu-ld: 
> arch/powerpc/mm/book3s64/mmu_context.o:(.toc+0x8): undefined reference 
> to `mmu_base_pid'
> 
> 
> mmu_context.c is always compiled, it uses mmu_pid_bits and mmu_base_pid.

Yes, mmu_context.c is always compiled, but as I explained, 
radix__init_new_context() is defined as 'static' so it is optimised out 
when radix_enabled() returns false because there is no caller in that case.

I just made the test with ppc64_defconfig + CONFIG_PPC_RADIX_MMU=n (GCC 8.1)

The only failure I got was on lpar.c, which I fixed by enclosing the 
entire radix_init_pseries() in an #ifdef.

Once this is fixed, the build is OK, without any modification to 
mmu_context.c

powerpc64-linux-objdump -x arch/powerpc/mm/book3s64/mmu_context.o shows 
only the following objects in the .toc:

RELOCATION RECORDS FOR [.toc]:
OFFSET           TYPE              VALUE
0000000000000000 R_PPC64_ADDR64    kmalloc_caches
0000000000000008 R_PPC64_ADDR64    vmemmap
0000000000000010 R_PPC64_ADDR64    __pmd_frag_nr
0000000000000018 R_PPC64_ADDR64    __pmd_frag_size_shift

mmu_pid_bits and mmu_base_pid are not part of the undefined objetcs:

0000000000000000         *UND*	0000000000000000 vmemmap
0000000000000000         *UND*	0000000000000000 .mm_iommu_init
0000000000000000         *UND*	0000000000000000 __pmd_frag_nr
0000000000000000         *UND*	0000000000000000 .ida_alloc_range
0000000000000000         *UND*	0000000000000000 .slb_setup_new_exec
0000000000000000         *UND*	0000000000000000 mmu_feature_keys
0000000000000000         *UND*	0000000000000000 .memset
0000000000000000         *UND*	0000000000000000 .memcpy
0000000000000000         *UND*	0000000000000000 .slice_init_new_context_exec
0000000000000000         *UND*	0000000000000000 ._mcount
0000000000000000         *UND*	0000000000000000 .__free_pages
0000000000000000         *UND*	0000000000000000 __pmd_frag_size_shift
0000000000000000         *UND*	0000000000000000 .slice_setup_new_exec
0000000000000000         *UND*	0000000000000000 .ida_free
0000000000000000         *UND*	0000000000000000 .pte_frag_destroy
0000000000000000         *UND*	0000000000000000 .kfree
0000000000000000         *UND*	0000000000000000 .pkey_mm_init
0000000000000000         *UND*	0000000000000000 .kmem_cache_alloc_trace
0000000000000000         *UND*	0000000000000000 .__warn_printk
0000000000000000         *UND*	0000000000000000 _mcount
0000000000000000         *UND*	0000000000000000 kmalloc_caches

Christophe

^ permalink raw reply

* Re: [RFC PATCH 09/12] powerpc: move NMI entry/exit code into wrapper
From: Christophe Leroy @ 2020-09-07  8:25 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev
In-Reply-To: <20200905174335.3161229-10-npiggin@gmail.com>



On 9/5/20 5:43 PM, Nicholas Piggin wrote:
> This moves the common NMI entry and exit code into the interrupt handler
> wrappers.
> 
> This changes the behaviour of soft-NMI (watchdog) and HMI interrupts, and
> also MCE interrupts on 64e, by adding missing parts of the NMI entry to
> them.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/include/asm/interrupt.h | 26 +++++++++++++++++++
>   arch/powerpc/kernel/mce.c            | 12 ---------
>   arch/powerpc/kernel/traps.c          | 38 +++++-----------------------
>   arch/powerpc/kernel/watchdog.c       | 10 +++-----
>   4 files changed, 37 insertions(+), 49 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
> index 83fe1d64cf23..69eb8a432984 100644
> --- a/arch/powerpc/include/asm/interrupt.h
> +++ b/arch/powerpc/include/asm/interrupt.h
> @@ -31,6 +31,27 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs)
>   }
>   #endif /* CONFIG_PPC_BOOK3S_64 */
>   
> +struct interrupt_nmi_state {
> +#ifdef CONFIG_PPC64
> +	u8 ftrace_enabled;
> +#endif
> +};
> +
> +static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
> +{
> +	this_cpu_set_ftrace_enabled(0);
> +
> +	nmi_enter();
> +}
> +
> +static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
> +{
> +	nmi_exit();
> +
> +	this_cpu_set_ftrace_enabled(state->ftrace_enabled);

PPC32 build:

In file included from arch/powerpc/kernel/irq.c:57:0:
./arch/powerpc/include/asm/interrupt.h: In function 
‘interrupt_nmi_exit_prepare’:
./arch/powerpc/include/asm/interrupt.h:96:35: error: ‘struct 
interrupt_nmi_state’ has no member named ‘ftrace_enabled’
   this_cpu_set_ftrace_enabled(state->ftrace_enabled);
                                    ^

> +}
> +
> +
>   /**
>    * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function
>    * @func:	Function name of the entry point
> @@ -177,10 +198,15 @@ static __always_inline long ___##func(struct pt_regs *regs);		\
>   									\
>   __visible noinstr long func(struct pt_regs *regs)			\
>   {									\
> +	struct interrupt_nmi_state state;				\
>   	long ret;							\
>   									\
> +	interrupt_nmi_enter_prepare(regs, &state);			\
> +									\
>   	ret = ___##func (regs);						\
>   									\
> +	interrupt_nmi_exit_prepare(regs, &state);			\
> +									\
>   	return ret;							\
>   }									\
>   									\

Christophe

^ permalink raw reply

* Re: [RFC PATCH 02/12] powerpc: remove arguments from interrupt handler functions
From: Christophe Leroy @ 2020-09-07  9:20 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev
In-Reply-To: <20200905174335.3161229-3-npiggin@gmail.com>



Le 05/09/2020 à 19:43, Nicholas Piggin a écrit :
> Make interrupt handlers all just take the pt_regs * argument and load
> DAR/DSISR etc from that. Make those that return a value return long.

I like this, it will likely simplify a bit the VMAP_STACK mess.

Not sure it is that easy. My board is stuck after the start of init.


On the 8xx, on Instruction TLB Error exception, we do

	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */

On book3s/32, on ISI exception we do:
	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */

On 40x and bookE, on ISI exception we do:
	li	r5,0			/* Pass zero as arg3 */


And regs->dsisr will just contain nothing

So it means we should at least write back r5 into regs->dsisr from there 
? The performance impact should be minimal as we already write _DAR so 
the cache line should already be in the cache.

A hacky 'stw r5, _DSISR(r1)' in handle_page_fault() does the trick, 
allthough we don't want to do it for both ISI and DSI at the end, so 
you'll have to do it in every head_xxx.S


While you are at it, it would probably also make sense to do remove the 
address param of bad_page_fault(), there is no point in loading back 
regs->dar in handle_page_fault() and machine_check_8xx() and 
alignment_exception(), just read regs->dar in bad_page_fault()

The case of do_break() should also be looked at.

Why changing return code from int to long ?

Christophe

> 
> This is done to make the function signatures match more closely, which
> will help with a future patch to add wrappers. Explicit arguments could
> be re-added for performance in future but that would require more
> complex wrapper macros.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/include/asm/asm-prototypes.h |  4 ++--
>   arch/powerpc/include/asm/bug.h            |  4 ++--
>   arch/powerpc/kernel/exceptions-64e.S      |  2 --
>   arch/powerpc/kernel/exceptions-64s.S      | 14 ++------------
>   arch/powerpc/mm/book3s64/hash_utils.c     |  8 +++++---
>   arch/powerpc/mm/book3s64/slb.c            | 11 +++++++----
>   arch/powerpc/mm/fault.c                   | 16 +++++++++-------
>   7 files changed, 27 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
> index d714d83bbc7c..2fa0cf6c6011 100644
> --- a/arch/powerpc/include/asm/bug.h
> +++ b/arch/powerpc/include/asm/bug.h
> @@ -111,8 +111,8 @@
>   #ifndef __ASSEMBLY__
>   
>   struct pt_regs;
> -extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
> -extern int hash__do_page_fault(struct pt_regs *, unsigned long, unsigned long);
> +extern long do_page_fault(struct pt_regs *);
> +extern long hash__do_page_fault(struct pt_regs *);

no extern

>   extern void bad_page_fault(struct pt_regs *, unsigned long, int);
>   extern void _exception(int, struct pt_regs *, int, unsigned long);
>   extern void _exception_pkey(struct pt_regs *, unsigned long, int);

Christophe

^ permalink raw reply

* Re: fsl_espi errors on v5.7.15
From: Joakim Tjernlund @ 2020-09-07  9:53 UTC (permalink / raw)
  To: mpe@ellerman.id.au, broonie@kernel.org, paulus@samba.org,
	npiggin@gmail.com, Chris.Packham@alliedtelesis.co.nz,
	benh@kernel.crashing.org, hkallweit1@gmail.com
  Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org,
	linux-spi@vger.kernel.org
In-Reply-To: <6054f0ec-d994-105b-6399-6cdb65ddd1b6@alliedtelesis.co.nz>

On Thu, 2020-09-03 at 23:58 +0000, Chris Packham wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.
> 
> 
> On 1/09/20 6:14 pm, Nicholas Piggin wrote:
> > Excerpts from Chris Packham's message of September 1, 2020 11:25 am:
> > > On 1/09/20 12:33 am, Heiner Kallweit wrote:
> > > > On 30.08.2020 23:59, Chris Packham wrote:
> > > > > On 31/08/20 9:41 am, Heiner Kallweit wrote:
> > > > > > On 30.08.2020 23:00, Chris Packham wrote:
> > > > > > > On 31/08/20 12:30 am, Nicholas Piggin wrote:
> > > > > > > > Excerpts from Chris Packham's message of August 28, 2020 8:07 am:
> > > > > > > <snip>
> > > > > > > 
> > > > > > > > > > > > > I've also now seen the RX FIFO not empty error on the T2080RDB
> > > > > > > > > > > > > 
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
> > > > > > > > > > > > > fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
> > > > > > > > > > > > > 
> > > > > > > > > > > > > With my current workaround of emptying the RX FIFO. It seems
> > > > > > > > > > > > > survivable. Interestingly it only ever seems to be 1 extra byte in the
> > > > > > > > > > > > > RX FIFO and it seems to be after either a READ_SR or a READ_FSR.
> > > > > > > > > > > > > 
> > > > > > > > > > > > > fsl_espi ffe110000.spi: tx 70
> > > > > > > > > > > > > fsl_espi ffe110000.spi: rx 03
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Extra RX 00
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
> > > > > > > > > > > > > fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
> > > > > > > > > > > > > fsl_espi ffe110000.spi: tx 05
> > > > > > > > > > > > > fsl_espi ffe110000.spi: rx 00
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Extra RX 03
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
> > > > > > > > > > > > > fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
> > > > > > > > > > > > > fsl_espi ffe110000.spi: tx 05
> > > > > > > > > > > > > fsl_espi ffe110000.spi: rx 00
> > > > > > > > > > > > > fsl_espi ffe110000.spi: Extra RX 03
> > > > > > > > > > > > > 
> > > > > > > > > > > > >      From all the Micron SPI-NOR datasheets I've got access to it is
> > > > > > > > > > > > > possible to continually read the SR/FSR. But I've no idea why it
> > > > > > > > > > > > > happens some times and not others.
> > > > > > > > > > > > So I think I've got a reproduction and I think I've bisected the problem
> > > > > > > > > > > > to commit 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in
> > > > > > > > > > > > C"). My day is just finishing now so I haven't applied too much scrutiny
> > > > > > > > > > > > to this result. Given the various rabbit holes I've been down on this
> > > > > > > > > > > > issue already I'd take this information with a good degree of skepticism.
> > > > > > > > > > > > 
> > > > > > > > > > > OK, so an easy test should be to re-test with a 5.4 kernel.
> > > > > > > > > > > It doesn't have yet the change you're referring to, and the fsl-espi driver
> > > > > > > > > > > is basically the same as in 5.7 (just two small changes in 5.7).
> > > > > > > > > > There's 6cc0c16d82f88 and maybe also other interrupt related patches
> > > > > > > > > > around this time that could affect book E, so it's good if that exact
> > > > > > > > > > patch is confirmed.
> > > > > > > > > My confirmation is basically that I can induce the issue in a 5.4 kernel
> > > > > > > > > by cherry-picking 3282a3da25bd. I'm also able to "fix" the issue in
> > > > > > > > > 5.9-rc2 by reverting that one commit.
> > > > > > > > > 
> > > > > > > > > I both cases it's not exactly a clean cherry-pick/revert so I also
> > > > > > > > > confirmed the bisection result by building at 3282a3da25bd (which sees
> > > > > > > > > the issue) and the commit just before (which does not).
> > > > > > > > Thanks for testing, that confirms it well.
> > > > > > > > 
> > > > > > > > [snip patch]
> > > > > > > > 
> > > > > > > > > I still saw the issue with this change applied. PPC_IRQ_SOFT_MASK_DEBUG
> > > > > > > > > didn't report anything (either with or without the change above).
> > > > > > > > Okay, it was a bit of a shot in the dark. I still can't see what
> > > > > > > > else has changed.
> > > > > > > > 
> > > > > > > > What would cause this, a lost interrupt? A spurious interrupt? Or
> > > > > > > > higher interrupt latency?
> > > > > > > > 
> > > > > > > > I don't think the patch should cause significantly worse latency,
> > > > > > > > (it's supposed to be a bit better if anything because it doesn't set
> > > > > > > > up the full interrupt frame). But it's possible.
> > > > > > > My working theory is that the SPI_DON indication is all about the TX
> > > > > > > direction an now that the interrupts are faster we're hitting an error
> > > > > > > because there is still RX activity going on. Heiner disagrees with my
> > > > > > > interpretation of the SPI_DON indication and the fact that it doesn't
> > > > > > > happen every time does throw doubt on it.
> > > > > > > 
> > > > > > It's right that the eSPI spec can be interpreted that SPI_DON refers to
> > > > > > TX only. However this wouldn't really make sense, because also for RX
> > > > > > we program the frame length, and therefore want to be notified once the
> > > > > > full frame was received. Also practical experience shows that SPI_DON
> > > > > > is set also after RX-only transfers.
> > > > > > Typical SPI NOR use case is that you write read command + start address,
> > > > > > followed by a longer read. If the TX-only interpretation would be right,
> > > > > > we'd always end up with SPI_DON not being set.
> > > > > > 
> > > > > > > I can't really explain the extra RX byte in the fifo. We know how many
> > > > > > > bytes to expect and we pull that many from the fifo so it's not as if
> > > > > > > we're missing an interrupt causing us to skip the last byte. I've been
> > > > > > > looking for some kind of off-by-one calculation but again if it were
> > > > > > > something like that it'd happen all the time.
> > > > > > > 
> > > > > > Maybe it helps to know what value this extra byte in the FIFO has. Is it:
> > > > > > - a duplicate of the last read byte
> > > > > > - or the next byte (at <end address> + 1)
> > > > > > - or a fixed value, e.g. always 0x00 or 0xff
> > > > > The values were up thread a bit but I'll repeat them here
> > > > > 
> > > > > fsl_espi ffe110000.spi: tx 70
> > > > > fsl_espi ffe110000.spi: rx 03
> > > > > fsl_espi ffe110000.spi: Extra RX 00
> > > > > fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
> > > > > fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
> > > > > fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
> > > > > fsl_espi ffe110000.spi: tx 05
> > > > > fsl_espi ffe110000.spi: rx 00
> > > > > fsl_espi ffe110000.spi: Extra RX 03
> > > > > fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
> > > > > fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
> > > > > fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
> > > > > fsl_espi ffe110000.spi: tx 05
> > > > > fsl_espi ffe110000.spi: rx 00
> > > > > fsl_espi ffe110000.spi: Extra RX 03
> > > > > 
> > > > > 
> > > > > The rx 00 Extra RX 03 is a bit concerning. I've only ever seen them with
> > > > > either a READ_SR or a READ_FSR. Never a data read.
> > > > > 
> > > > Just remembered something about SPIE_DON:
> > > > Transfers are always full duplex, therefore in case of a read the chip
> > > > sends dummy zero's. Having said that in case of a read SPIE_DON means
> > > > that the last dummy zero was shifted out.
> > > > 
> > > > READ_SR and READ_FSR are the shortest transfers, 1 byte out and 1 byte in.
> > > > So the issue may have a dependency on the length of the transfer.
> > > > However I see no good explanation so far. You can try adding a delay of
> > > > a few miroseconds between the following to commands in fsl_espi_bufs().
> > > > 
> > > >     fsl_espi_write_reg(espi, ESPI_SPIM, mask);
> > > > 
> > > >     /* Prevent filling the fifo from getting interrupted */
> > > >     spin_lock_irq(&espi->lock);
> > > > 
> > > > Maybe enabling interrupts and seeing the SPIE_DON interrupt are too close.
> > > I think this might be heading in the right direction. Playing about with
> > > a delay does seem to make the two symptoms less likely. Although I have
> > > to set it quite high (i.e. msleep(100)) to completely avoid any
> > > possibility of seeing either message.
> > The patch might replay the interrupt a little bit faster, but it would
> > be a few microseconds at most I think (just from improved code).
> > 
> > Would you be able to ftrace the interrupt handler function and see if you
> > can see a difference in number or timing of interrupts? I'm at a bit of
> > a loss.
> 
> I tried ftrace but I really wasn't sure what I was looking for.
> Capturing a "bad" case was pretty tricky. But I think I've identified a
> fix (I'll send it as a proper patch shortly). The gist is
> 
> diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
> index 7e7c92cafdbb..cb120b68c0e2 100644
> --- a/drivers/spi/spi-fsl-espi.c
> +++ b/drivers/spi/spi-fsl-espi.c
> @@ -574,13 +574,14 @@ static void fsl_espi_cpu_irq(struct fsl_espi
> *espi, u32 events)
>   static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
>   {
>          struct fsl_espi *espi = context_data;
> -       u32 events;
> +       u32 events, mask;
> 
>          spin_lock(&espi->lock);
> 
>          /* Get interrupt events(tx/rx) */
>          events = fsl_espi_read_reg(espi, ESPI_SPIE);
> -       if (!events) {
> +       mask = fsl_espi_read_reg(espi, ESPI_SPIM);
> +       if (!(events & mask)) {
>                  spin_unlock(&espi->lock);
>                  return IRQ_NONE;
>          }
> 
> The SPIE register contains the TXCNT so events is pretty much always
> going to have something set. By checking events against what we've
> actually requested interrupts for we don't see any spurious events.
> 
> I've tested this on the T2080RDB and on our custom hardware and it seems
> to resolve the problem.
> 

I looked at the fsl_espi_irq() too and noticed that clearing of the IRQ events
are after processing TX/RX. That looks a bit odd to me.

  Jocke

^ permalink raw reply

* [PATCH v2] powerpc/papr_scm: Limit the readability of 'perf_stats' sysfs attribute
From: Vaibhav Jain @ 2020-09-07 11:05 UTC (permalink / raw)
  To: linuxppc-dev, linux-nvdimm
  Cc: Santosh Sivaraj, Oliver O'Halloran, Aneesh Kumar K . V,
	Vaibhav Jain, Dan Williams, Ira Weiny

The newly introduced 'perf_stats' attribute uses the default access
mode of 0444 letting non-root users access performance stats of an
nvdimm and potentially force the kernel into issuing large number of
expensive HCALLs. Since the information exposed by this attribute
cannot be cached hence its better to ward of access to this attribute
from users who don't need to access these performance statistics.

Hence this patch updates access mode of 'perf_stats' attribute to
be only readable by root users.

Fixes: 2d02bf835e573 ('powerpc/papr_scm: Fetch nvdimm performance stats from PHYP')
Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Change-log:

v2:
* Instead of checking for perfmon_capable() inside show_perf_stats()
  set the attribute as DEVICE_ATTR_ADMIN_RO [ Aneesh ]
* Update patch description
---
 arch/powerpc/platforms/pseries/papr_scm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f439f0dfea7d1..a88a707a608aa 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -822,7 +822,7 @@ static ssize_t perf_stats_show(struct device *dev,
 	kfree(stats);
 	return rc ? rc : seq_buf_used(&s);
 }
-DEVICE_ATTR_RO(perf_stats);
+DEVICE_ATTR_ADMIN_RO(perf_stats);
 
 static ssize_t flags_show(struct device *dev,
 			  struct device_attribute *attr, char *buf)
-- 
2.26.2


^ permalink raw reply related

* Re: [RFC PATCH 02/12] powerpc: remove arguments from interrupt handler functions
From: Christophe Leroy @ 2020-09-07 11:34 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev
In-Reply-To: <e34fead9-a356-3ae6-aa33-544380230bd5@csgroup.eu>

On Mon, 2020-09-07 at 11:20 +0200, Christophe Leroy wrote:
> 
> Le 05/09/2020 à 19:43, Nicholas Piggin a écrit :
> > Make interrupt handlers all just take the pt_regs * argument and load
> > DAR/DSISR etc from that. Make those that return a value return long.
> 
> I like this, it will likely simplify a bit the VMAP_STACK mess.
> 
> Not sure it is that easy. My board is stuck after the start of init.
> 
> 
> On the 8xx, on Instruction TLB Error exception, we do
> 
> 	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
> 
> On book3s/32, on ISI exception we do:
> 	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
> 
> On 40x and bookE, on ISI exception we do:
> 	li	r5,0			/* Pass zero as arg3 */
> 
> 
> And regs->dsisr will just contain nothing
> 
> So it means we should at least write back r5 into regs->dsisr from there 
> ? The performance impact should be minimal as we already write _DAR so 
> the cache line should already be in the cache.
> 
> A hacky 'stw r5, _DSISR(r1)' in handle_page_fault() does the trick, 
> allthough we don't want to do it for both ISI and DSI at the end, so 
> you'll have to do it in every head_xxx.S

To get you series build and work, I did the following hacks:

diff --git a/arch/powerpc/include/asm/interrupt.h
b/arch/powerpc/include/asm/interrupt.h
index acfcc7d5779b..c11045d3113a 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -93,7 +93,9 @@ static inline void interrupt_nmi_exit_prepare(struct
pt_regs *regs, struct inter
 {
 	nmi_exit();
 
+#ifdef CONFIG_PPC64
 	this_cpu_set_ftrace_enabled(state->ftrace_enabled);
+#endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* Check we didn't change the pending interrupt mask. */
diff --git a/arch/powerpc/kernel/entry_32.S
b/arch/powerpc/kernel/entry_32.S
index f4d0af8e1136..66f7adbe1076 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -663,6 +663,7 @@ ppc_swapcontext:
  */
 	.globl	handle_page_fault
 handle_page_fault:
+	stw	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_BOOK3S_32
 	andis.  r0,r5,DSISR_DABRMATCH@h
---

Christophe


^ permalink raw reply related

* [PATCH] scsi: ibmvfc: Fix error return in ibmvfc_probe()
From: Jing Xiangfeng @ 2020-09-07  8:39 UTC (permalink / raw)
  To: tyreld, mpe, benh, paulus, jejb, martin.petersen
  Cc: linuxppc-dev, linux-kernel, linux-scsi, jingxiangfeng

Fix to return error code PTR_ERR() from the error handling case instead
of 0.

Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index ea7c8930592d..70daa0605082 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -4928,6 +4928,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	if (IS_ERR(vhost->work_thread)) {
 		dev_err(dev, "Couldn't create kernel thread: %ld\n",
 			PTR_ERR(vhost->work_thread));
+		rc = PTR_ERR(vhost->work_thread);
 		goto free_host_mem;
 	}
 
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH] arch: vdso: add vdso linker script to 'targets' instead of extra-y
From: Masahiro Yamada @ 2020-09-07 12:44 UTC (permalink / raw)
  To: Linux Kbuild mailing list
  Cc: linux-s390, Catalin Marinas, Vasily Gorbik, Nick Hu,
	Heiko Carstens, linuxppc-dev, Linux Kernel Mailing List,
	Christian Borntraeger, Paul Mackerras, Greentime Hu, Vincent Chen,
	Will Deacon, linux-arm-kernel
In-Reply-To: <20200831182239.480317-1-masahiroy@kernel.org>

On Tue, Sep 1, 2020 at 3:23 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> The vdso linker script is preprocessed on demand.
> Adding it to 'targets' is enough to include the .cmd file.
>
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> ---


Applied to linux-kbuild.


>  arch/arm64/kernel/vdso/Makefile     | 2 +-
>  arch/arm64/kernel/vdso32/Makefile   | 2 +-
>  arch/nds32/kernel/vdso/Makefile     | 2 +-
>  arch/powerpc/kernel/vdso32/Makefile | 2 +-
>  arch/powerpc/kernel/vdso64/Makefile | 2 +-
>  arch/s390/kernel/vdso64/Makefile    | 2 +-
>  6 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index 45d5cfe46429..7cd8aafbe96e 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -54,7 +54,7 @@ endif
>  GCOV_PROFILE := n
>
>  obj-y += vdso.o
> -extra-y += vdso.lds
> +targets += vdso.lds
>  CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
>
>  # Force dependency (incbin is bad)
> diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
> index d6adb4677c25..572475b7b7ed 100644
> --- a/arch/arm64/kernel/vdso32/Makefile
> +++ b/arch/arm64/kernel/vdso32/Makefile
> @@ -155,7 +155,7 @@ asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
>  obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
>
>  obj-y += vdso.o
> -extra-y += vdso.lds
> +targets += vdso.lds
>  CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
>
>  # Force dependency (vdso.s includes vdso.so through incbin)
> diff --git a/arch/nds32/kernel/vdso/Makefile b/arch/nds32/kernel/vdso/Makefile
> index 7c3c1ccb196e..55df25ef0057 100644
> --- a/arch/nds32/kernel/vdso/Makefile
> +++ b/arch/nds32/kernel/vdso/Makefile
> @@ -20,7 +20,7 @@ GCOV_PROFILE := n
>
>
>  obj-y += vdso.o
> -extra-y += vdso.lds
> +targets += vdso.lds
>  CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
>
>  # Force dependency
> diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
> index 87ab1152d5ce..fd5072a4c73c 100644
> --- a/arch/powerpc/kernel/vdso32/Makefile
> +++ b/arch/powerpc/kernel/vdso32/Makefile
> @@ -29,7 +29,7 @@ ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
>  asflags-y := -D__VDSO32__ -s
>
>  obj-y += vdso32_wrapper.o
> -extra-y += vdso32.lds
> +targets += vdso32.lds
>  CPPFLAGS_vdso32.lds += -P -C -Upowerpc
>
>  # Force dependency (incbin is bad)
> diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
> index 38c317f25141..c737b3ea3207 100644
> --- a/arch/powerpc/kernel/vdso64/Makefile
> +++ b/arch/powerpc/kernel/vdso64/Makefile
> @@ -17,7 +17,7 @@ ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
>  asflags-y := -D__VDSO64__ -s
>
>  obj-y += vdso64_wrapper.o
> -extra-y += vdso64.lds
> +targets += vdso64.lds
>  CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
>
>  # Force dependency (incbin is bad)
> diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
> index 4a66a1cb919b..d0d406cfffa9 100644
> --- a/arch/s390/kernel/vdso64/Makefile
> +++ b/arch/s390/kernel/vdso64/Makefile
> @@ -25,7 +25,7 @@ $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
>  $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
>
>  obj-y += vdso64_wrapper.o
> -extra-y += vdso64.lds
> +targets += vdso64.lds
>  CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
>
>  # Disable gcov profiling, ubsan and kasan for VDSO code
> --
> 2.25.1
>


-- 
Best Regards
Masahiro Yamada

^ permalink raw reply

* Re: [RFC PATCH 12/12] powerpc/64s: power4 nap fixup in C
From: Nicholas Piggin @ 2020-09-07 13:05 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: linuxppc-dev
In-Reply-To: <9a647445-a438-ae93-f8d5-c177b7fe9662@csgroup.eu>

Excerpts from Christophe Leroy's message of September 7, 2020 2:48 pm:
> 
> 
> Le 07/09/2020 à 06:02, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of September 6, 2020 5:32 pm:
>>>
>>>
>>> Le 05/09/2020 à 19:43, Nicholas Piggin a écrit :
>>>> There is no need for this to be in asm, use the new intrrupt entry wrapper.
>>>>
>>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>>> ---
>>>>    arch/powerpc/include/asm/interrupt.h   | 14 ++++++++
>>>>    arch/powerpc/include/asm/processor.h   |  1 +
>>>>    arch/powerpc/include/asm/thread_info.h |  6 ++++
>>>>    arch/powerpc/kernel/exceptions-64s.S   | 45 --------------------------
>>>>    arch/powerpc/kernel/idle_book3s.S      |  4 +++
>>>>    5 files changed, 25 insertions(+), 45 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
>>>> index ed0d633ab5aa..3da1dba91386 100644
>>>> --- a/arch/powerpc/include/asm/processor.h
>>>> +++ b/arch/powerpc/include/asm/processor.h
>>>> @@ -424,6 +424,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
>>>>    extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
>>>>    #ifdef CONFIG_PPC_970_NAP
>>>>    extern void power4_idle_nap(void);
>>>> +extern void power4_idle_nap_return(void);
>>>
>>> Please please please, 'extern' keyword is pointless and deprecated for
>>> function prototypes. Don't add new ones.
>>>
>>> Also, put it outside the #ifdef, so that you can use IS_ENABLED()
>>> instead of #ifdef when using it.
>> 
>> I just copy paste and forget to remove it. I expect someone will do a
>> "cleanup" patch to get rid of them in one go, I find a random assortment
>> of extern and not extern to be even uglier :(
> 
> If we don't want to make fixes backporting a huge headache, some 
> transition with random assortment is the price to pay.
> 
> One day, when 'extern' have become the minority, we can get rid of the 
> few last ones.
> 
> But if someone believe it is not such a problem with backporting, I can 
> provide a cleanup patch now.

I can't really see declarations being a big problem for backporting
or git history. But maybe Michael won't like a patch.

I will try to remember externs though.

Thanks,
Nick

^ permalink raw reply

* Re: KVM on POWER8 host lock up since 10d91611f426 ("powerpc/64s: Reimplement book3s idle code in C")
From: Nicholas Piggin @ 2020-09-07 13:13 UTC (permalink / raw)
  To: Michael Ellerman, Michal Suchánek; +Cc: ro, linuxppc-dev, Hari Bathini
In-Reply-To: <87y2lv1430.fsf@mpe.ellerman.id.au>

Excerpts from Michael Ellerman's message of August 31, 2020 8:50 pm:
> Michal Suchánek <msuchanek@suse.de> writes:
>> On Mon, Aug 31, 2020 at 11:14:18AM +1000, Nicholas Piggin wrote:
>>> Excerpts from Michal Suchánek's message of August 31, 2020 6:11 am:
>>> > Hello,
>>> > 
>>> > on POWER8 KVM hosts lock up since commit 10d91611f426 ("powerpc/64s:
>>> > Reimplement book3s idle code in C").
>>> > 
>>> > The symptom is host locking up completely after some hours of KVM
>>> > workload with messages like
>>> > 
>>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 47
>>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 71
>>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 47
>>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 71
>>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 47
>>> > 
>>> > printed before the host locks up.
>>> > 
>>> > The machines run sandboxed builds which is a mixed workload resulting in
>>> > IO/single core/mutiple core load over time and there are periods of no
>>> > activity and no VMS runnig as well. The VMs are shortlived so VM
>>> > setup/terdown is somewhat excercised as well.
>>> > 
>>> > POWER9 with the new guest entry fast path does not seem to be affected.
>>> > 
>>> > Reverted the patch and the followup idle fixes on top of 5.2.14 and
>>> > re-applied commit a3f3072db6ca ("powerpc/powernv/idle: Restore IAMR
>>> > after idle") which gives same idle code as 5.1.16 and the kernel seems
>>> > stable.
>>> > 
>>> > Config is attached.
>>> > 
>>> > I cannot easily revert this commit, especially if I want to use the same
>>> > kernel on POWER8 and POWER9 - many of the POWER9 fixes are applicable
>>> > only to the new idle code.
>>> > 
>>> > Any idea what can be the problem?
>>> 
>>> So hwthread_state is never getting back to to HWTHREAD_IN_IDLE on
>>> those threads. I wonder what they are doing. POWER8 doesn't have a good
>>> NMI IPI and I don't know if it supports pdbg dumping registers from the
>>> BMC unfortunately.
>>
>> It may be possible to set up fadump with a later kernel version that
>> supports it on powernv and dump the whole kernel.
> 
> Your firmware won't support it AFAIK.
> 
> You could try kdump, but if we have CPUs stuck in KVM then there's a
> good chance it won't work :/

I haven't had any luck yet reproducing this still. Testing with sub 
cores of various different combinations, etc. I'll keep trying though.

I don't know if there's much we can add to debug it. Can we run pdbg
on the BMCs on these things?

Thanks,
Nick

^ permalink raw reply

* Re: KVM on POWER8 host lock up since 10d91611f426 ("powerpc/64s: Reimplement book3s idle code in C")
From: Michal Suchánek @ 2020-09-07 13:25 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: ro, linuxppc-dev, Hari Bathini
In-Reply-To: <1599484062.vgmycu6q5i.astroid@bobo.none>

On Mon, Sep 07, 2020 at 11:13:47PM +1000, Nicholas Piggin wrote:
> Excerpts from Michael Ellerman's message of August 31, 2020 8:50 pm:
> > Michal Suchánek <msuchanek@suse.de> writes:
> >> On Mon, Aug 31, 2020 at 11:14:18AM +1000, Nicholas Piggin wrote:
> >>> Excerpts from Michal Suchánek's message of August 31, 2020 6:11 am:
> >>> > Hello,
> >>> > 
> >>> > on POWER8 KVM hosts lock up since commit 10d91611f426 ("powerpc/64s:
> >>> > Reimplement book3s idle code in C").
> >>> > 
> >>> > The symptom is host locking up completely after some hours of KVM
> >>> > workload with messages like
> >>> > 
> >>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 47
> >>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 71
> >>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 47
> >>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 71
> >>> > 2020-08-30T10:51:31+00:00 obs-power8-01 kernel: KVM: couldn't grab cpu 47
> >>> > 
> >>> > printed before the host locks up.
> >>> > 
> >>> > The machines run sandboxed builds which is a mixed workload resulting in
> >>> > IO/single core/mutiple core load over time and there are periods of no
> >>> > activity and no VMS runnig as well. The VMs are shortlived so VM
> >>> > setup/terdown is somewhat excercised as well.
> >>> > 
> >>> > POWER9 with the new guest entry fast path does not seem to be affected.
> >>> > 
> >>> > Reverted the patch and the followup idle fixes on top of 5.2.14 and
> >>> > re-applied commit a3f3072db6ca ("powerpc/powernv/idle: Restore IAMR
> >>> > after idle") which gives same idle code as 5.1.16 and the kernel seems
> >>> > stable.
> >>> > 
> >>> > Config is attached.
> >>> > 
> >>> > I cannot easily revert this commit, especially if I want to use the same
> >>> > kernel on POWER8 and POWER9 - many of the POWER9 fixes are applicable
> >>> > only to the new idle code.
> >>> > 
> >>> > Any idea what can be the problem?
> >>> 
> >>> So hwthread_state is never getting back to to HWTHREAD_IN_IDLE on
> >>> those threads. I wonder what they are doing. POWER8 doesn't have a good
> >>> NMI IPI and I don't know if it supports pdbg dumping registers from the
> >>> BMC unfortunately.
> >>
> >> It may be possible to set up fadump with a later kernel version that
> >> supports it on powernv and dump the whole kernel.
> > 
> > Your firmware won't support it AFAIK.
> > 
> > You could try kdump, but if we have CPUs stuck in KVM then there's a
> > good chance it won't work :/
> 
> I haven't had any luck yet reproducing this still. Testing with sub 
> cores of various different combinations, etc. I'll keep trying though.
> 
> I don't know if there's much we can add to debug it. Can we run pdbg
> on the BMCs on these things?

I suppose it depends on the machine type?

Thanks

Michal

^ permalink raw reply

* Re: [PATCH] kbuild: preprocess module linker script
From: Will Deacon @ 2020-09-07 13:30 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: linux-ia64, Peter Zijlstra, Catalin Marinas, Paul Mackerras,
	linux-riscv, Ard Biesheuvel, Anton Ivanov, linux-arch,
	Mauro Carvalho Chehab, Richard Weinberger, Russell King,
	Ingo Molnar, Geert Uytterhoeven, Fenghua Yu, Albert Ou, Kees Cook,
	Arnd Bergmann, linux-kbuild, Jeff Dike, Jessica Yu, linux-um,
	linux-m68k, Tony Luck, Paul Walmsley, linux-arm-kernel,
	Michal Marek, linux-kernel, Palmer Dabbelt, linuxppc-dev
In-Reply-To: <20200904133122.133071-1-masahiroy@kernel.org>

On Fri, Sep 04, 2020 at 10:31:21PM +0900, Masahiro Yamada wrote:
> There was a request to preprocess the module linker script like we do
> for the vmlinux one (https://lkml.org/lkml/2020/8/21/512).
> 
> The difference between vmlinux.lds and module.lds is that the latter
> is needed for external module builds, thus must be cleaned up by
> 'make mrproper' instead of 'make clean' (also, it must be created by
> 'make modules_prepare').
> 
> You cannot put it in arch/*/kernel/ because 'make clean' descends into
> it. I moved arch/*/kernel/module.lds to arch/*/include/asm/module.lds.h,
> which is included from scripts/module.lds.S.
> 
> scripts/module.lds is fine because 'make clean' keeps all the build
> artifacts under scripts/.
> 
> You can add arch-specific sections in <asm/module.lds.h>.
> 
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> Tested-by: Jessica Yu <jeyu@kernel.org>
> ---

For the arm64 bits:

Acked-by: Will Deacon <will@kernel.org>

Thanks,

Will

^ permalink raw reply

* [PATCH 1/2] powerpc/32: Fix vmap stack - Do not activate MMU before reading task struct
From: Christophe Leroy @ 2020-09-07 13:42 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

We need r1 to be properly set before activating MMU, so
reading task_struct->stack must be done with MMU off.

This means we need an additional register to play with MSR
bits while r11 now points to the stack. For that, move r10
back to CR (As is already done for hash MMU) and use r10.

We still don't have r1 correct yet when we activate MMU.
It is done in following patch.

Fixes: 028474876f47 ("powerpc/32: prepare for CONFIG_VMAP_STACK")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_32.S |  6 ------
 arch/powerpc/kernel/head_32.h | 31 ++++++-------------------------
 2 files changed, 6 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index f3ab94d73936..d967266d62e8 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -274,14 +274,8 @@ __secondary_hold_acknowledge:
 	DO_KVM  0x200
 MachineCheck:
 	EXCEPTION_PROLOG_0
-#ifdef CONFIG_VMAP_STACK
-	li	r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
-	mtmsr	r11
-	isync
-#endif
 #ifdef CONFIG_PPC_CHRP
 	mfspr	r11, SPRN_SPRG_THREAD
-	tovirt_vmstack r11, r11
 	lwz	r11, RTAS_SP(r11)
 	cmpwi	cr1, r11, 0
 	bne	cr1, 7f
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 9abec6cd099c..21effebb9277 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -39,24 +39,13 @@
 .endm
 
 .macro EXCEPTION_PROLOG_1 for_rtas=0
-#ifdef CONFIG_VMAP_STACK
-	.ifeq	\for_rtas
-	li	r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
-	mtmsr	r11
-	isync
-	.endif
 	subi	r11, r1, INT_FRAME_SIZE		/* use r1 if kernel */
-#else
-	tophys(r11,r1)			/* use tophys(r1) if kernel */
-	subi	r11, r11, INT_FRAME_SIZE	/* alloc exc. frame */
-#endif
 	beq	1f
 	mfspr	r11,SPRN_SPRG_THREAD
-	tovirt_vmstack r11, r11
 	lwz	r11,TASK_STACK-THREAD(r11)
 	addi	r11, r11, THREAD_SIZE - INT_FRAME_SIZE
-	tophys_novmstack r11, r11
 1:
+	tophys_novmstack r11, r11
 #ifdef CONFIG_VMAP_STACK
 	mtcrf	0x7f, r11
 	bt	32 - THREAD_ALIGN_SHIFT, stack_overflow
@@ -64,12 +53,11 @@
 .endm
 
 .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
-BEGIN_MMU_FTR_SECTION
+#ifdef CONFIG_VMAP_STACK
 	mtcr	r10
-FTR_SECTION_ELSE
-	stw	r10, _CCR(r11)
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+	li	r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+	mtmsr	r10
+	isync
 #else
 	stw	r10,_CCR(r11)		/* save registers */
 #endif
@@ -77,11 +65,9 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
 	stw	r12,GPR12(r11)
 	stw	r9,GPR9(r11)
 	stw	r10,GPR10(r11)
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
-BEGIN_MMU_FTR_SECTION
+#ifdef CONFIG_VMAP_STACK
 	mfcr	r10
 	stw	r10, _CCR(r11)
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
 #endif
 	mfspr	r12,SPRN_SPRG_SCRATCH1
 	stw	r12,GPR11(r11)
@@ -97,11 +83,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
 	stw	r10, _DSISR(r11)
 	.endif
 	lwz	r9, SRR1(r12)
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
-BEGIN_MMU_FTR_SECTION
 	andi.	r10, r9, MSR_PR
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
 	lwz	r12, SRR0(r12)
 #else
 	mfspr	r12,SPRN_SRR0
@@ -328,7 +310,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
 #ifdef CONFIG_VMAP_STACK
 #ifdef CONFIG_SMP
 	mfspr	r11, SPRN_SPRG_THREAD
-	tovirt(r11, r11)
 	lwz	r11, TASK_CPU - THREAD(r11)
 	slwi	r11, r11, 3
 	addis	r11, r11, emergency_ctx@ha
-- 
2.25.0


^ permalink raw reply related

* [PATCH 2/2] powerpc/32: Fix vmap stack - Properly set r1 before activating MMU
From: Christophe Leroy @ 2020-09-07 13:42 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <a027d447022a006c9c4958ac734128e577a3c5c1.1599486108.git.christophe.leroy@csgroup.eu>

We need r1 to be properly set before activating MMU, otherwise any new
exception taken while saving registers into the stack in exception
prologs will use the user stack, which is wrong and will even lockup
or crash when KUAP is selected.

Do that by switching the meaning of r11 and r1 until we have saved r1
to the stack: copy r1 into r11 and setup the new stack pointer in r1.
To avoid complicating and impacting all generic and specific prolog
code (and more), copy back r1 into r11 once r11 is save onto
the stack.

We could get rid of copying r1 back and forth at the cost of
rewriting everything to use r1 instead of r11 all the way when
CONFIG_VMAP_STACK is set, but the effort is probably not worth it.

Fixes: 028474876f47 ("powerpc/32: prepare for CONFIG_VMAP_STACK")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_32.h | 43 +++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 21effebb9277..cc36998c5541 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -39,15 +39,24 @@
 .endm
 
 .macro EXCEPTION_PROLOG_1 for_rtas=0
+#ifdef CONFIG_VMAP_STACK
+	mr	r11, r1
+	subi	r1, r1, INT_FRAME_SIZE		/* use r1 if kernel */
+	beq	1f
+	mfspr	r1,SPRN_SPRG_THREAD
+	lwz	r1,TASK_STACK-THREAD(r1)
+	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+#else
 	subi	r11, r1, INT_FRAME_SIZE		/* use r1 if kernel */
 	beq	1f
 	mfspr	r11,SPRN_SPRG_THREAD
 	lwz	r11,TASK_STACK-THREAD(r11)
 	addi	r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+#endif
 1:
 	tophys_novmstack r11, r11
 #ifdef CONFIG_VMAP_STACK
-	mtcrf	0x7f, r11
+	mtcrf	0x7f, r1
 	bt	32 - THREAD_ALIGN_SHIFT, stack_overflow
 #endif
 .endm
@@ -62,6 +71,15 @@
 	stw	r10,_CCR(r11)		/* save registers */
 #endif
 	mfspr	r10, SPRN_SPRG_SCRATCH0
+#ifdef CONFIG_VMAP_STACK
+	stw	r11,GPR1(r1)
+	stw	r11,0(r1)
+	mr	r11, r1
+#else
+	stw	r1,GPR1(r11)
+	stw	r1,0(r11)
+	tovirt(r1, r11)		/* set new kernel sp */
+#endif
 	stw	r12,GPR12(r11)
 	stw	r9,GPR9(r11)
 	stw	r10,GPR10(r11)
@@ -89,9 +107,6 @@
 	mfspr	r12,SPRN_SRR0
 	mfspr	r9,SPRN_SRR1
 #endif
-	stw	r1,GPR1(r11)
-	stw	r1,0(r11)
-	tovirt_novmstack r1, r11	/* set new kernel sp */
 #ifdef CONFIG_40x
 	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?) */
 #else
@@ -309,19 +324,19 @@
 .macro vmap_stack_overflow_exception
 #ifdef CONFIG_VMAP_STACK
 #ifdef CONFIG_SMP
-	mfspr	r11, SPRN_SPRG_THREAD
-	lwz	r11, TASK_CPU - THREAD(r11)
-	slwi	r11, r11, 3
-	addis	r11, r11, emergency_ctx@ha
+	mfspr	r1, SPRN_SPRG_THREAD
+	lwz	r1, TASK_CPU - THREAD(r1)
+	slwi	r1, r1, 3
+	addis	r1, r1, emergency_ctx@ha
 #else
-	lis	r11, emergency_ctx@ha
+	lis	r1, emergency_ctx@ha
 #endif
-	lwz	r11, emergency_ctx@l(r11)
-	cmpwi	cr1, r11, 0
+	lwz	r1, emergency_ctx@l(r1)
+	cmpwi	cr1, r1, 0
 	bne	cr1, 1f
-	lis	r11, init_thread_union@ha
-	addi	r11, r11, init_thread_union@l
-1:	addi	r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+	lis	r1, init_thread_union@ha
+	addi	r1, r1, init_thread_union@l
+1:	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
 	EXCEPTION_PROLOG_2
 	SAVE_NVGPRS(r11)
 	addi	r3, r1, STACK_FRAME_OVERHEAD
-- 
2.25.0


^ permalink raw reply related

* Re: fsl_espi errors on v5.7.15
From: Joakim Tjernlund @ 2020-09-07 15:38 UTC (permalink / raw)
  To: mpe@ellerman.id.au, broonie@kernel.org, paulus@samba.org,
	npiggin@gmail.com, Chris.Packham@alliedtelesis.co.nz,
	benh@kernel.crashing.org, hkallweit1@gmail.com
  Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org,
	linux-spi@vger.kernel.org
In-Reply-To: <23d13439e4cc1872c29db2f93e715a61f4843943.camel@infinera.com>

[SNIP]
> > 

> > > Would you be able to ftrace the interrupt handler function and see if you
> > > can see a difference in number or timing of interrupts? I'm at a bit of
> > > a loss.
> > 
> > I tried ftrace but I really wasn't sure what I was looking for.
> > Capturing a "bad" case was pretty tricky. But I think I've identified a
> > fix (I'll send it as a proper patch shortly). The gist is
> > 
> > diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
> > index 7e7c92cafdbb..cb120b68c0e2 100644
> > --- a/drivers/spi/spi-fsl-espi.c
> > +++ b/drivers/spi/spi-fsl-espi.c
> > @@ -574,13 +574,14 @@ static void fsl_espi_cpu_irq(struct fsl_espi
> > *espi, u32 events)
> >   static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
> >   {
> >          struct fsl_espi *espi = context_data;
> > -       u32 events;
> > +       u32 events, mask;
> > 
> >          spin_lock(&espi->lock);
> > 
> >          /* Get interrupt events(tx/rx) */
> >          events = fsl_espi_read_reg(espi, ESPI_SPIE);
> > -       if (!events) {
> > +       mask = fsl_espi_read_reg(espi, ESPI_SPIM);
> > +       if (!(events & mask)) {
> >                  spin_unlock(&espi->lock);
> >                  return IRQ_NONE;
> >          }
> > 
> > The SPIE register contains the TXCNT so events is pretty much always
> > going to have something set. By checking events against what we've
> > actually requested interrupts for we don't see any spurious events.
> > 
> > I've tested this on the T2080RDB and on our custom hardware and it seems
> > to resolve the problem.
> > 
> 
> I looked at the fsl_espi_irq() too and noticed that clearing of the IRQ events
> are after processing TX/RX. That looks a bit odd to me.

I should have been more specific. I think you can loose IRQs as fsl_espi_irq() works now.
Consider this:
1) You get TX IRQ and enter fsl_espi_irq()
2) Enter fsl_espi_fill_tx_fifo() to process any chars until done.
3) Now you get one more TX IRQ
4) fsl_espi_irq() clear events -> IRQ from 3) is lost.

 Jocke

^ permalink raw reply

* [PATCH AUTOSEL 5.8 14/53] ibmvnic fix NULL tx_pools and rx_tools issue at do_reset
From: Sasha Levin @ 2020-09-07 16:31 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, netdev, Mingming Cao, Dany Madden, linuxppc-dev,
	David S . Miller
In-Reply-To: <20200907163220.1280412-1-sashal@kernel.org>

From: Mingming Cao <mmc@linux.vnet.ibm.com>

[ Upstream commit 9f13457377907fa253aef560e1a37e1ca4197f9b ]

At the time of do_rest, ibmvnic tries to re-initalize the tx_pools
and rx_pools to avoid re-allocating the long term buffer. However
there is a window inside do_reset that the tx_pools and
rx_pools were freed before re-initialized making it possible to deference
null pointers.

This patch fix this issue by always check the tx_pool
and rx_pool are not NULL after ibmvnic_login. If so, re-allocating
the pools. This will avoid getting into calling reset_tx/rx_pools with
NULL adapter tx_pools/rx_pools pointer. Also add null pointer check in
reset_tx_pools and reset_rx_pools to safe handle NULL pointer case.

Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
Signed-off-by: Dany Madden <drt@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5afb3c9c52d20..d3a774331afc7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -479,6 +479,9 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
 	int i, j, rc;
 	u64 *size_array;
 
+	if (!adapter->rx_pool)
+		return -1;
+
 	size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
 		be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
 
@@ -649,6 +652,9 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
 	int tx_scrqs;
 	int i, rc;
 
+	if (!adapter->tx_pool)
+		return -1;
+
 	tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
 	for (i = 0; i < tx_scrqs; i++) {
 		rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
@@ -2011,7 +2017,10 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		    adapter->req_rx_add_entries_per_subcrq !=
 		    old_num_rx_slots ||
 		    adapter->req_tx_entries_per_subcrq !=
-		    old_num_tx_slots) {
+		    old_num_tx_slots ||
+		    !adapter->rx_pool ||
+		    !adapter->tso_pool ||
+		    !adapter->tx_pool) {
 			release_rx_pools(adapter);
 			release_tx_pools(adapter);
 			release_napi(adapter);
@@ -2024,10 +2033,14 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		} else {
 			rc = reset_tx_pools(adapter);
 			if (rc)
+				netdev_dbg(adapter->netdev, "reset tx pools failed (%d)\n",
+						rc);
 				goto out;
 
 			rc = reset_rx_pools(adapter);
 			if (rc)
+				netdev_dbg(adapter->netdev, "reset rx pools failed (%d)\n",
+						rc);
 				goto out;
 		}
 		ibmvnic_disable_irqs(adapter);
-- 
2.25.1


^ permalink raw reply related

* [PATCH AUTOSEL 5.4 11/43] ibmvnic fix NULL tx_pools and rx_tools issue at do_reset
From: Sasha Levin @ 2020-09-07 16:32 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, netdev, Mingming Cao, Dany Madden, linuxppc-dev,
	David S . Miller
In-Reply-To: <20200907163329.1280888-1-sashal@kernel.org>

From: Mingming Cao <mmc@linux.vnet.ibm.com>

[ Upstream commit 9f13457377907fa253aef560e1a37e1ca4197f9b ]

At the time of do_rest, ibmvnic tries to re-initalize the tx_pools
and rx_pools to avoid re-allocating the long term buffer. However
there is a window inside do_reset that the tx_pools and
rx_pools were freed before re-initialized making it possible to deference
null pointers.

This patch fix this issue by always check the tx_pool
and rx_pool are not NULL after ibmvnic_login. If so, re-allocating
the pools. This will avoid getting into calling reset_tx/rx_pools with
NULL adapter tx_pools/rx_pools pointer. Also add null pointer check in
reset_tx_pools and reset_rx_pools to safe handle NULL pointer case.

Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
Signed-off-by: Dany Madden <drt@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2d20a48f0ba0a..de45b3709c14e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -416,6 +416,9 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
 	int i, j, rc;
 	u64 *size_array;
 
+	if (!adapter->rx_pool)
+		return -1;
+
 	size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
 		be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
 
@@ -586,6 +589,9 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
 	int tx_scrqs;
 	int i, rc;
 
+	if (!adapter->tx_pool)
+		return -1;
+
 	tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
 	for (i = 0; i < tx_scrqs; i++) {
 		rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
@@ -1918,7 +1924,10 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		    adapter->req_rx_add_entries_per_subcrq !=
 		    old_num_rx_slots ||
 		    adapter->req_tx_entries_per_subcrq !=
-		    old_num_tx_slots) {
+		    old_num_tx_slots ||
+		    !adapter->rx_pool ||
+		    !adapter->tso_pool ||
+		    !adapter->tx_pool) {
 			release_rx_pools(adapter);
 			release_tx_pools(adapter);
 			release_napi(adapter);
@@ -1931,10 +1940,14 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		} else {
 			rc = reset_tx_pools(adapter);
 			if (rc)
+				netdev_dbg(adapter->netdev, "reset tx pools failed (%d)\n",
+						rc);
 				goto out;
 
 			rc = reset_rx_pools(adapter);
 			if (rc)
+				netdev_dbg(adapter->netdev, "reset rx pools failed (%d)\n",
+						rc);
 				goto out;
 		}
 		ibmvnic_disable_irqs(adapter);
-- 
2.25.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox