LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH] powerpc/fault: fix wrong KUAP fault for IO_URING
From: Aneesh Kumar K.V @ 2021-02-02  6:16 UTC (permalink / raw)
  To: Christophe Leroy, Nicholas Piggin, Michael Ellerman, Zorro Lang
  Cc: Jens Axboe, linuxppc-dev
In-Reply-To: <6b081ea7-e4ee-21bb-7085-e33b4e5c6205@csgroup.eu>

On 2/2/21 11:32 AM, Christophe Leroy wrote:
> 
> 
> Le 02/02/2021 à 06:55, Aneesh Kumar K.V a écrit :
>> Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> writes:
>>
>>> Nicholas Piggin <npiggin@gmail.com> writes:
>>>
>>>> Excerpts from Michael Ellerman's message of January 30, 2021 9:22 pm:
>>>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>>>> +Aneesh
>>>>>>
>>>>>> Le 29/01/2021 à 07:52, Zorro Lang a écrit :
>>>>> ..
>>>>>>> [   96.200296] ------------[ cut here ]------------
>>>>>>> [   96.200304] Bug: Read fault blocked by KUAP!
>>>>>>> [   96.200309] WARNING: CPU: 3 PID: 1876 at 
>>>>>>> arch/powerpc/mm/fault.c:229 bad_kernel_fault+0x180/0x310
>>>>>>
>>>>>>> [   96.200734] NIP [c000000000849424] 
>>>>>>> fault_in_pages_readable+0x104/0x350
>>>>>>> [   96.200741] LR [c00000000084952c] 
>>>>>>> fault_in_pages_readable+0x20c/0x350
>>>>>>> [   96.200747] --- interrupt: 300
>>>>>>
>>>>>>
>>>>>> Problem happens in a section where userspace access is supposed to 
>>>>>> be granted, so the patch you
>>>>>> proposed is definitely not the right fix.
>>>>>>
>>>>>> c000000000849408:    2c 01 00 4c     isync
>>>>>> c00000000084940c:    a6 03 3d 7d     mtspr   29,r9  <== granting 
>>>>>> userspace access permission
>>>>>> c000000000849410:    2c 01 00 4c     isync
>>>>>> c000000000849414:    00 00 36 e9     ld      r9,0(r22)
>>>>>> c000000000849418:    20 00 29 81     lwz     r9,32(r9)
>>>>>> c00000000084941c:    00 02 29 71     andi.   r9,r9,512
>>>>>> c000000000849420:    78 d3 5e 7f     mr      r30,r26
>>>>>> ==> c000000000849424:    00 00 bf 8b     lbz     r29,0(r31)  <== 
>>>>>> accessing userspace
>>>>>> c000000000849428:    10 00 82 41     beq     c000000000849438 
>>>>>> <fault_in_pages_readable+0x118>
>>>>>> c00000000084942c:    2c 01 00 4c     isync
>>>>>> c000000000849430:    a6 03 bd 7e     mtspr   29,r21  <== clearing 
>>>>>> userspace access permission
>>>>>> c000000000849434:    2c 01 00 4c     isync
>>>>>>
>>>>>> My first guess is that the problem is linked to the following 
>>>>>> function, see the comment
>>>>>>
>>>>>> /*
>>>>>>    * For kernel thread that doesn't have thread.regs return
>>>>>>    * default AMR/IAMR values.
>>>>>>    */
>>>>>> static inline u64 current_thread_amr(void)
>>>>>> {
>>>>>>     if (current->thread.regs)
>>>>>>         return current->thread.regs->amr;
>>>>>>     return AMR_KUAP_BLOCKED;
>>>>>> }
>>>>>>
>>>>>> Above function was introduced by commit 48a8ab4eeb82 
>>>>>> ("powerpc/book3s64/pkeys: Don't update SPRN_AMR
>>>>>> when in kernel mode")
>>>>>
>>>>> Yeah that's a bit of a curly one.
>>>>>
>>>>> At some point io_uring did kthread_use_mm(), which is supposed to mean
>>>>> the kthread can operate on behalf of the original process that 
>>>>> submitted
>>>>> the IO.
>>>>>
>>>>> But because KUAP is implemented using memory protection keys, it 
>>>>> depends
>>>>> on the value of the AMR register, which is not part of the mm, it's in
>>>>> thread.regs->amr.
>>>>>
>>>>> And what's worse by the time we're in kthread_use_mm() we no longer 
>>>>> have
>>>>> access to the thread.regs->amr of the original process that submitted
>>>>> the IO.
>>>>>
>>>>> We also can't simply move the AMR into the mm, precisely because it's
>>>>> per thread, not per mm.
>>>>>
>>>>> So TBH I don't know how we're going to fix this.
>>>>>
>>>>> I guess we could return AMR=unblocked for kernel threads, but that's
>>>>> arguably a bug because it allows a process to circumvent memory 
>>>>> keys by
>>>>> asking the kernel to do the access.
>>>>
>>>> We shouldn't need to inherit AMR should we? We only need it to be 
>>>> locked
>>>> for kernel threads until it's explicitly unlocked -- nothing mm 
>>>> specific
>>>> there. I think current_thread_amr could return 0 for kernel threads? Or
>>>> I would even avoid using that function for allow_user_access and open
>>>> code the kthread case and remove it from current_thread_amr().
>>>>
>>>> Thanks,
>>>> Nick
>>>
>>
>> updated one
>>
>>  From 8fdb0680f983940d61f91da8252b13c8d3e8ebee Mon Sep 17 00:00:00 2001
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
>> Date: Tue, 2 Feb 2021 09:23:38 +0530
>> Subject: [PATCH v2] powerpc/kuap: Allow kernel thread to access userspace
>>   after kthread_use_mm
>>
>> This fix the bad fault reported by KUAP when io_wqe_worker access 
>> userspace.
>>
>>   Bug: Read fault blocked by KUAP!
>>   WARNING: CPU: 1 PID: 101841 at arch/powerpc/mm/fault.c:229 
>> __do_page_fault+0x6b4/0xcd0
>>   NIP [c00000000009e7e4] __do_page_fault+0x6b4/0xcd0
>>   LR [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0
>> ..........
>>   Call Trace:
>>   [c000000016367330] [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0 
>> (unreliable)
>>   [c0000000163673e0] [c00000000009ee3c] do_page_fault+0x3c/0x120
>>   [c000000016367430] [c00000000000c848] handle_page_fault+0x10/0x2c
>>   --- interrupt: 300 at iov_iter_fault_in_readable+0x148/0x6f0
>> ..........
>>   NIP [c0000000008e8228] iov_iter_fault_in_readable+0x148/0x6f0
>>   LR [c0000000008e834c] iov_iter_fault_in_readable+0x26c/0x6f0
>>   interrupt: 300
>>   [c0000000163677e0] [c0000000007154a0] iomap_write_actor+0xc0/0x280
>>   [c000000016367880] [c00000000070fc94] iomap_apply+0x1c4/0x780
>>   [c000000016367990] [c000000000710330] 
>> iomap_file_buffered_write+0xa0/0x120
>>   [c0000000163679e0] [c00800000040791c] 
>> xfs_file_buffered_aio_write+0x314/0x5e0 [xfs]
>>   [c000000016367a90] [c0000000006d74bc] io_write+0x10c/0x460
>>   [c000000016367bb0] [c0000000006d80e4] io_issue_sqe+0x8d4/0x1200
>>   [c000000016367c70] [c0000000006d8ad0] io_wq_submit_work+0xc0/0x250
>>   [c000000016367cb0] [c0000000006e2578] io_worker_handle_work+0x498/0x800
>>   [c000000016367d40] [c0000000006e2cdc] io_wqe_worker+0x3fc/0x4f0
>>   [c000000016367da0] [c0000000001cb0a4] kthread+0x1c4/0x1d0
>>   [c000000016367e10] [c00000000000dbf0] ret_from_kernel_thread+0x5c/0x6c
>>
>> The kernel consider thread AMR value for kernel thread to be
>> AMR_KUAP_BLOCKED. Hence access to userspace is denied. This
>> of course not correct and we should allow userspace access after
>> kthread_use_mm(). To be precise, kthread_use_mm() should inherit the
>> AMR value of the operating address space. But, the AMR value is
>> thread-specific and we inherit the address space and not thread
>> access restrictions. Because of this ignore AMR value when accessing
>> userspace via kernel thread.
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>> Changes from v1:
>> * Address review feedback from Nick
>>
>>   arch/powerpc/include/asm/book3s/64/kup.h | 8 +++++++-
>>   1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
>> b/arch/powerpc/include/asm/book3s/64/kup.h
>> index f50f72e535aa..95f4df99249e 100644
>> --- a/arch/powerpc/include/asm/book3s/64/kup.h
>> +++ b/arch/powerpc/include/asm/book3s/64/kup.h
>> @@ -384,7 +384,13 @@ static __always_inline void 
>> allow_user_access(void __user *to, const void __user
>>       // This is written so we can resolve to a single case at build time
>>       BUILD_BUG_ON(!__builtin_constant_p(dir));
>> -    if (mmu_has_feature(MMU_FTR_PKEY))
>> +    /*
>> +     * if it is a kthread that did kthread_use_mm() don't
>> +     * use current_thread_amr().
> 
> According to include/linux/sched.h, PF_KTHREAD means /* I am a kernel 
> thread */
> It doesn't seem to be related to kthread_use_mm()

That should be a sufficient check here. if we did reach here without 
calling kthread_user_mm, we will crash on access because we don't have a 
mm attached to the current process.  a kernel thread with kthread_use_mm has

current->mm == current->active_mm && current->flags & PF_KTHREAD.

The first part is true for every other process too.

> 
>> +     */
>> +    if (current->flags & PF_KTHREAD)
>> +        thread_amr = 0;
>> +    else if (mmu_has_feature(MMU_FTR_PKEY))
>>           thread_amr = current_thread_amr();
>>       if (dir == KUAP_READ)
>>
> 
> Christophe


-aneesh

^ permalink raw reply

* Re: [PATCH v4 11/23] powerpc/syscall: Rename syscall_64.c into syscall.c
From: Christophe Leroy @ 2021-02-02  6:15 UTC (permalink / raw)
  To: Nicholas Piggin, Benjamin Herrenschmidt, David Laight,
	Michael Ellerman, msuchanek@suse.de, Paul Mackerras
  Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org
In-Reply-To: <1611791083.sqnnh21vv0.astroid@bobo.none>



Le 28/01/2021 à 00:50, Nicholas Piggin a écrit :
> Excerpts from David Laight's message of January 26, 2021 8:28 pm:
>> From: Nicholas Piggin
>>> Sent: 26 January 2021 10:21
>>>
>>> Excerpts from Christophe Leroy's message of January 26, 2021 12:48 am:
>>>> syscall_64.c will be reused almost as is for PPC32.
>>>>
>>>> Rename it syscall.c
>>>
>>> Could you rename it to interrupt.c instead? A system call is an
>>> interrupt, and the file now also has code to return from other
>>> interrupts as well, and it matches the new asm/interrupt.h from
>>> the interrupts series.
>>
>> Hmmm....
>>
>> That might make it harder for someone looking for the system call
>> entry code to find it.
> 
> It's very grep'able.
> 
>> In some sense interrupts are the simpler case.
>>
>> Especially when comparing with other architectures which have
>> special instructions for syscall entry.
> 
> powerpc does have a special instruction for syscall, and it causes a
> system call interrupt.
> 
> I'm not sure about other architectures, but for powerpc its more
> sensible to call it interrupt.c than syscall.c.

Many other architectures have a syscall.c but for a different purpose: it contains arch specific 
system calls. We have that in powerpc as well, it is called syscalls.c

So to avoid confusion, I'll rename it. But I think "interrupt" is maybe not the right name. An 
interrupt most of the time refers to IRQ. For me system call is not an interrupt in the way it 
doesn't unexpectedly interrupt a program flow. In powerpc manuals it is generally called exceptions, 
no I'm more inclined to call it exception.c

Christophe

^ permalink raw reply

* Re: [PATCH] powerpc/fault: fix wrong KUAP fault for IO_URING
From: Christophe Leroy @ 2021-02-02  6:02 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Nicholas Piggin, Michael Ellerman, Zorro Lang
  Cc: Jens Axboe, linuxppc-dev
In-Reply-To: <874kivrp2v.fsf@linux.ibm.com>



Le 02/02/2021 à 06:55, Aneesh Kumar K.V a écrit :
> Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> writes:
> 
>> Nicholas Piggin <npiggin@gmail.com> writes:
>>
>>> Excerpts from Michael Ellerman's message of January 30, 2021 9:22 pm:
>>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>>> +Aneesh
>>>>>
>>>>> Le 29/01/2021 à 07:52, Zorro Lang a écrit :
>>>> ..
>>>>>> [   96.200296] ------------[ cut here ]------------
>>>>>> [   96.200304] Bug: Read fault blocked by KUAP!
>>>>>> [   96.200309] WARNING: CPU: 3 PID: 1876 at arch/powerpc/mm/fault.c:229 bad_kernel_fault+0x180/0x310
>>>>>
>>>>>> [   96.200734] NIP [c000000000849424] fault_in_pages_readable+0x104/0x350
>>>>>> [   96.200741] LR [c00000000084952c] fault_in_pages_readable+0x20c/0x350
>>>>>> [   96.200747] --- interrupt: 300
>>>>>
>>>>>
>>>>> Problem happens in a section where userspace access is supposed to be granted, so the patch you
>>>>> proposed is definitely not the right fix.
>>>>>
>>>>> c000000000849408:	2c 01 00 4c 	isync
>>>>> c00000000084940c:	a6 03 3d 7d 	mtspr   29,r9  <== granting userspace access permission
>>>>> c000000000849410:	2c 01 00 4c 	isync
>>>>> c000000000849414:	00 00 36 e9 	ld      r9,0(r22)
>>>>> c000000000849418:	20 00 29 81 	lwz     r9,32(r9)
>>>>> c00000000084941c:	00 02 29 71 	andi.   r9,r9,512
>>>>> c000000000849420:	78 d3 5e 7f 	mr      r30,r26
>>>>> ==> c000000000849424:	00 00 bf 8b 	lbz     r29,0(r31)  <== accessing userspace
>>>>> c000000000849428:	10 00 82 41 	beq     c000000000849438 <fault_in_pages_readable+0x118>
>>>>> c00000000084942c:	2c 01 00 4c 	isync
>>>>> c000000000849430:	a6 03 bd 7e 	mtspr   29,r21  <== clearing userspace access permission
>>>>> c000000000849434:	2c 01 00 4c 	isync
>>>>>
>>>>> My first guess is that the problem is linked to the following function, see the comment
>>>>>
>>>>> /*
>>>>>    * For kernel thread that doesn't have thread.regs return
>>>>>    * default AMR/IAMR values.
>>>>>    */
>>>>> static inline u64 current_thread_amr(void)
>>>>> {
>>>>> 	if (current->thread.regs)
>>>>> 		return current->thread.regs->amr;
>>>>> 	return AMR_KUAP_BLOCKED;
>>>>> }
>>>>>
>>>>> Above function was introduced by commit 48a8ab4eeb82 ("powerpc/book3s64/pkeys: Don't update SPRN_AMR
>>>>> when in kernel mode")
>>>>
>>>> Yeah that's a bit of a curly one.
>>>>
>>>> At some point io_uring did kthread_use_mm(), which is supposed to mean
>>>> the kthread can operate on behalf of the original process that submitted
>>>> the IO.
>>>>
>>>> But because KUAP is implemented using memory protection keys, it depends
>>>> on the value of the AMR register, which is not part of the mm, it's in
>>>> thread.regs->amr.
>>>>
>>>> And what's worse by the time we're in kthread_use_mm() we no longer have
>>>> access to the thread.regs->amr of the original process that submitted
>>>> the IO.
>>>>
>>>> We also can't simply move the AMR into the mm, precisely because it's
>>>> per thread, not per mm.
>>>>
>>>> So TBH I don't know how we're going to fix this.
>>>>
>>>> I guess we could return AMR=unblocked for kernel threads, but that's
>>>> arguably a bug because it allows a process to circumvent memory keys by
>>>> asking the kernel to do the access.
>>>
>>> We shouldn't need to inherit AMR should we? We only need it to be locked
>>> for kernel threads until it's explicitly unlocked -- nothing mm specific
>>> there. I think current_thread_amr could return 0 for kernel threads? Or
>>> I would even avoid using that function for allow_user_access and open
>>> code the kthread case and remove it from current_thread_amr().
>>>
>>> Thanks,
>>> Nick
>>
> 
> updated one
> 
>  From 8fdb0680f983940d61f91da8252b13c8d3e8ebee Mon Sep 17 00:00:00 2001
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
> Date: Tue, 2 Feb 2021 09:23:38 +0530
> Subject: [PATCH v2] powerpc/kuap: Allow kernel thread to access userspace
>   after kthread_use_mm
> 
> This fix the bad fault reported by KUAP when io_wqe_worker access userspace.
> 
>   Bug: Read fault blocked by KUAP!
>   WARNING: CPU: 1 PID: 101841 at arch/powerpc/mm/fault.c:229 __do_page_fault+0x6b4/0xcd0
>   NIP [c00000000009e7e4] __do_page_fault+0x6b4/0xcd0
>   LR [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0
> ..........
>   Call Trace:
>   [c000000016367330] [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0 (unreliable)
>   [c0000000163673e0] [c00000000009ee3c] do_page_fault+0x3c/0x120
>   [c000000016367430] [c00000000000c848] handle_page_fault+0x10/0x2c
>   --- interrupt: 300 at iov_iter_fault_in_readable+0x148/0x6f0
> ..........
>   NIP [c0000000008e8228] iov_iter_fault_in_readable+0x148/0x6f0
>   LR [c0000000008e834c] iov_iter_fault_in_readable+0x26c/0x6f0
>   interrupt: 300
>   [c0000000163677e0] [c0000000007154a0] iomap_write_actor+0xc0/0x280
>   [c000000016367880] [c00000000070fc94] iomap_apply+0x1c4/0x780
>   [c000000016367990] [c000000000710330] iomap_file_buffered_write+0xa0/0x120
>   [c0000000163679e0] [c00800000040791c] xfs_file_buffered_aio_write+0x314/0x5e0 [xfs]
>   [c000000016367a90] [c0000000006d74bc] io_write+0x10c/0x460
>   [c000000016367bb0] [c0000000006d80e4] io_issue_sqe+0x8d4/0x1200
>   [c000000016367c70] [c0000000006d8ad0] io_wq_submit_work+0xc0/0x250
>   [c000000016367cb0] [c0000000006e2578] io_worker_handle_work+0x498/0x800
>   [c000000016367d40] [c0000000006e2cdc] io_wqe_worker+0x3fc/0x4f0
>   [c000000016367da0] [c0000000001cb0a4] kthread+0x1c4/0x1d0
>   [c000000016367e10] [c00000000000dbf0] ret_from_kernel_thread+0x5c/0x6c
> 
> The kernel consider thread AMR value for kernel thread to be
> AMR_KUAP_BLOCKED. Hence access to userspace is denied. This
> of course not correct and we should allow userspace access after
> kthread_use_mm(). To be precise, kthread_use_mm() should inherit the
> AMR value of the operating address space. But, the AMR value is
> thread-specific and we inherit the address space and not thread
> access restrictions. Because of this ignore AMR value when accessing
> userspace via kernel thread.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> Changes from v1:
> * Address review feedback from Nick
> 
>   arch/powerpc/include/asm/book3s/64/kup.h | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
> index f50f72e535aa..95f4df99249e 100644
> --- a/arch/powerpc/include/asm/book3s/64/kup.h
> +++ b/arch/powerpc/include/asm/book3s/64/kup.h
> @@ -384,7 +384,13 @@ static __always_inline void allow_user_access(void __user *to, const void __user
>   	// This is written so we can resolve to a single case at build time
>   	BUILD_BUG_ON(!__builtin_constant_p(dir));
>   
> -	if (mmu_has_feature(MMU_FTR_PKEY))
> +	/*
> +	 * if it is a kthread that did kthread_use_mm() don't
> +	 * use current_thread_amr().

According to include/linux/sched.h, PF_KTHREAD means /* I am a kernel thread */
It doesn't seem to be related to kthread_use_mm()

> +	 */
> +	if (current->flags & PF_KTHREAD)
> +		thread_amr = 0;
> +	else if (mmu_has_feature(MMU_FTR_PKEY))
>   		thread_amr = current_thread_amr();
>   
>   	if (dir == KUAP_READ)
> 

Christophe

^ permalink raw reply

* Re: [PATCH v7 00/42] powerpc: interrupt wrappers
From: Christophe Leroy @ 2021-02-02  5:57 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev; +Cc: Athira Rajeev
In-Reply-To: <20210130130852.2952424-1-npiggin@gmail.com>



Le 30/01/2021 à 14:08, Nicholas Piggin a écrit :
> This adds interrupt handler wrapper functions, similar to the
> generic / x86 code, and moves several common operations into them
> from either asm or open coded in the individual handlers.
> 
> This series is based on powerpc fixes-test tree, there's another
> unrelated pending fix in patch 1 of the series which clashes a
> bit.

This series trivialy conflicts with 
https://github.com/linuxppc/linux/commit/11f9c1d2fb497f69f83d4fab6fb7fc8a6884eded on powerpc next tree.

> 
> This includes more changes and fixes suggested by Christophe,
> a few minor bug fixes and compile fix noticed by kbuild, and
> some NMI changes Athira asked about -- PMI interrupts don't
> block tracing when they are soft-NMI.
> 
> Since v1:
> - Fixed a couple of compile issues
> - Fixed perf weirdness (sometimes NMI, sometimes not)
> - Also move irq_enter/exit into wrappers
> 
> Since v2:
> - Rebased upstream
> - Took code in patch 3 from Christophe
> - Fixed some compile errors from 0day
> 
> Since v3:
> - Rebased
> - Split Christophe's 32s DABR patch into its own patch
> - Fixed missing asm from 32s on patch 3 noticed by Christophe.
> - Moved changes around, split out one more patch (patch 9) to make
>    changes more logical and atomic.
> - Add comments explaining _RAW handlers (SLB, HPTE) interrupts better
> 
> Since v4:
> - Rebased (on top of scv fallback flush fix)
> - Rearranged a few changes into different patches from Christophe,
>    e.g., the ___do_page_fault change from patch 2 to 10. I didn't
>    do everything (e.g., splitting to update __hash_page to drop the
>    msr argument before the bulk of patch 2 seemed like churn without
>    much improvement), and also other things like removing the new
>    ___do_page_fault variant if we can change hash fault context tracking
>    I didn't get time to completely investigate and implement. I think
>    this shouldn't be a showstopper though we can make more improvements
>    as we go.
> 
> Since v5:
> - Lots of good review suggestions from Christophe, see v5 email threads.
> - Major change being do_break is left in asm and selected early as an
>    alternate interrupt handler now, which is a smaller step and matches
>    other subarchs better.
> - Rearranged patches, split, moved things, bug fixes, etc.
> - Converted a few more missed exception handlers for debug and ras
> 
> Since v6:
> - Move related interrupt handler de-argify patches together [Christophe]
> - Split do_bad_page_fault patch [Christophe]
> - Change do_page_fault cleanup patch [Christophe]
> - entry_32.S can't avoid saving r4/r5 until later in the series [Christophe]
> - Soft-NMI decrementer and perf don't block ftrace [Athira]
> - Rebased on some fixes
> - Fixed mismerge / duplicate line in patch 40
> - Fix kbuild hash missing declaration bug
> 
> Christophe Leroy (1):
>    powerpc/32s: move DABR match out of handle_page_fault
> 
> Nicholas Piggin (41):
>    powerpc/64s: interrupt exit improve bounding of interrupt recursion
>    KVM: PPC: Book3S HV: Context tracking exit guest context before
>      enabling irqs
>    powerpc/64s: move DABR match out of handle_page_fault
>    powerpc/64s: move the hash fault handling logic to C
>    powerpc: remove arguments from fault handler functions
>    powerpc/fsl_booke/32: CacheLockingException remove args
>    powerpc: do_break get registers from regs
>    powerpc: DebugException remove args
>    powerpc/32: transfer can avoid saving r4/r5 over trace call
>    powerpc: bad_page_fault get registers from regs
>    powerpc/64s: add do_bad_page_fault_segv handler
>    powerpc: rearrange do_page_fault error case to be inside
>      exception_enter
>    powerpc/64s: move bad_page_fault handling to C
>    powerpc/64s: split do_hash_fault
>    powerpc/mm: Remove stale do_page_fault comment referring to SLB faults
>    powerpc/64s: slb comment update
>    powerpc/traps: add NOKPROBE_SYMBOL for sreset and mce
>    powerpc/perf: move perf irq/nmi handling details into traps.c
>    powerpc/time: move timer_broadcast_interrupt prototype to asm/time.h
>    powerpc: add and use unknown_async_exception
>    powerpc/cell: tidy up pervasive declarations
>    powerpc: introduce die_mce
>    powerpc/mce: ensure machine check handler always tests RI
>    powerpc: improve handling of unrecoverable system reset
>    powerpc: interrupt handler wrapper functions
>    powerpc: add interrupt wrapper entry / exit stub functions
>    powerpc: convert interrupt handlers to use wrappers
>    powerpc: add interrupt_cond_local_irq_enable helper
>    powerpc/64: context tracking remove _TIF_NOHZ
>    powerpc/64s/hash: improve context tracking of hash faults
>    powerpc/64: context tracking move to interrupt wrappers
>    powerpc/64: add context tracking to asynchronous interrupts
>    powerpc: handle irq_enter/irq_exit in interrupt handler wrappers
>    powerpc/64s: move context tracking exit to interrupt exit path
>    powerpc/64s: reconcile interrupts in C
>    powerpc/64: move account_stolen_time into its own function
>    powerpc/64: entry cpu time accounting in C
>    powerpc: move NMI entry/exit code into wrapper
>    powerpc/64s: move NMI soft-mask handling to C
>    powerpc/64s: runlatch interrupt handling in C
>    powerpc/64s: power4 nap fixup in C
> 
>   arch/powerpc/Kconfig                       |   1 -
>   arch/powerpc/include/asm/asm-prototypes.h  |  29 --
>   arch/powerpc/include/asm/bug.h             |   9 +-
>   arch/powerpc/include/asm/cputime.h         |  14 +
>   arch/powerpc/include/asm/debug.h           |   4 -
>   arch/powerpc/include/asm/hw_irq.h          |   9 -
>   arch/powerpc/include/asm/interrupt.h       | 437 +++++++++++++++++++++
>   arch/powerpc/include/asm/ppc_asm.h         |  24 --
>   arch/powerpc/include/asm/processor.h       |   1 +
>   arch/powerpc/include/asm/thread_info.h     |  10 +-
>   arch/powerpc/include/asm/time.h            |   2 +
>   arch/powerpc/kernel/dbell.c                |   9 +-
>   arch/powerpc/kernel/entry_32.S             |  25 +-
>   arch/powerpc/kernel/exceptions-64e.S       |   8 +-
>   arch/powerpc/kernel/exceptions-64s.S       | 310 ++-------------
>   arch/powerpc/kernel/head_40x.S             |  11 +-
>   arch/powerpc/kernel/head_8xx.S             |  11 +-
>   arch/powerpc/kernel/head_book3s_32.S       |  14 +-
>   arch/powerpc/kernel/head_booke.h           |   6 +-
>   arch/powerpc/kernel/head_fsl_booke.S       |   6 +-
>   arch/powerpc/kernel/idle_book3s.S          |   4 +
>   arch/powerpc/kernel/irq.c                  |   7 +-
>   arch/powerpc/kernel/mce.c                  |  16 +-
>   arch/powerpc/kernel/process.c              |   8 +-
>   arch/powerpc/kernel/ptrace/ptrace.c        |   4 -
>   arch/powerpc/kernel/signal.c               |   4 -
>   arch/powerpc/kernel/syscall_64.c           |  90 +++--
>   arch/powerpc/kernel/tau_6xx.c              |   5 +-
>   arch/powerpc/kernel/time.c                 |   7 +-
>   arch/powerpc/kernel/traps.c                | 265 ++++++-------
>   arch/powerpc/kernel/watchdog.c             |  15 +-
>   arch/powerpc/kvm/book3s_hv.c               |   7 +-
>   arch/powerpc/kvm/book3s_hv_builtin.c       |   1 +
>   arch/powerpc/kvm/booke.c                   |   1 +
>   arch/powerpc/mm/book3s64/hash_utils.c      |  97 +++--
>   arch/powerpc/mm/book3s64/slb.c             |  40 +-
>   arch/powerpc/mm/fault.c                    |  76 ++--
>   arch/powerpc/perf/core-book3s.c            |  35 +-
>   arch/powerpc/perf/core-fsl-emb.c           |  25 --
>   arch/powerpc/platforms/8xx/machine_check.c |   2 +-
>   arch/powerpc/platforms/cell/pervasive.c    |   1 +
>   arch/powerpc/platforms/cell/pervasive.h    |   3 -
>   arch/powerpc/platforms/cell/ras.c          |   6 +-
>   arch/powerpc/platforms/cell/ras.h          |   9 +-
>   arch/powerpc/platforms/powernv/idle.c      |   1 +
>   arch/powerpc/platforms/powernv/opal.c      |   2 +-
>   arch/powerpc/platforms/pseries/ras.c       |   2 +-
>   47 files changed, 914 insertions(+), 759 deletions(-)
>   create mode 100644 arch/powerpc/include/asm/interrupt.h
> 

^ permalink raw reply

* Re: [PATCH] powerpc/fault: fix wrong KUAP fault for IO_URING
From: Aneesh Kumar K.V @ 2021-02-02  5:55 UTC (permalink / raw)
  To: Nicholas Piggin, Christophe Leroy, Michael Ellerman, Zorro Lang
  Cc: Jens Axboe, linuxppc-dev
In-Reply-To: <877dnrrsbu.fsf@linux.ibm.com>

Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> writes:

> Nicholas Piggin <npiggin@gmail.com> writes:
>
>> Excerpts from Michael Ellerman's message of January 30, 2021 9:22 pm:
>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>> +Aneesh
>>>>
>>>> Le 29/01/2021 à 07:52, Zorro Lang a écrit :
>>> ..
>>>>> [   96.200296] ------------[ cut here ]------------
>>>>> [   96.200304] Bug: Read fault blocked by KUAP!
>>>>> [   96.200309] WARNING: CPU: 3 PID: 1876 at arch/powerpc/mm/fault.c:229 bad_kernel_fault+0x180/0x310
>>>>
>>>>> [   96.200734] NIP [c000000000849424] fault_in_pages_readable+0x104/0x350
>>>>> [   96.200741] LR [c00000000084952c] fault_in_pages_readable+0x20c/0x350
>>>>> [   96.200747] --- interrupt: 300
>>>>
>>>>
>>>> Problem happens in a section where userspace access is supposed to be granted, so the patch you 
>>>> proposed is definitely not the right fix.
>>>>
>>>> c000000000849408:	2c 01 00 4c 	isync
>>>> c00000000084940c:	a6 03 3d 7d 	mtspr   29,r9  <== granting userspace access permission
>>>> c000000000849410:	2c 01 00 4c 	isync
>>>> c000000000849414:	00 00 36 e9 	ld      r9,0(r22)
>>>> c000000000849418:	20 00 29 81 	lwz     r9,32(r9)
>>>> c00000000084941c:	00 02 29 71 	andi.   r9,r9,512
>>>> c000000000849420:	78 d3 5e 7f 	mr      r30,r26
>>>> ==> c000000000849424:	00 00 bf 8b 	lbz     r29,0(r31)  <== accessing userspace
>>>> c000000000849428:	10 00 82 41 	beq     c000000000849438 <fault_in_pages_readable+0x118>
>>>> c00000000084942c:	2c 01 00 4c 	isync
>>>> c000000000849430:	a6 03 bd 7e 	mtspr   29,r21  <== clearing userspace access permission
>>>> c000000000849434:	2c 01 00 4c 	isync
>>>>
>>>> My first guess is that the problem is linked to the following function, see the comment
>>>>
>>>> /*
>>>>   * For kernel thread that doesn't have thread.regs return
>>>>   * default AMR/IAMR values.
>>>>   */
>>>> static inline u64 current_thread_amr(void)
>>>> {
>>>> 	if (current->thread.regs)
>>>> 		return current->thread.regs->amr;
>>>> 	return AMR_KUAP_BLOCKED;
>>>> }
>>>>
>>>> Above function was introduced by commit 48a8ab4eeb82 ("powerpc/book3s64/pkeys: Don't update SPRN_AMR 
>>>> when in kernel mode")
>>> 
>>> Yeah that's a bit of a curly one.
>>> 
>>> At some point io_uring did kthread_use_mm(), which is supposed to mean
>>> the kthread can operate on behalf of the original process that submitted
>>> the IO.
>>> 
>>> But because KUAP is implemented using memory protection keys, it depends
>>> on the value of the AMR register, which is not part of the mm, it's in
>>> thread.regs->amr.
>>> 
>>> And what's worse by the time we're in kthread_use_mm() we no longer have
>>> access to the thread.regs->amr of the original process that submitted
>>> the IO.
>>> 
>>> We also can't simply move the AMR into the mm, precisely because it's
>>> per thread, not per mm.
>>> 
>>> So TBH I don't know how we're going to fix this.
>>> 
>>> I guess we could return AMR=unblocked for kernel threads, but that's
>>> arguably a bug because it allows a process to circumvent memory keys by
>>> asking the kernel to do the access.
>>
>> We shouldn't need to inherit AMR should we? We only need it to be locked 
>> for kernel threads until it's explicitly unlocked -- nothing mm specific 
>> there. I think current_thread_amr could return 0 for kernel threads? Or
>> I would even avoid using that function for allow_user_access and open
>> code the kthread case and remove it from current_thread_amr().
>>
>> Thanks,
>> Nick
>

updated one

From 8fdb0680f983940d61f91da8252b13c8d3e8ebee Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 2 Feb 2021 09:23:38 +0530
Subject: [PATCH v2] powerpc/kuap: Allow kernel thread to access userspace
 after kthread_use_mm

This fix the bad fault reported by KUAP when io_wqe_worker access userspace.

 Bug: Read fault blocked by KUAP!
 WARNING: CPU: 1 PID: 101841 at arch/powerpc/mm/fault.c:229 __do_page_fault+0x6b4/0xcd0
 NIP [c00000000009e7e4] __do_page_fault+0x6b4/0xcd0
 LR [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0
..........
 Call Trace:
 [c000000016367330] [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0 (unreliable)
 [c0000000163673e0] [c00000000009ee3c] do_page_fault+0x3c/0x120
 [c000000016367430] [c00000000000c848] handle_page_fault+0x10/0x2c
 --- interrupt: 300 at iov_iter_fault_in_readable+0x148/0x6f0
..........
 NIP [c0000000008e8228] iov_iter_fault_in_readable+0x148/0x6f0
 LR [c0000000008e834c] iov_iter_fault_in_readable+0x26c/0x6f0
 interrupt: 300
 [c0000000163677e0] [c0000000007154a0] iomap_write_actor+0xc0/0x280
 [c000000016367880] [c00000000070fc94] iomap_apply+0x1c4/0x780
 [c000000016367990] [c000000000710330] iomap_file_buffered_write+0xa0/0x120
 [c0000000163679e0] [c00800000040791c] xfs_file_buffered_aio_write+0x314/0x5e0 [xfs]
 [c000000016367a90] [c0000000006d74bc] io_write+0x10c/0x460
 [c000000016367bb0] [c0000000006d80e4] io_issue_sqe+0x8d4/0x1200
 [c000000016367c70] [c0000000006d8ad0] io_wq_submit_work+0xc0/0x250
 [c000000016367cb0] [c0000000006e2578] io_worker_handle_work+0x498/0x800
 [c000000016367d40] [c0000000006e2cdc] io_wqe_worker+0x3fc/0x4f0
 [c000000016367da0] [c0000000001cb0a4] kthread+0x1c4/0x1d0
 [c000000016367e10] [c00000000000dbf0] ret_from_kernel_thread+0x5c/0x6c

The kernel consider thread AMR value for kernel thread to be
AMR_KUAP_BLOCKED. Hence access to userspace is denied. This
of course not correct and we should allow userspace access after
kthread_use_mm(). To be precise, kthread_use_mm() should inherit the
AMR value of the operating address space. But, the AMR value is
thread-specific and we inherit the address space and not thread
access restrictions. Because of this ignore AMR value when accessing
userspace via kernel thread.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
Changes from v1:
* Address review feedback from Nick 

 arch/powerpc/include/asm/book3s/64/kup.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index f50f72e535aa..95f4df99249e 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -384,7 +384,13 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 	// This is written so we can resolve to a single case at build time
 	BUILD_BUG_ON(!__builtin_constant_p(dir));
 
-	if (mmu_has_feature(MMU_FTR_PKEY))
+	/*
+	 * if it is a kthread that did kthread_use_mm() don't
+	 * use current_thread_amr().
+	 */
+	if (current->flags & PF_KTHREAD)
+		thread_amr = 0;
+	else if (mmu_has_feature(MMU_FTR_PKEY))
 		thread_amr = current_thread_amr();
 
 	if (dir == KUAP_READ)
-- 
2.29.2


^ permalink raw reply related

* [PATCH] cpufreq: Remove unused flag CPUFREQ_PM_NO_WARN
From: Viresh Kumar @ 2021-02-02  5:41 UTC (permalink / raw)
  To: Rafael Wysocki, Viresh Kumar, Michael Ellerman,
	Benjamin Herrenschmidt, Paul Mackerras
  Cc: linuxppc-dev, Vincent Guittot, linux-kernel, linux-pm

This flag is set by one of the drivers but it isn't used in the code
otherwise. Remove the unused flag and update the driver.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
Rebased over:

https://lore.kernel.org/lkml/a59bb322b22c247d570b70a8e94067804287623b.1612241683.git.viresh.kumar@linaro.org/

 drivers/cpufreq/pmac32-cpufreq.c |  3 +--
 include/linux/cpufreq.h          | 13 +++++--------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 73621bc11976..4f20c6a9108d 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -439,8 +439,7 @@ static struct cpufreq_driver pmac_cpufreq_driver = {
 	.init		= pmac_cpufreq_cpu_init,
 	.suspend	= pmac_cpufreq_suspend,
 	.resume		= pmac_cpufreq_resume,
-	.flags		= CPUFREQ_PM_NO_WARN |
-			  CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING,
+	.flags		= CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING,
 	.attr		= cpufreq_generic_attr,
 	.name		= "powermac",
 };
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index c8e40e91fe9b..353969c7acd3 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -398,8 +398,11 @@ struct cpufreq_driver {
 /* loops_per_jiffy or other kernel "constants" aren't affected by frequency transitions */
 #define CPUFREQ_CONST_LOOPS			BIT(1)
 
-/* don't warn on suspend/resume speed mismatches */
-#define CPUFREQ_PM_NO_WARN			BIT(2)
+/*
+ * Set by drivers that want the core to automatically register the cpufreq
+ * driver as a thermal cooling device.
+ */
+#define CPUFREQ_IS_COOLING_DEV			BIT(2)
 
 /*
  * This should be set by platforms having multiple clock-domains, i.e.
@@ -431,12 +434,6 @@ struct cpufreq_driver {
  */
 #define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING	BIT(6)
 
-/*
- * Set by drivers that want the core to automatically register the cpufreq
- * driver as a thermal cooling device.
- */
-#define CPUFREQ_IS_COOLING_DEV			BIT(7)
-
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
-- 
2.25.0.rc1.19.g042ed3e048af


^ permalink raw reply related

* Re: [PATCH] powerpc/fault: fix wrong KUAP fault for IO_URING
From: Aneesh Kumar K.V @ 2021-02-02  4:45 UTC (permalink / raw)
  To: Nicholas Piggin, Christophe Leroy, Michael Ellerman, Zorro Lang
  Cc: Jens Axboe, linuxppc-dev
In-Reply-To: <1612014260.b4fac0liie.astroid@bobo.none>

Nicholas Piggin <npiggin@gmail.com> writes:

> Excerpts from Michael Ellerman's message of January 30, 2021 9:22 pm:
>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>> +Aneesh
>>>
>>> Le 29/01/2021 à 07:52, Zorro Lang a écrit :
>> ..
>>>> [   96.200296] ------------[ cut here ]------------
>>>> [   96.200304] Bug: Read fault blocked by KUAP!
>>>> [   96.200309] WARNING: CPU: 3 PID: 1876 at arch/powerpc/mm/fault.c:229 bad_kernel_fault+0x180/0x310
>>>
>>>> [   96.200734] NIP [c000000000849424] fault_in_pages_readable+0x104/0x350
>>>> [   96.200741] LR [c00000000084952c] fault_in_pages_readable+0x20c/0x350
>>>> [   96.200747] --- interrupt: 300
>>>
>>>
>>> Problem happens in a section where userspace access is supposed to be granted, so the patch you 
>>> proposed is definitely not the right fix.
>>>
>>> c000000000849408:	2c 01 00 4c 	isync
>>> c00000000084940c:	a6 03 3d 7d 	mtspr   29,r9  <== granting userspace access permission
>>> c000000000849410:	2c 01 00 4c 	isync
>>> c000000000849414:	00 00 36 e9 	ld      r9,0(r22)
>>> c000000000849418:	20 00 29 81 	lwz     r9,32(r9)
>>> c00000000084941c:	00 02 29 71 	andi.   r9,r9,512
>>> c000000000849420:	78 d3 5e 7f 	mr      r30,r26
>>> ==> c000000000849424:	00 00 bf 8b 	lbz     r29,0(r31)  <== accessing userspace
>>> c000000000849428:	10 00 82 41 	beq     c000000000849438 <fault_in_pages_readable+0x118>
>>> c00000000084942c:	2c 01 00 4c 	isync
>>> c000000000849430:	a6 03 bd 7e 	mtspr   29,r21  <== clearing userspace access permission
>>> c000000000849434:	2c 01 00 4c 	isync
>>>
>>> My first guess is that the problem is linked to the following function, see the comment
>>>
>>> /*
>>>   * For kernel thread that doesn't have thread.regs return
>>>   * default AMR/IAMR values.
>>>   */
>>> static inline u64 current_thread_amr(void)
>>> {
>>> 	if (current->thread.regs)
>>> 		return current->thread.regs->amr;
>>> 	return AMR_KUAP_BLOCKED;
>>> }
>>>
>>> Above function was introduced by commit 48a8ab4eeb82 ("powerpc/book3s64/pkeys: Don't update SPRN_AMR 
>>> when in kernel mode")
>> 
>> Yeah that's a bit of a curly one.
>> 
>> At some point io_uring did kthread_use_mm(), which is supposed to mean
>> the kthread can operate on behalf of the original process that submitted
>> the IO.
>> 
>> But because KUAP is implemented using memory protection keys, it depends
>> on the value of the AMR register, which is not part of the mm, it's in
>> thread.regs->amr.
>> 
>> And what's worse by the time we're in kthread_use_mm() we no longer have
>> access to the thread.regs->amr of the original process that submitted
>> the IO.
>> 
>> We also can't simply move the AMR into the mm, precisely because it's
>> per thread, not per mm.
>> 
>> So TBH I don't know how we're going to fix this.
>> 
>> I guess we could return AMR=unblocked for kernel threads, but that's
>> arguably a bug because it allows a process to circumvent memory keys by
>> asking the kernel to do the access.
>
> We shouldn't need to inherit AMR should we? We only need it to be locked 
> for kernel threads until it's explicitly unlocked -- nothing mm specific 
> there. I think current_thread_amr could return 0 for kernel threads? Or
> I would even avoid using that function for allow_user_access and open
> code the kthread case and remove it from current_thread_amr().
>
> Thanks,
> Nick

Can we try this?

commit 9a193d38b1a0a52364bc70ec953e0685993603b6
Author: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Date:   Tue Feb 2 09:23:38 2021 +0530

    powerpc/kuap: Allow kernel thread to access userspace after kthread_use_mm
    
    This fix the bad fault reported by KUAP when io_wqe_worker access userspace.
    
     Bug: Read fault blocked by KUAP!
     WARNING: CPU: 1 PID: 101841 at arch/powerpc/mm/fault.c:229 __do_page_fault+0x6b4/0xcd0
     NIP [c00000000009e7e4] __do_page_fault+0x6b4/0xcd0
     LR [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0
    ..........
     Call Trace:
     [c000000016367330] [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0 (unreliable)
     [c0000000163673e0] [c00000000009ee3c] do_page_fault+0x3c/0x120
     [c000000016367430] [c00000000000c848] handle_page_fault+0x10/0x2c
     --- interrupt: 300 at iov_iter_fault_in_readable+0x148/0x6f0
    ..........
     NIP [c0000000008e8228] iov_iter_fault_in_readable+0x148/0x6f0
     LR [c0000000008e834c] iov_iter_fault_in_readable+0x26c/0x6f0
     interrupt: 300
     [c0000000163677e0] [c0000000007154a0] iomap_write_actor+0xc0/0x280
     [c000000016367880] [c00000000070fc94] iomap_apply+0x1c4/0x780
     [c000000016367990] [c000000000710330] iomap_file_buffered_write+0xa0/0x120
     [c0000000163679e0] [c00800000040791c] xfs_file_buffered_aio_write+0x314/0x5e0 [xfs]
     [c000000016367a90] [c0000000006d74bc] io_write+0x10c/0x460
     [c000000016367bb0] [c0000000006d80e4] io_issue_sqe+0x8d4/0x1200
     [c000000016367c70] [c0000000006d8ad0] io_wq_submit_work+0xc0/0x250
     [c000000016367cb0] [c0000000006e2578] io_worker_handle_work+0x498/0x800
     [c000000016367d40] [c0000000006e2cdc] io_wqe_worker+0x3fc/0x4f0
     [c000000016367da0] [c0000000001cb0a4] kthread+0x1c4/0x1d0
     [c000000016367e10] [c00000000000dbf0] ret_from_kernel_thread+0x5c/0x6c
    
    The kernel consider thread AMR value for kernel thread to be
    AMR_KUAP_BLOCKED. Hence access to userspace is denied. This
    of course not correct and we should allow userspace access after
    kthread_use_mm(). To be precise, kthread_use_mm() should inherit the
    AMR value of the operating address space. But, the AMR value is
    thread-specific and we inherit the address space and not thread
    access restrictions. Because of this ignore AMR value when accessing
    userspace via kernel thread.
    
    Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index f50f72e535aa..7457d80ba0bb 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -384,7 +384,13 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 	// This is written so we can resolve to a single case at build time
 	BUILD_BUG_ON(!__builtin_constant_p(dir));
 
-	if (mmu_has_feature(MMU_FTR_PKEY))
+	/*
+	 * if it is a kthread that did kthread_use_mm() don't
+	 * use current_thread_amr().
+	 */
+	if (!current->mm && current->active_mm != &init_mm)
+		thread_amr = 0;
+	else if (mmu_has_feature(MMU_FTR_PKEY))
 		thread_amr = current_thread_amr();
 
 	if (dir == KUAP_READ)

^ permalink raw reply related

* Re: [PATCH] dma-mapping: remove unneeded semicolon
From: Geoff Levand @ 2021-02-02  4:14 UTC (permalink / raw)
  To: Yang Li; +Cc: paulus, linuxppc-dev, linux-kernel
In-Reply-To: <1612237276-111378-1-git-send-email-yang.lee@linux.alibaba.com>

On 2/1/21 7:41 PM, Yang Li wrote:
> Eliminate the following coccicheck warning:
> ./arch/powerpc/platforms/ps3/system-bus.c:606:2-3: Unneeded semicolon
> ./arch/powerpc/platforms/ps3/system-bus.c:765:2-3: Unneeded semicolon
> 
> Reported-by: Abaci Robot <abaci@linux.alibaba.com>
> Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
> ---
>  arch/powerpc/platforms/ps3/system-bus.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

Thanks for your patch, it looks good.

Acked-by: Geoff Levand <geoff@infradead.org>

-Geoff

^ permalink raw reply

* [PATCH] powerpc/book3s64: remove unneeded semicolon
From: Yang Li @ 2021-02-02  3:51 UTC (permalink / raw)
  To: mpe; +Cc: Yang Li, paulus, linuxppc-dev, linux-kernel

Eliminate the following coccicheck warning:
./arch/powerpc/platforms/pseries/lpar.c:1632:2-3: Unneeded semicolon
./arch/powerpc/platforms/pseries/lpar.c:1663:2-3: Unneeded semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
---
 arch/powerpc/platforms/pseries/lpar.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 764170f..24889b8 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1629,7 +1629,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
 		}
 		msleep(delay);
 		rc = plpar_resize_hpt_prepare(0, shift);
-	};
+	}
 
 	switch (rc) {
 	case H_SUCCESS:
@@ -1663,7 +1663,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
 			pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
 				state.commit_rc);
 			return -EIO;
-		};
+		}
 	}
 
 	pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH] powerpc/eeh: remove unneeded semicolon
From: Oliver O'Halloran @ 2021-02-02  3:43 UTC (permalink / raw)
  To: Yang Li; +Cc: Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
In-Reply-To: <1612236096-91154-1-git-send-email-yang.lee@linux.alibaba.com>

On Tue, Feb 2, 2021 at 2:21 PM Yang Li <yang.lee@linux.alibaba.com> wrote:
>
> Eliminate the following coccicheck warning:
> ./arch/powerpc/kernel/eeh.c:782:2-3: Unneeded semicolon

Doesn't appear to be a load-bearing semicolon. It's hard to tell with EEH.

Reviewed-by: Oliver O'Halloran <oohall@gmail.com>

>
> Reported-by: Abaci Robot <abaci@linux.alibaba.com>
> Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
> ---
>  arch/powerpc/kernel/eeh.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 813713c..02c058d 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -779,7 +779,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
>         default:
>                 eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
>                 return -EINVAL;
> -       };
> +       }
>
>         return 0;
>  }
> --
> 1.8.3.1
>

^ permalink raw reply

* [PATCH] dma-mapping: remove unneeded semicolon
From: Yang Li @ 2021-02-02  3:41 UTC (permalink / raw)
  To: geoff; +Cc: linux-kernel, Yang Li, paulus, linuxppc-dev

Eliminate the following coccicheck warning:
./arch/powerpc/platforms/ps3/system-bus.c:606:2-3: Unneeded semicolon
./arch/powerpc/platforms/ps3/system-bus.c:765:2-3: Unneeded semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
---
 arch/powerpc/platforms/ps3/system-bus.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index b431f41..5c73926 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -603,7 +603,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page,
 	default:
 		/* not happned */
 		BUG();
-	};
+	}
 	result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
 			     &bus_addr, iopte_flag);
 
@@ -762,7 +762,7 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev)
 		break;
 	default:
 		BUG();
-	};
+	}
 
 	dev->core.of_node = NULL;
 	set_dev_node(&dev->core, 0);
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH] powerpc/64s: remove unneeded semicolon
From: Yang Li @ 2021-02-02  3:34 UTC (permalink / raw)
  To: mpe; +Cc: Yang Li, paulus, linuxppc-dev, linux-kernel

Eliminate the following coccicheck warning:
./arch/powerpc/platforms/powernv/setup.c:160:2-3: Unneeded semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
---
 arch/powerpc/platforms/powernv/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 4426a10..167d6ed 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -157,7 +157,7 @@ static void __init pnv_check_guarded_cores(void)
 	for_each_node_by_type(dn, "cpu") {
 		if (of_property_match_string(dn, "status", "bad") >= 0)
 			bad_count++;
-	};
+	}
 
 	if (bad_count) {
 		printk("  _     _______________\n");
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH] KVM: PPC: remove unneeded semicolon
From: Yang Li @ 2021-02-02  3:30 UTC (permalink / raw)
  To: paulus; +Cc: linux-kernel, kvm-ppc, Yang Li, linuxppc-dev

Eliminate the following coccicheck warning:
./arch/powerpc/kvm/booke.c:701:2-3: Unneeded semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
---
 arch/powerpc/kvm/booke.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 288a982..f38ae3e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -698,7 +698,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 
 		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
 		r = 1;
-	};
+	}
 
 	return r;
 }
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH] powerpc/prom_init: remove unneeded semicolon
From: Yang Li @ 2021-02-02  3:25 UTC (permalink / raw)
  To: mpe; +Cc: Yang Li, paulus, linuxppc-dev, linux-kernel

Eliminate the following coccicheck warning:
./arch/powerpc/kernel/prom_init.c:2990:2-3: Unneeded semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
---
 arch/powerpc/kernel/prom_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index e9d4eb6..2d111bb 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2987,7 +2987,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
 				" 0x3 encode-int encode+"
 				" s\" interrupts\" property"
 			" finish-device");
-	};
+	}
 
 	/* Check for a PHY device node - if missing then create one and
 	 * give it's phandle to the ethernet node */
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH] powerpc/eeh: remove unneeded semicolon
From: Yang Li @ 2021-02-02  3:21 UTC (permalink / raw)
  To: ruscur; +Cc: linux-kernel, oohall, paulus, Yang Li, linuxppc-dev

Eliminate the following coccicheck warning:
./arch/powerpc/kernel/eeh.c:782:2-3: Unneeded semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
---
 arch/powerpc/kernel/eeh.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 813713c..02c058d 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -779,7 +779,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 	default:
 		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH] crypto: powerpc: remove unneeded semicolon
From: Yang Li @ 2021-02-02  3:17 UTC (permalink / raw)
  To: herbert; +Cc: linux-kernel, paulus, linux-crypto, Yang Li, linuxppc-dev, davem

Eliminate the following coccicheck warning:
./arch/powerpc/crypto/sha256-spe-glue.c:132:2-3: Unneeded
semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
---
 arch/powerpc/crypto/sha256-spe-glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
index a6e650a..ffedea7 100644
--- a/arch/powerpc/crypto/sha256-spe-glue.c
+++ b/arch/powerpc/crypto/sha256-spe-glue.c
@@ -129,7 +129,7 @@ static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
 
 		src += bytes;
 		len -= bytes;
-	};
+	}
 
 	memcpy((char *)sctx->buf, src, len);
 	return 0;
-- 
1.8.3.1


^ permalink raw reply related

* [RFC PATCH 9/9] KVM: PPC: Book3S HV: Implement the rest of the P9 entry/exit handling in C
From: Nicholas Piggin @ 2021-02-02  3:03 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210202030313.3509446-1-npiggin@gmail.com>

Almost all logic is moved to C, by introducing a new in_guest mode that
selects and branches very early in the interrupt handler to the P9 exit
code.

The remaining asm is a low level stack setup, with VCPU vs host register
save and restore. Only the GPRs that might be touched by the compiler are
handled in asm.

More consolidation could be made in future by inlining more of
__kvmhv_vcpu_entry_p9 into its callers and handling some bits there
perhaps, although there is something to be said for keeping the P9
and old path looking similar, so that needs to be done carefully if
at all.

Some bits are left out for now, ultravisor return for example.

I haven't added any loop to return quickly back into the guest, no
simple "realmode" hcalls, decrementer checks, etc. It remains to be
seen whether these would actually matter to add -- ISA v3.0 (radix,
large decrementer, etc) + XIVE should cut down on guest exits by
a huge amount, and of those that remain I would say a large portion
of them will need to go back out to Linux. Radix also makes this
much more light-weight because we are already in host MMU mode.
There are a few bits like LPID/LPCR/PID switching and XIVE pulling
and pushing which would still be desirable to avoid so if it
happens a lot we could probably handle some simple common cases. I
hope we won't have to at all.

---
 arch/powerpc/include/asm/asm-prototypes.h |   2 +-
 arch/powerpc/include/asm/kvm_asm.h        |   3 +-
 arch/powerpc/include/asm/kvm_book3s_64.h  |   2 +
 arch/powerpc/include/asm/kvm_ppc.h        |   2 +
 arch/powerpc/kvm/Makefile                 |   3 +
 arch/powerpc/kvm/book3s_64_entry.S        | 135 ++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c              |  21 ++-
 arch/powerpc/kvm/book3s_hv_interrupt.c    | 164 ++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 123 ++--------------
 arch/powerpc/kvm/book3s_xive.c            |  32 +++++
 10 files changed, 364 insertions(+), 123 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_interrupt.c

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d0b832cbbec8..ba15ce78ffe5 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -171,7 +171,7 @@ void kvmhv_load_host_pmu(void);
 void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
 void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
 
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
 
 long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
 long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a3633560493b..b4f9996bd331 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -146,7 +146,8 @@
 #define KVM_GUEST_MODE_GUEST	1
 #define KVM_GUEST_MODE_SKIP	2
 #define KVM_GUEST_MODE_GUEST_HV	3
-#define KVM_GUEST_MODE_HOST_HV	4
+#define KVM_GUEST_MODE_GUEST_HV_FAST	4 /* ISA v3.0 with host radix mode */
+#define KVM_GUEST_MODE_HOST_HV	5
 
 #define KVM_INST_FETCH_FAILED	-1
 
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9bb9bb370b53..7f08f6ed73df 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -153,6 +153,8 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
 	return radix;
 }
 
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
 #endif
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 0a056c64c317..1699e8ca96b1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -670,6 +670,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 			       int level, bool line_status);
 extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
 
 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 {
@@ -710,6 +711,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
 static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 				      int level, bool line_status) { return -ENODEV; }
 static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
 
 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 	{ return 0; }
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index cdd119028f64..b94be8c9bad1 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -43,6 +43,9 @@ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 kvm-book3s_64-builtin-objs-$(CONFIG_SPAPR_TCE_IOMMU) := \
 	book3s_64_vio_hv.o
 
+kvm-book3s_64-objs-y += \
+	book3s_hv_interrupt.o
+
 kvm-pr-y := \
 	fpu.o \
 	emulate.o \
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 0e39267aef63..8541fba11aea 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -1,6 +1,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/exception-64s.h>
+#include <asm/export.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/ppc_asm.h>
@@ -9,13 +10,24 @@
 .global	kvmppc_hcall
 .balign IFETCH_ALIGN_BYTES
 kvmppc_hcall:
+	lbz	r10,HSTATE_IN_GUEST(r13)
+	cmpwi	r10,KVM_GUEST_MODE_GUEST_HV_FAST
+	beq	kvmppc_p9_exit_hcall
 	ld	r10,PACA_EXGEN+EX_R13(r13)
 	SET_SCRATCH0(r10)
 	li	r10,0xc00
+	li	r11,PACA_EXGEN
+	b	1f
 
 .global	kvmppc_interrupt
 .balign IFETCH_ALIGN_BYTES
 kvmppc_interrupt:
+	std	r10,HSTATE_SCRATCH0(r13)
+	lbz	r10,HSTATE_IN_GUEST(r13)
+	cmpwi	r10,KVM_GUEST_MODE_GUEST_HV_FAST
+	beq	kvmppc_p9_exit_interrupt
+	ld	r10,HSTATE_SCRATCH0(r13)
+	lbz	r11,HSTATE_IN_GUEST(r13)
 	li	r11,PACA_EXGEN
 	cmpdi	r10,0x200
 	bgt+	1f
@@ -95,3 +107,126 @@ maybe_skip:
 	GET_SCRATCH0(r13)
 	HRFI_TO_KERNEL
 #endif
+
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS			208
+#define STACK_SLOT_VCPU		(SFS-8)
+#define STACK_SLOT_NVGPRS	(SFS-152)	/* 18 gprs */
+
+/*
+ * Enter the guest on ISAv3.0 or later system where we have exactly
+ * one vcpu per vcore and the host and guest are running radix.
+ */
+// void kvmppc_p9_enter_guest(struct vcpu *vcpu);
+.balign	IFETCH_ALIGN_BYTES
+_GLOBAL(kvmppc_p9_enter_guest)
+EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+	stdu	r1, -SFS(r1)
+
+	std	r1, HSTATE_HOST_R1(r13)
+	std	r3, STACK_SLOT_VCPU(r1)
+
+	mfcr	r4
+	stw	r4, SFS+8(r1)
+
+	reg = 14
+	.rept	18
+	std	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+	reg = reg + 1
+	.endr
+
+	ld	r4,VCPU_LR(r3)
+	mtlr	r4
+	ld	r4,VCPU_CR(r3)
+	mtcr	r4
+	ld	r4,VCPU_CTR(r3)
+	mtctr	r4
+#if 0
+XXX
+	ld	r4,VCPU_TAR(r3)
+	mtspr	SPRN_TAR,r4
+#endif
+	ld	r4,VCPU_XER(r3)
+	mtspr	SPRN_XER,r4
+
+BEGIN_FTR_SECTION
+	ld	r4,VCPU_CFAR(r3)
+	mtspr	SPRN_CFAR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+	ld	r4,VCPU_PPR(r3)
+	mtspr	SPRN_PPR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	reg = 4
+	.rept	28
+	ld	reg, __VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+	ld	r0,VCPU_GPR(R0)(r3)
+	ld	r1,VCPU_GPR(R1)(r3)
+	ld	r2,VCPU_GPR(R2)(r3)
+	ld	r4,VCPU_GPR(R4)(r3)
+	ld	r3,VCPU_GPR(R3)(r3)
+
+	HRFI_TO_GUEST
+
+.balign	IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_hcall:
+	mfspr	r11,SPRN_SRR0
+	mfspr	r12,SPRN_SRR1
+	li	r10,0xc00
+	std	r10,HSTATE_SCRATCH0(r13)
+
+.balign	IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_interrupt:
+	std     r1,HSTATE_SCRATCH1(r13)
+	std     r3,HSTATE_SCRATCH2(r13)
+	ld	r1,HSTATE_HOST_R1(r13)
+	ld	r3, STACK_SLOT_VCPU(r1)
+
+	std	r9,VCPU_CR(r3)
+
+1:
+	std	r11,VCPU_PC(r3)
+	std	r12,VCPU_MSR(r3)
+
+	reg = 14
+	.rept	18
+	std	reg,__VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+
+	/* r1, r3, r9-r13 are saved to vcpu by C code */
+	std	r0,VCPU_GPR(R0)(r3)
+	std	r2,VCPU_GPR(R2)(r3)
+	reg = 4
+	.rept	5
+	std	reg,__VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+
+	ld	r2,PACATOC(r13)
+
+	mflr	r4
+	std	r4,VCPU_LR(r3)
+	mfspr	r4,SPRN_XER
+	std	r4,VCPU_XER(r3)
+	/* XXX: TAR */
+
+	reg = 14
+	.rept	18
+	ld	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+	reg = reg + 1
+	.endr
+
+	lwz	r4, SFS+8(r1)
+	mtcr	r4
+
+	addi	r1, r1, SFS
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 53d0cbfe5933..f87a8cb6afef 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1127,7 +1127,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
  * This has to be done early, not in kvmppc_pseries_do_hcall(), so
  * that the cede logic in kvmppc_run_single_vcpu() works properly.
  */
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.shregs.msr |= MSR_EE;
 	vcpu->arch.ceded = 1;
@@ -3522,9 +3522,6 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	kvmppc_xive_push_vcpu(vcpu);
 
-	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
-	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
-
 	trap = __kvmhv_vcpu_entry_p9(vcpu);
 
 	/* Advance host PURR/SPURR by the amount used by guest */
@@ -3698,18 +3695,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
 		vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
 		mtspr(SPRN_PSSCR_PR, host_psscr);
-
-		/* H_CEDE has to be handled now, not later */
-		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
-		    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
-			kvmppc_nested_cede(vcpu);
-			kvmppc_set_gpr(vcpu, 3, 0);
-			trap = 0;
-		}
 	} else {
 		trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
 	}
 
+	/* H_CEDE has to be handled now, not later */
+	if (trap == BOOK3S_INTERRUPT_SYSCALL &&
+	    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
+		kvmppc_cede(vcpu);
+		kvmppc_set_gpr(vcpu, 3, 0);
+		trap = 0;
+	}
+
 	vcpu->arch.slb_max = 0;
 	dec = mfspr(SPRN_DEC);
 	if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
new file mode 100644
index 000000000000..22d5f1013d01
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -0,0 +1,164 @@
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <asm/asm-prototypes.h>
+#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu)
+{
+	u64 *exsave;
+	unsigned long msr = mfmsr();
+	int trap;
+
+	vcpu->arch.ceded = 0;
+
+	WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+	WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	kvmhv_accumulate_time(vcpu, &vcpu->arch.guest_time); // XXX: something simpler?
+#endif
+	__mtmsrd(0, 1); /* clear RI (XXX: should already be cleared by caller because SRR0/1/DAR/etc are set. */
+
+	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
+	mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+	mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
+
+	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST_HV_FAST;
+	kvmppc_p9_enter_guest(vcpu);
+	// Radix host and guest means host never runs with guest MMU state
+	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
+
+	/* Get these from r11/12 and paca exsave */
+	vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
+	vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
+	vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+	vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+	trap = local_paca->kvm_hstate.scratch0 & ~0x2;
+	if (trap == 0x100) {
+		exsave = local_paca->exnmi;
+	} else if (trap == 0x200) {
+		exsave = local_paca->exmc;
+	} else {
+		exsave = local_paca->exgen;
+	}
+
+	vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
+	vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+	vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
+	vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
+	vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
+	vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
+	vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
+	vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
+	vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
+	vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
+
+	/* XXX: could leave in real mode for machine check handler / hmi / etc */
+	/* XXX: flush first 4 slbes */
+	/* XXX: HPT guest could switch to LPCR[HR]=1 now, save/restore SLBs */
+
+	__mtmsrd(msr, 0);
+
+	/* XXX: Why are these set? */
+	vcpu->arch.fault_dar = vcpu->arch.shregs.dar;
+	vcpu->arch.fault_dsisr = vcpu->arch.shregs.dsisr;
+
+	vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	kvmhv_accumulate_time(vcpu, &vcpu->arch.rm_intr);
+#endif
+
+	// switch to virt mode somewhere here (unless sreset or mce I guess)
+	// could leave guest LPID and PID in case re-entering.
+
+	switch (trap) {
+	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+		vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+		break;
+
+	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+		vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+		vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+		vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+		break;
+
+	case BOOK3S_INTERRUPT_H_INST_STORAGE:
+		vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+		break;
+
+	// no TM for the moment
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/* For softpatch interrupt, go off and do TM instruction emulation */
+	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+		WARN_ON(1);
+		/* kvmppc_tm_emul */
+		break;
+#endif
+
+	case BOOK3S_INTERRUPT_HV_DECREMENTER: {
+		s64 dec = mfspr(SPRN_HDEC);
+		if (!cpu_has_feature(CPU_FTR_ARCH_300))
+			dec = (s32)dec;
+		if (dec >= 0) { // XXX: could exit if a small amount remains?
+			/* See if this is a leftover HDEC interrupt */
+			// re-enter guest?
+		}
+		break;
+	}
+
+	// try_real_mode ? (no, should just go virt all the time)
+	//
+	case BOOK3S_INTERRUPT_H_DOORBELL:
+		ppc_msgsync();
+		if (vcpu->arch.nested)
+			goto out; // nested always exits
+		if (local_paca->kvm_hstate.host_ipi)
+			goto out;
+		// maybe_reenter_guest (always re-enter because no other threads
+		break;
+
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+		vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+		break;
+
+	case BOOK3S_INTERRUPT_EXTERNAL:
+		WARN_ON_ONCE(1);
+		// kvmppc_guest_external
+		// let's assume xive_enabled() = always host pending interrupt
+		break;
+
+	case BOOK3S_INTERRUPT_MACHINE_CHECK:
+		kvmppc_realmode_machine_check(vcpu);
+		/* all machine checks go to virtual mode for further handling */
+		break;
+
+	case BOOK3S_INTERRUPT_HMI:
+		kvmppc_realmode_hmi_handler();
+		break;
+	case BOOK3S_INTERRUPT_SYSCALL:
+		break;
+	case 0:
+		WARN_ON_ONCE(1);
+		/* nothing */
+		break;
+	default:
+		printk("unknown guest interrupt %x\n", trap);
+		break;
+	}
+
+out:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	// XXX: this could go after the xive pull?
+	kvmhv_accumulate_time(vcpu, &vcpu->arch.rm_exit);
+#endif
+
+	kvmppc_xive_pull_vcpu(vcpu);
+	// XXX flush link stack
+
+	return trap;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8144c1403203..aea43e06129f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -46,7 +46,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 /* Stack frame offsets for kvmppc_hv_entry */
 #define SFS			208
 #define STACK_SLOT_TRAP		(SFS-4)
-#define STACK_SLOT_SHORT_PATH	(SFS-8)
 #define STACK_SLOT_TID		(SFS-16)
 #define STACK_SLOT_PSSCR	(SFS-24)
 #define STACK_SLOT_PID		(SFS-32)
@@ -994,9 +993,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 no_xive:
 #endif /* CONFIG_KVM_XICS */
 
-	li	r0, 0
-	stw	r0, STACK_SLOT_SHORT_PATH(r1)
-
 deliver_guest_interrupt:	/* r4 = vcpu, r13 = paca */
 	/* Check if we can deliver an external or decrementer interrupt now */
 	ld	r0, VCPU_PENDING_EXC(r4)
@@ -1016,6 +1012,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mtspr	SPRN_SRR0, r6
 	mtspr	SPRN_SRR1, r7
 
+	/* Activate guest mode, so faults get handled by KVM */
+	li	r9, KVM_GUEST_MODE_GUEST_HV
+	stb	r9, HSTATE_IN_GUEST(r13)
+
 fast_guest_entry_c:
 	ld	r10, VCPU_PC(r4)
 	ld	r11, VCPU_MSR(r4)
@@ -1042,10 +1042,6 @@ fast_guest_return:
 	mtspr	SPRN_HSRR0,r10
 	mtspr	SPRN_HSRR1,r11
 
-	/* Activate guest mode, so faults get handled by KVM */
-	li	r9, KVM_GUEST_MODE_GUEST_HV
-	stb	r9, HSTATE_IN_GUEST(r13)
-
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	/* Accumulate timing */
 	addi	r3, r4, VCPU_TB_GUEST
@@ -1121,97 +1117,6 @@ ret_to_ultra:
 	ld	r4, VCPU_GPR(R4)(r4)
 	sc	2
 
-/*
- * Enter the guest on a P9 or later system where we have exactly
- * one vcpu per vcore and we don't need to go to real mode
- * (which implies that host and guest are both using radix MMU mode).
- * r3 = vcpu pointer
- * Most SPRs and all the VSRs have been loaded already.
- */
-_GLOBAL(__kvmhv_vcpu_entry_p9)
-EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
-	mflr	r0
-	std	r0, PPC_LR_STKOFF(r1)
-	stdu	r1, -SFS(r1)
-
-	li	r0, 1
-	stw	r0, STACK_SLOT_SHORT_PATH(r1)
-
-	std	r3, HSTATE_KVM_VCPU(r13)
-	mfcr	r4
-	stw	r4, SFS+8(r1)
-
-	std	r1, HSTATE_HOST_R1(r13)
-
-	reg = 14
-	.rept	18
-	std	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
-	reg = reg + 1
-	.endr
-
-	reg = 14
-	.rept	18
-	ld	reg, __VCPU_GPR(reg)(r3)
-	reg = reg + 1
-	.endr
-
-	mfmsr	r10
-	std	r10, HSTATE_HOST_MSR(r13)
-
-	mr	r4, r3
-	b	fast_guest_entry_c
-guest_exit_short_path:
-	/*
-	 * Malicious or buggy radix guests may have inserted SLB entries
-	 * (only 0..3 because radix always runs with UPRT=1), so these must
-	 * be cleared here to avoid side-channels. slbmte is used rather
-	 * than slbia, as it won't clear cached translations.
-	 */
-	li	r0,0
-	slbmte	r0,r0
-	li	r4,1
-	slbmte	r0,r4
-	li	r4,2
-	slbmte	r0,r4
-	li	r4,3
-	slbmte	r0,r4
-
-	li	r0, KVM_GUEST_MODE_NONE
-	stb	r0, HSTATE_IN_GUEST(r13)
-
-	reg = 14
-	.rept	18
-	std	reg, __VCPU_GPR(reg)(r9)
-	reg = reg + 1
-	.endr
-
-	reg = 14
-	.rept	18
-	ld	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
-	reg = reg + 1
-	.endr
-
-	lwz	r4, SFS+8(r1)
-	mtcr	r4
-
-	mr	r3, r12		/* trap number */
-
-	addi	r1, r1, SFS
-	ld	r0, PPC_LR_STKOFF(r1)
-	mtlr	r0
-
-	/* If we are in real mode, do a rfid to get back to the caller */
-	mfmsr	r4
-	andi.	r5, r4, MSR_IR
-	bnelr
-	rldicl	r5, r4, 64 - MSR_TS_S_LG, 62	/* extract TS field */
-	mtspr	SPRN_SRR0, r0
-	ld	r10, HSTATE_HOST_MSR(r13)
-	rldimi	r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
-	mtspr	SPRN_SRR1, r10
-	RFI_TO_KERNEL
-	b	.
-
 secondary_too_late:
 	li	r12, 0
 	stw	r12, STACK_SLOT_TRAP(r1)
@@ -1466,17 +1371,12 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 #endif /* CONFIG_KVM_XICS */
 
 	/*
-	 * Possibly flush the link stack here, before we do a blr in
-	 * guest_exit_short_path.
+	 * Possibly flush the link stack here.
+	 * XXX: flush the link stack in the fast guest exit?
 	 */
 1:	nop
 	patch_site 1b patch__call_kvm_flush_link_stack
 
-	/* If we came in through the P9 short path, go back out to C now */
-	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
-	cmpwi	r0, 0
-	bne	guest_exit_short_path
-
 	/* For hash guest, read the guest SLB and save it away */
 	ld	r5, VCPU_KVM(r9)
 	lbz	r0, KVM_RADIX(r5)
@@ -1520,8 +1420,13 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	.endr
 	b	guest_bypass
 
-0:	/* Sanitise radix guest SLB, see guest_exit_short_path comment. */
-	li	r0,0
+	/*
+	 * Malicious or buggy radix guests may have inserted SLB entries
+	 * (only 0..3 because radix always runs with UPRT=1), so these must
+	 * be cleared here to avoid side-channels. slbmte is used rather
+	 * than slbia, as it won't clear cached translations.
+	 */
+0:	li	r0,0
 	slbmte	r0,r0
 	li	r4,1
 	slbmte	r0,r4
@@ -3322,7 +3227,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_CIABR, r0
 	mtspr	SPRN_DAWRX0, r0
 
-	/* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
+	/* Clear hash and radix guest SLB. */
 	slbmte	r0, r0
 	PPC_SLBIA(6)
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 30dfeac731c6..22a3be0aafa1 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -127,6 +127,38 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
 
+/*
+ * Pull a vcpu's context from the XIVE on guest exit.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
+{
+	void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+
+	if (!vcpu->arch.xive_pushed)
+		return;
+
+	/*
+	 * Sould not have been pushed if there is no tima
+	 */
+	if (WARN_ON(!tima))
+		return
+
+	eieio();
+	/* First load to pull the context, we ignore the value */
+	__raw_readw(tima + TM_SPC_PULL_OS_CTX);
+	/* Second load to recover the context state (Words 0 and 1) */
+	vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+
+	/* Fixup some of the state for the next load */
+	vcpu->arch.xive_pushed = 0;
+	vcpu->arch.xive_saved_state.lsmfb = 0;
+	vcpu->arch.xive_saved_state.ack = 0xff;
+	eieio();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+
+
 /*
  * This is a simple trigger for a generic XIVE IRQ. This must
  * only be called for interrupts that support a trigger page
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 8/9] KVM: PPC: Book3S HV: Minimise hcall handler calling convention differences
From: Nicholas Piggin @ 2021-02-02  3:03 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210202030313.3509446-1-npiggin@gmail.com>

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 16 +++++++++++++++-
 arch/powerpc/kvm/book3s_64_entry.S   | 22 +++-------------------
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index bed4499488b3..0844558f1d7c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1923,8 +1923,22 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 TRAMP_REAL_BEGIN(kvm_hcall)
+	std	r9,PACA_EXGEN+EX_R9(r13)
+	std	r11,PACA_EXGEN+EX_R11(r13)
+	std	r12,PACA_EXGEN+EX_R12(r13)
+	mfcr	r9
 	mfctr	r10
-	SET_SCRATCH0(r10) /* Save r13 in SCRATCH0 */
+	std	r10,PACA_EXGEN+EX_R13(r13)
+	li	r10,0
+	std	r10,PACA_EXGEN+EX_CFAR(r13)
+	std	r10,PACA_EXGEN+EX_CTR(r13)
+BEGIN_FTR_SECTION
+	mfspr	r10,SPRN_PPR
+	std	r10,PACA_EXGEN+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	HMT_MEDIUM
+
 #ifdef CONFIG_RELOCATABLE
 	/*
 	 * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 0d9e1e55c24d..0e39267aef63 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -9,24 +9,9 @@
 .global	kvmppc_hcall
 .balign IFETCH_ALIGN_BYTES
 kvmppc_hcall:
-	/*
-	 * This is a hcall, so register convention is as
-	 * Documentation/powerpc/papr_hcalls.rst, with these additions:
-	 * R13		= PACA
-	 * guest R13 saved in SPRN_SCRATCH0
-	 * R10		= free
-	 */
-BEGIN_FTR_SECTION
-	mfspr	r10,SPRN_PPR
-	std	r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-	HMT_MEDIUM
-	mfcr	r10
-	std	r12,HSTATE_SCRATCH0(r13)
-	sldi	r12,r10,32
-	ori	r12,r12,0xc00
-	ld	r10,PACA_EXGEN+EX_R10(r13)
-	b	do_kvm_interrupt
+	ld	r10,PACA_EXGEN+EX_R13(r13)
+	SET_SCRATCH0(r10)
+	li	r10,0xc00
 
 .global	kvmppc_interrupt
 .balign IFETCH_ALIGN_BYTES
@@ -57,7 +42,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ld	r10,EX_R10(r11)
 	ld	r11,EX_R11(r11)
 
-do_kvm_interrupt:
 	/*
 	 * Hcalls and other interrupts come here after normalising register
 	 * contents and save locations:
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 7/9] KVM: PPC: Book3S HV: move bad_host_intr check to HV handler
From: Nicholas Piggin @ 2021-02-02  3:03 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210202030313.3509446-1-npiggin@gmail.com>

This is not used by PR KVM.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_64_entry.S      | 3 ---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +++-
 arch/powerpc/kvm/book3s_segment.S       | 7 +++++++
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 5db76c8d4012..0d9e1e55c24d 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -73,11 +73,8 @@ do_kvm_interrupt:
 	beq	maybe_skip
 no_skip:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	cmpwi	r9,KVM_GUEST_MODE_HOST_HV
-	beq	kvmppc_bad_host_intr
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	cmpwi	r9,KVM_GUEST_MODE_GUEST
-	ld	r9,HSTATE_SCRATCH2(r13)
 	beq	kvmppc_interrupt_pr
 #endif
 	b	kvmppc_interrupt_hv
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b9c4acd747f7..8144c1403203 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1251,6 +1251,7 @@ hdec_soon:
 kvmppc_interrupt_hv:
 	/*
 	 * Register contents:
+	 * R9		= HSTATE_IN_GUEST
 	 * R12		= (guest CR << 32) | interrupt vector
 	 * R13		= PACA
 	 * guest R12 saved in shadow VCPU SCRATCH0
@@ -1258,6 +1259,8 @@ kvmppc_interrupt_hv:
 	 * guest R9 saved in HSTATE_SCRATCH2
 	 */
 	/* We're now back in the host but in guest MMU context */
+	cmpwi	r9,KVM_GUEST_MODE_HOST_HV
+	beq	kvmppc_bad_host_intr
 	li	r9, KVM_GUEST_MODE_HOST_HV
 	stb	r9, HSTATE_IN_GUEST(r13)
 
@@ -3245,7 +3248,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
  * cfar is saved in HSTATE_CFAR(r13)
  * ppr is saved in HSTATE_PPR(r13)
  */
-.global kvmppc_bad_host_intr
 kvmppc_bad_host_intr:
 	/*
 	 * Switch to the emergency stack, but start half-way down in
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1f492aa4c8d6..ef1d88b869bf 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -167,8 +167,15 @@ kvmppc_interrupt_pr:
 	 * R12             = (guest CR << 32) | exit handler id
 	 * R13             = PACA
 	 * HSTATE.SCRATCH0 = guest R12
+	 *
+	 * If HV is possible, additionally:
+	 * R9              = HSTATE_IN_GUEST
+	 * HSTATE.SCRATCH2 = guest R9
 	 */
 #ifdef CONFIG_PPC64
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ld	r9,HSTATE_SCRATCH2(r13)
+#endif
 	/* Match 32-bit entry */
 	rotldi	r12, r12, 32		  /* Flip R12 halves for stw */
 	stw	r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 6/9] KVM: PPC: Book3S HV: Move interrupt early register setup to KVM
From: Nicholas Piggin @ 2021-02-02  3:03 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210202030313.3509446-1-npiggin@gmail.com>

Like the earlier patch for hcalls, KVM interrupt entry requires a
different calling convention than the Linux interrupt handlers
set up. Move the code that converts from one to the other into KVM.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 126 ++++-----------------------
 arch/powerpc/kvm/book3s_64_entry.S   |  37 ++++++--
 2 files changed, 50 insertions(+), 113 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index c893801eb1d5..bed4499488b3 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -191,7 +191,6 @@ do_define_int n
 	.endif
 .endm
 
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 /*
  * All interrupts which set HSRR registers, as well as SRESET and MCE and
  * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
@@ -224,54 +223,25 @@ do_define_int n
  * to KVM to handle.
  */
 
-.macro KVMTEST name
+.macro KVMTEST name handler
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 	lbz	r10,HSTATE_IN_GUEST(r13)
 	cmpwi	r10,0
-	bne	\name\()_kvm
-.endm
-
-.macro GEN_KVM name
-	.balign IFETCH_ALIGN_BYTES
-\name\()_kvm:
-
-BEGIN_FTR_SECTION
-	ld	r10,IAREA+EX_CFAR(r13)
-	std	r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
-
-	ld	r10,IAREA+EX_CTR(r13)
-	mtctr	r10
-BEGIN_FTR_SECTION
-	ld	r10,IAREA+EX_PPR(r13)
-	std	r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-	ld	r11,IAREA+EX_R11(r13)
-	ld	r12,IAREA+EX_R12(r13)
-	std	r12,HSTATE_SCRATCH0(r13)
-	sldi	r12,r9,32
-	ld	r9,IAREA+EX_R9(r13)
-	ld	r10,IAREA+EX_R10(r13)
 	/* HSRR variants have the 0x2 bit added to their trap number */
 	.if IHSRR_IF_HVMODE
 	BEGIN_FTR_SECTION
-	ori	r12,r12,(IVEC + 0x2)
+	li	r10,(IVEC + 0x2)
 	FTR_SECTION_ELSE
-	ori	r12,r12,(IVEC)
+	li	r10,(IVEC)
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 	.elseif IHSRR
-	ori	r12,r12,(IVEC+ 0x2)
+	li	r10,(IVEC + 0x2)
 	.else
-	ori	r12,r12,(IVEC)
+	li	r10,(IVEC)
 	.endif
-	b	kvmppc_interrupt
-.endm
-
-#else
-.macro KVMTEST name
-.endm
-.macro GEN_KVM name
-.endm
+	bne	\handler
 #endif
+.endm
 
 /*
  * This is the BOOK3S interrupt entry code macro.
@@ -413,7 +383,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 DEFINE_FIXED_SYMBOL(\name\()_common_real)
 \name\()_common_real:
 	.if IKVM_REAL
-		KVMTEST \name
+		KVMTEST \name kvm_interrupt
 	.endif
 
 	ld	r10,PACAKMSR(r13)	/* get MSR value for kernel */
@@ -436,7 +406,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 DEFINE_FIXED_SYMBOL(\name\()_common_virt)
 \name\()_common_virt:
 	.if IKVM_VIRT
-		KVMTEST \name
+		KVMTEST \name kvm_interrupt
 1:
 	.endif
 	.endif /* IVIRT */
@@ -450,7 +420,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
 DEFINE_FIXED_SYMBOL(\name\()_common_real)
 \name\()_common_real:
 	.if IKVM_REAL
-		KVMTEST \name
+		KVMTEST \name kvm_interrupt
 	.endif
 .endm
 
@@ -1011,8 +981,6 @@ EXC_COMMON_BEGIN(system_reset_common)
 	EXCEPTION_RESTORE_REGS
 	RFI_TO_USER_OR_KERNEL
 
-	GEN_KVM system_reset
-
 
 /**
  * Interrupt 0x200 - Machine Check Interrupt (MCE).
@@ -1196,7 +1164,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 	/*
 	 * Check if we are coming from guest. If yes, then run the normal
 	 * exception handler which will take the
-	 * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+	 * machine_check_kvm->kvm_interrupt branch to deliver the MC event
 	 * to guest.
 	 */
 	lbz	r11,HSTATE_IN_GUEST(r13)
@@ -1267,8 +1235,6 @@ EXC_COMMON_BEGIN(machine_check_common)
 	bl	machine_check_exception
 	b	interrupt_return
 
-	GEN_KVM machine_check
-
 
 #ifdef CONFIG_PPC_P7_NAP
 /*
@@ -1393,8 +1359,6 @@ MMU_FTR_SECTION_ELSE
 	b	handle_page_fault
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
-	GEN_KVM data_access
-
 
 /**
  * Interrupt 0x380 - Data Segment Interrupt (DSLB).
@@ -1449,8 +1413,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	bl	do_bad_slb_fault
 	b	interrupt_return
 
-	GEN_KVM data_access_slb
-
 
 /**
  * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
@@ -1489,8 +1451,6 @@ MMU_FTR_SECTION_ELSE
 	b	handle_page_fault
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
-	GEN_KVM instruction_access
-
 
 /**
  * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
@@ -1540,8 +1500,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	bl	do_bad_slb_fault
 	b	interrupt_return
 
-	GEN_KVM instruction_access_slb
-
 
 /**
  * Interrupt 0x500 - External Interrupt.
@@ -1588,8 +1546,6 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
 	bl	do_IRQ
 	b	interrupt_return
 
-	GEN_KVM hardware_interrupt
-
 
 /**
  * Interrupt 0x600 - Alignment Interrupt
@@ -1617,8 +1573,6 @@ EXC_COMMON_BEGIN(alignment_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return
 
-	GEN_KVM alignment
-
 
 /**
  * Interrupt 0x700 - Program Interrupt (program check).
@@ -1681,8 +1635,6 @@ EXC_COMMON_BEGIN(program_check_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return
 
-	GEN_KVM program_check
-
 
 /*
  * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
@@ -1735,8 +1687,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 	b	interrupt_return
 #endif
 
-	GEN_KVM fp_unavailable
-
 
 /**
  * Interrupt 0x900 - Decrementer Interrupt.
@@ -1777,8 +1727,6 @@ EXC_COMMON_BEGIN(decrementer_common)
 	bl	timer_interrupt
 	b	interrupt_return
 
-	GEN_KVM decrementer
-
 
 /**
  * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
@@ -1825,8 +1773,6 @@ EXC_COMMON_BEGIN(hdecrementer_common)
 	ld	r13,PACA_EXGEN+EX_R13(r13)
 	HRFI_TO_KERNEL
 
-	GEN_KVM hdecrementer
-
 
 /**
  * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
@@ -1868,8 +1814,6 @@ EXC_COMMON_BEGIN(doorbell_super_common)
 #endif
 	b	interrupt_return
 
-	GEN_KVM doorbell_super
-
 
 EXC_REAL_NONE(0xb00, 0x100)
 EXC_VIRT_NONE(0x4b00, 0x100)
@@ -1919,7 +1863,7 @@ INT_DEFINE_END(system_call)
 	GET_PACA(r13)
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	INTERRUPT_TO_KERNEL
-	KVMTEST system_call /* uses r10, branch to system_call_kvm */
+	KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
 	mfctr	r9
 #else
 	mr	r9,r13
@@ -1978,7 +1922,7 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
 EXC_VIRT_END(system_call, 0x4c00, 0x100)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(system_call_kvm)
+TRAMP_REAL_BEGIN(kvm_hcall)
 	mfctr	r10
 	SET_SCRATCH0(r10) /* Save r13 in SCRATCH0 */
 #ifdef CONFIG_RELOCATABLE
@@ -2018,8 +1962,6 @@ EXC_COMMON_BEGIN(single_step_common)
 	bl	single_step_exception
 	b	interrupt_return
 
-	GEN_KVM single_step
-
 
 /**
  * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
@@ -2060,8 +2002,6 @@ MMU_FTR_SECTION_ELSE
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
 	b       interrupt_return
 
-	GEN_KVM h_data_storage
-
 
 /**
  * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
@@ -2087,8 +2027,6 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
 	bl	unknown_exception
 	b	interrupt_return
 
-	GEN_KVM h_instr_storage
-
 
 /**
  * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
@@ -2113,8 +2051,6 @@ EXC_COMMON_BEGIN(emulation_assist_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return
 
-	GEN_KVM emulation_assist
-
 
 /**
  * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
@@ -2187,8 +2123,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
 	EXCEPTION_RESTORE_REGS hsrr=1
 	GEN_INT_ENTRY hmi_exception, virt=0
 
-	GEN_KVM hmi_exception_early
-
 EXC_COMMON_BEGIN(hmi_exception_common)
 	GEN_COMMON hmi_exception
 	FINISH_NAP
@@ -2197,8 +2131,6 @@ EXC_COMMON_BEGIN(hmi_exception_common)
 	bl	handle_hmi_exception
 	b	interrupt_return
 
-	GEN_KVM hmi_exception
-
 
 /**
  * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
@@ -2231,8 +2163,6 @@ EXC_COMMON_BEGIN(h_doorbell_common)
 #endif
 	b	interrupt_return
 
-	GEN_KVM h_doorbell
-
 
 /**
  * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
@@ -2261,8 +2191,6 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
 	bl	do_IRQ
 	b	interrupt_return
 
-	GEN_KVM h_virt_irq
-
 
 EXC_REAL_NONE(0xec0, 0x20)
 EXC_VIRT_NONE(0x4ec0, 0x20)
@@ -2308,8 +2236,6 @@ EXC_COMMON_BEGIN(performance_monitor_common)
 	bl	performance_monitor_exception
 	b	interrupt_return
 
-	GEN_KVM performance_monitor
-
 
 /**
  * Interrupt 0xf20 - Vector Unavailable Interrupt.
@@ -2362,8 +2288,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	bl	altivec_unavailable_exception
 	b	interrupt_return
 
-	GEN_KVM altivec_unavailable
-
 
 /**
  * Interrupt 0xf40 - VSX Unavailable Interrupt.
@@ -2415,8 +2339,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	bl	vsx_unavailable_exception
 	b	interrupt_return
 
-	GEN_KVM vsx_unavailable
-
 
 /**
  * Interrupt 0xf60 - Facility Unavailable Interrupt.
@@ -2445,8 +2367,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return
 
-	GEN_KVM facility_unavailable
-
 
 /**
  * Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
@@ -2475,8 +2395,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
 	REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
 	b	interrupt_return
 
-	GEN_KVM h_facility_unavailable
-
 
 EXC_REAL_NONE(0xfa0, 0x20)
 EXC_VIRT_NONE(0x4fa0, 0x20)
@@ -2507,8 +2425,6 @@ EXC_COMMON_BEGIN(cbe_system_error_common)
 	bl	cbe_system_error_exception
 	b	interrupt_return
 
-	GEN_KVM cbe_system_error
-
 #else /* CONFIG_CBE_RAS */
 EXC_REAL_NONE(0x1200, 0x100)
 EXC_VIRT_NONE(0x5200, 0x100)
@@ -2534,8 +2450,6 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common)
 	bl	instruction_breakpoint_exception
 	b	interrupt_return
 
-	GEN_KVM instruction_breakpoint
-
 
 EXC_REAL_NONE(0x1400, 0x100)
 EXC_VIRT_NONE(0x5400, 0x100)
@@ -2656,8 +2570,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
 	bl	unknown_exception
 	b	interrupt_return
 
-	GEN_KVM denorm_exception
-
 
 #ifdef CONFIG_CBE_RAS
 INT_DEFINE_BEGIN(cbe_maintenance)
@@ -2676,8 +2588,6 @@ EXC_COMMON_BEGIN(cbe_maintenance_common)
 	bl	cbe_maintenance_exception
 	b	interrupt_return
 
-	GEN_KVM cbe_maintenance
-
 #else /* CONFIG_CBE_RAS */
 EXC_REAL_NONE(0x1600, 0x100)
 EXC_VIRT_NONE(0x5600, 0x100)
@@ -2708,8 +2618,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
 #endif
 	b	interrupt_return
 
-	GEN_KVM altivec_assist
-
 
 #ifdef CONFIG_CBE_RAS
 INT_DEFINE_BEGIN(cbe_thermal)
@@ -2728,8 +2636,6 @@ EXC_COMMON_BEGIN(cbe_thermal_common)
 	bl	cbe_thermal_exception
 	b	interrupt_return
 
-	GEN_KVM cbe_thermal
-
 #else /* CONFIG_CBE_RAS */
 EXC_REAL_NONE(0x1800, 0x100)
 EXC_VIRT_NONE(0x5800, 0x100)
@@ -2983,6 +2889,10 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
 
 USE_TEXT_SECTION()
 
+	/* conditional branch in KVMTEST can't reach all the way, make a stub */
+kvm_interrupt:
+	b	kvmppc_interrupt
+
 _GLOBAL(do_uaccess_flush)
 	UACCESS_FLUSH_FIXUP_SECTION
 	nop
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index df074d33f6d7..5db76c8d4012 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -6,9 +6,6 @@
 #include <asm/ppc_asm.h>
 #include <asm/reg.h>
 
-/*
- * We come here from the first-level interrupt handlers.
- */
 .global	kvmppc_hcall
 .balign IFETCH_ALIGN_BYTES
 kvmppc_hcall:
@@ -29,15 +26,45 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	sldi	r12,r10,32
 	ori	r12,r12,0xc00
 	ld	r10,PACA_EXGEN+EX_R10(r13)
+	b	do_kvm_interrupt
 
 .global	kvmppc_interrupt
 .balign IFETCH_ALIGN_BYTES
 kvmppc_interrupt:
+	li	r11,PACA_EXGEN
+	cmpdi	r10,0x200
+	bgt+	1f
+	li	r11,PACA_EXMC
+	beq	1f
+	li	r11,PACA_EXNMI
+1:	add	r11,r11,r13
+
+BEGIN_FTR_SECTION
+	ld	r12,EX_CFAR(r11)
+	std	r12,HSTATE_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	ld	r12,EX_CTR(r11)
+	mtctr	r12
+BEGIN_FTR_SECTION
+	ld	r12,EX_PPR(r11)
+	std	r12,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	ld	r12,EX_R12(r11)
+	std	r12,HSTATE_SCRATCH0(r13)
+	sldi	r12,r9,32
+	or	r12,r12,r10
+	ld	r9,EX_R9(r11)
+	ld	r10,EX_R10(r11)
+	ld	r11,EX_R11(r11)
+
+do_kvm_interrupt:
 	/*
-	 * Register contents:
+	 * Hcalls and other interrupts come here after normalising register
+	 * contents and save locations:
+	 *
 	 * R12		= (guest CR << 32) | interrupt vector
 	 * R13		= PACA
-	 * guest R12 saved in shadow VCPU SCRATCH0
+	 * guest R12 saved in shadow HSTATE_SCRATCH0
 	 * guest R13 saved in SPRN_SCRATCH0
 	 */
 	std	r9,HSTATE_SCRATCH2(r13)
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 5/9] powerpc/64s: Remove EXSLB interrupt save area
From: Nicholas Piggin @ 2021-02-02  3:03 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210202030313.3509446-1-npiggin@gmail.com>

SLB faults should not be taken while the PACA save areas are live, so
EXSLB is not be required.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/paca.h      | 3 +--
 arch/powerpc/kernel/asm-offsets.c    | 1 -
 arch/powerpc/kernel/exceptions-64s.S | 5 -----
 3 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 9454d29ff4b4..be0b00cb9fbb 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -108,8 +108,7 @@ struct paca_struct {
 	 */
 	/* used for most interrupts/exceptions */
 	u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
-	u64 exslb[EX_SIZE];	/* used for SLB/segment table misses
- 				 * on the linear mapping */
+
 	/* SLB related definitions */
 	u16 vmalloc_sllp;
 	u8 slb_cache_ptr;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 489a22cf1a92..51302fb74d14 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -255,7 +255,6 @@ int main(void)
 #endif /* CONFIG_PPC_MM_SLICES */
 	OFFSET(PACA_EXGEN, paca_struct, exgen);
 	OFFSET(PACA_EXMC, paca_struct, exmc);
-	OFFSET(PACA_EXSLB, paca_struct, exslb);
 	OFFSET(PACA_EXNMI, paca_struct, exnmi);
 #ifdef CONFIG_PPC_PSERIES
 	OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e964ea5149e8..c893801eb1d5 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1412,13 +1412,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
  *   on user-handler data structures.
  *
  *   KVM: Same as 0x300, DSLB must test for KVM guest.
- *
- * A dedicated save area EXSLB is used (XXX: but it actually need not be
- * these days, we could use EXGEN).
  */
 INT_DEFINE_BEGIN(data_access_slb)
 	IVEC=0x380
-	IAREA=PACA_EXSLB
 	IRECONCILE=0
 	IDAR=1
 	IKVM_REAL=1
@@ -1507,7 +1503,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
  */
 INT_DEFINE_BEGIN(instruction_access_slb)
 	IVEC=0x480
-	IAREA=PACA_EXSLB
 	IRECONCILE=0
 	IISIDE=1
 	IDAR=1
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 4/9] KVM: PPC: Book3S HV: Move hcall early register setup to KVM
From: Nicholas Piggin @ 2021-02-02  3:03 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210202030313.3509446-1-npiggin@gmail.com>

System calls / hcalls have a different calling convention than
other interrupts, so there is code in the KVMTEST to massage these
into the same form as other interrupt handlers.

Move this work into the KVM hcall handler. This means teaching KVM
a little more about the low level interrupt handler setup, PACA save
areas, etc., although that's not obviously worse than the current
approach of coming up with an entirely different interrupt register
/ save convention.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/exception-64s.h | 13 +++++++
 arch/powerpc/kernel/exceptions-64s.S     | 44 ++----------------------
 arch/powerpc/kvm/book3s_64_entry.S       | 17 +++++++++
 3 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 1d32b174ab6a..7e5168b83155 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -35,6 +35,19 @@
 /* PACA save area size in u64 units (exgen, exmc, etc) */
 #define EX_SIZE		10
 
+/* PACA save area offsets */
+#define EX_R9		0
+#define EX_R10		8
+#define EX_R11		16
+#define EX_R12		24
+#define EX_R13		32
+#define EX_DAR		40
+#define EX_DSISR	48
+#define EX_CCR		52
+#define EX_CFAR		56
+#define EX_PPR		64
+#define EX_CTR		72
+
 /*
  * maximum recursive depth of MCE exceptions
  */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index c25395b5921a..e964ea5149e8 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,22 +21,6 @@
 #include <asm/feature-fixups.h>
 #include <asm/kup.h>
 
-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9		0
-#define EX_R10		8
-#define EX_R11		16
-#define EX_R12		24
-#define EX_R13		32
-#define EX_DAR		40
-#define EX_DSISR	48
-#define EX_CCR		52
-#define EX_CFAR		56
-#define EX_PPR		64
-#define EX_CTR		72
-.if EX_SIZE != 10
-	.error "EX_SIZE is wrong"
-.endif
-
 /*
  * Following are fixed section helper macros.
  *
@@ -2000,45 +1984,21 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 TRAMP_REAL_BEGIN(system_call_kvm)
-	/*
-	 * This is a hcall, so register convention is as above, with these
-	 * differences:
-	 * r13 = PACA
-	 * ctr = orig r13
-	 * orig r10 saved in PACA
-	 */
-	 /*
-	  * Save the PPR (on systems that support it) before changing to
-	  * HMT_MEDIUM. That allows the KVM code to save that value into the
-	  * guest state (it is the guest's PPR value).
-	  */
-BEGIN_FTR_SECTION
-	mfspr	r10,SPRN_PPR
-	std	r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-	HMT_MEDIUM
 	mfctr	r10
-	SET_SCRATCH0(r10)
-	mfcr	r10
-	std	r12,HSTATE_SCRATCH0(r13)
-	sldi	r12,r10,32
-	ori	r12,r12,0xc00
+	SET_SCRATCH0(r10) /* Save r13 in SCRATCH0 */
 #ifdef CONFIG_RELOCATABLE
 	/*
-	 * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+	 * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
 	 * outside the head section.
 	 */
 	__LOAD_FAR_HANDLER(r10, kvmppc_hcall)
 	mtctr   r10
-	ld	r10,PACA_EXGEN+EX_R10(r13)
 	bctr
 #else
-	ld	r10,PACA_EXGEN+EX_R10(r13)
 	b       kvmppc_hcall
 #endif
 #endif
 
-
 /**
  * Interrupt 0xd00 - Trace Interrupt.
  * This is a synchronous interrupt in response to instruction step or
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 3b894b90862f..df074d33f6d7 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -12,6 +12,23 @@
 .global	kvmppc_hcall
 .balign IFETCH_ALIGN_BYTES
 kvmppc_hcall:
+	/*
+	 * This is a hcall, so register convention is as
+	 * Documentation/powerpc/papr_hcalls.rst, with these additions:
+	 * R13		= PACA
+	 * guest R13 saved in SPRN_SCRATCH0
+	 * R10		= free
+	 */
+BEGIN_FTR_SECTION
+	mfspr	r10,SPRN_PPR
+	std	r10,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	HMT_MEDIUM
+	mfcr	r10
+	std	r12,HSTATE_SCRATCH0(r13)
+	sldi	r12,r10,32
+	ori	r12,r12,0xc00
+	ld	r10,PACA_EXGEN+EX_R10(r13)
 
 .global	kvmppc_interrupt
 .balign IFETCH_ALIGN_BYTES
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 3/9] KVM: PPC: Book3S 64: add hcall interrupt handler
From: Nicholas Piggin @ 2021-02-02  3:03 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210202030313.3509446-1-npiggin@gmail.com>

Add a separate hcall entry point. This can be used to deal with the
different calling convention.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 4 ++--
 arch/powerpc/kvm/book3s_64_entry.S   | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e6f7fc7c61a1..c25395b5921a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -2028,13 +2028,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	 * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
 	 * outside the head section.
 	 */
-	__LOAD_FAR_HANDLER(r10, kvmppc_interrupt)
+	__LOAD_FAR_HANDLER(r10, kvmppc_hcall)
 	mtctr   r10
 	ld	r10,PACA_EXGEN+EX_R10(r13)
 	bctr
 #else
 	ld	r10,PACA_EXGEN+EX_R10(r13)
-	b       kvmppc_interrupt
+	b       kvmppc_hcall
 #endif
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 8e7216f3c3ee..3b894b90862f 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -9,6 +9,10 @@
 /*
  * We come here from the first-level interrupt handlers.
  */
+.global	kvmppc_hcall
+.balign IFETCH_ALIGN_BYTES
+kvmppc_hcall:
+
 .global	kvmppc_interrupt
 .balign IFETCH_ALIGN_BYTES
 kvmppc_interrupt:
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 2/9] KVM: PPC: Book3S 64: Move GUEST_MODE_SKIP test into KVM
From: Nicholas Piggin @ 2021-02-02  3:03 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210202030313.3509446-1-npiggin@gmail.com>

Move the GUEST_MODE_SKIP logic into KVM code. This is quite a KVM
internal detail that has no real need to be in common handlers.

(XXX: Need to confirm CBE handlers etc)

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 64 ----------------------------
 arch/powerpc/kvm/book3s_64_entry.S   | 50 ++++++++++++++++++----
 2 files changed, 42 insertions(+), 72 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 65659ea3cec4..e6f7fc7c61a1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -133,7 +133,6 @@ name:
 #define IBRANCH_TO_COMMON	.L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
 #define IREALMODE_COMMON	.L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
 #define IMASK		.L_IMASK_\name\()	/* IRQ soft-mask bit */
-#define IKVM_SKIP	.L_IKVM_SKIP_\name\()	/* Generate KVM skip handler */
 #define IKVM_REAL	.L_IKVM_REAL_\name\()	/* Real entry tests KVM */
 #define __IKVM_REAL(name)	.L_IKVM_REAL_ ## name
 #define IKVM_VIRT	.L_IKVM_VIRT_\name\()	/* Virt entry tests KVM */
@@ -191,9 +190,6 @@ do_define_int n
 	.ifndef IMASK
 		IMASK=0
 	.endif
-	.ifndef IKVM_SKIP
-		IKVM_SKIP=0
-	.endif
 	.ifndef IKVM_REAL
 		IKVM_REAL=0
 	.endif
@@ -254,15 +250,10 @@ do_define_int n
 	.balign IFETCH_ALIGN_BYTES
 \name\()_kvm:
 
-	.if IKVM_SKIP
-	cmpwi	r10,KVM_GUEST_MODE_SKIP
-	beq	89f
-	.else
 BEGIN_FTR_SECTION
 	ld	r10,IAREA+EX_CFAR(r13)
 	std	r10,HSTATE_CFAR(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
-	.endif
 
 	ld	r10,IAREA+EX_CTR(r13)
 	mtctr	r10
@@ -289,27 +280,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ori	r12,r12,(IVEC)
 	.endif
 	b	kvmppc_interrupt
-
-	.if IKVM_SKIP
-89:	mtocrf	0x80,r9
-	ld	r10,IAREA+EX_CTR(r13)
-	mtctr	r10
-	ld	r9,IAREA+EX_R9(r13)
-	ld	r10,IAREA+EX_R10(r13)
-	ld	r11,IAREA+EX_R11(r13)
-	ld	r12,IAREA+EX_R12(r13)
-	.if IHSRR_IF_HVMODE
-	BEGIN_FTR_SECTION
-	b	kvmppc_skip_Hinterrupt
-	FTR_SECTION_ELSE
-	b	kvmppc_skip_interrupt
-	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-	.elseif IHSRR
-	b	kvmppc_skip_Hinterrupt
-	.else
-	b	kvmppc_skip_interrupt
-	.endif
-	.endif
 .endm
 
 #else
@@ -1128,7 +1098,6 @@ INT_DEFINE_BEGIN(machine_check)
 	ISET_RI=0
 	IDAR=1
 	IDSISR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(machine_check)
 
@@ -1419,7 +1388,6 @@ INT_DEFINE_BEGIN(data_access)
 	IVEC=0x300
 	IDAR=1
 	IDSISR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(data_access)
 
@@ -1469,7 +1437,6 @@ INT_DEFINE_BEGIN(data_access_slb)
 	IAREA=PACA_EXSLB
 	IRECONCILE=0
 	IDAR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(data_access_slb)
 
@@ -2116,7 +2083,6 @@ INT_DEFINE_BEGIN(h_data_storage)
 	IHSRR=1
 	IDAR=1
 	IDSISR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 	IKVM_VIRT=1
 INT_DEFINE_END(h_data_storage)
@@ -2573,7 +2539,6 @@ EXC_VIRT_NONE(0x5100, 0x100)
 INT_DEFINE_BEGIN(cbe_system_error)
 	IVEC=0x1200
 	IHSRR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(cbe_system_error)
 
@@ -2598,7 +2563,6 @@ EXC_VIRT_NONE(0x5200, 0x100)
 INT_DEFINE_BEGIN(instruction_breakpoint)
 	IVEC=0x1300
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-	IKVM_SKIP=1
 	IKVM_REAL=1
 #endif
 INT_DEFINE_END(instruction_breakpoint)
@@ -2744,7 +2708,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
 INT_DEFINE_BEGIN(cbe_maintenance)
 	IVEC=0x1600
 	IHSRR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(cbe_maintenance)
 
@@ -2797,7 +2760,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
 INT_DEFINE_BEGIN(cbe_thermal)
 	IVEC=0x1800
 	IHSRR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(cbe_thermal)
 
@@ -3081,32 +3043,6 @@ EXPORT_SYMBOL(do_uaccess_flush)
 MASKED_INTERRUPT
 MASKED_INTERRUPT hsrr=1
 
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-kvmppc_skip_interrupt:
-	/*
-	 * Here all GPRs are unchanged from when the interrupt happened
-	 * except for r13, which is saved in SPRG_SCRATCH0.
-	 */
-	mfspr	r13, SPRN_SRR0
-	addi	r13, r13, 4
-	mtspr	SPRN_SRR0, r13
-	GET_SCRATCH0(r13)
-	RFI_TO_KERNEL
-	b	.
-
-kvmppc_skip_Hinterrupt:
-	/*
-	 * Here all GPRs are unchanged from when the interrupt happened
-	 * except for r13, which is saved in SPRG_SCRATCH0.
-	 */
-	mfspr	r13, SPRN_HSRR0
-	addi	r13, r13, 4
-	mtspr	SPRN_HSRR0, r13
-	GET_SCRATCH0(r13)
-	HRFI_TO_KERNEL
-	b	.
-#endif
-
 	/*
 	 * Relocation-on interrupts: A subset of the interrupts can be delivered
 	 * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 22e34b95f478..8e7216f3c3ee 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -1,9 +1,10 @@
+#include <asm/asm-offsets.h>
 #include <asm/cache.h>
-#include <asm/ppc_asm.h>
+#include <asm/exception-64s.h>
 #include <asm/kvm_asm.h>
-#include <asm/reg.h>
-#include <asm/asm-offsets.h>
 #include <asm/kvm_book3s_asm.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
 
 /*
  * We come here from the first-level interrupt handlers.
@@ -18,17 +19,50 @@ kvmppc_interrupt:
 	 * guest R12 saved in shadow VCPU SCRATCH0
 	 * guest R13 saved in SPRN_SCRATCH0
 	 */
+	std	r9,HSTATE_SCRATCH2(r13)
+	lbz	r9,HSTATE_IN_GUEST(r13)
+	cmpwi	r9,KVM_GUEST_MODE_SKIP
+	beq	maybe_skip
+no_skip:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	std	r9, HSTATE_SCRATCH2(r13)
-	lbz	r9, HSTATE_IN_GUEST(r13)
-	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
+	cmpwi	r9,KVM_GUEST_MODE_HOST_HV
 	beq	kvmppc_bad_host_intr
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-	cmpwi	r9, KVM_GUEST_MODE_GUEST
-	ld	r9, HSTATE_SCRATCH2(r13)
+	cmpwi	r9,KVM_GUEST_MODE_GUEST
+	ld	r9,HSTATE_SCRATCH2(r13)
 	beq	kvmppc_interrupt_pr
 #endif
 	b	kvmppc_interrupt_hv
 #else
 	b	kvmppc_interrupt_pr
 #endif
+
+maybe_skip:
+	cmpwi	r12,0x200
+	beq	1f
+	cmpwi	r12,0x300
+	beq	1f
+	cmpwi	r12,0x380
+	beq	1f
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/* XXX: cbe stuff? instruction breakpoint? */
+	cmpwi	r12,0xe02
+	beq	2f
+#endif
+	b	no_skip
+1:	mfspr	r9,SPRN_SRR0
+	addi	r9,r9,4
+	mtspr	SPRN_SRR0,r9
+	ld	r12,HSTATE_SCRATCH0(r13)
+	ld	r9,HSTATE_SCRATCH2(r13)
+	GET_SCRATCH0(r13)
+	RFI_TO_KERNEL
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+2:	mfspr	r9,SPRN_HSRR0
+	addi	r9,r9,4
+	mtspr	SPRN_HSRR0,r9
+	ld	r12,HSTATE_SCRATCH0(r13)
+	ld	r9,HSTATE_SCRATCH2(r13)
+	GET_SCRATCH0(r13)
+	HRFI_TO_KERNEL
+#endif
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 1/9] KVM: PPC: Book3S 64: move KVM interrupt entry to a common entry point
From: Nicholas Piggin @ 2021-02-02  3:03 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210202030313.3509446-1-npiggin@gmail.com>

Rather than bifurcate the call depending on whether or not HV is
possible, and have the HV entry test for PR, just make a single
common point which does the demultiplexing. This makes it simpler
to add another type of exit handler.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S    |  8 +-----
 arch/powerpc/kvm/Makefile               |  3 +++
 arch/powerpc/kvm/book3s_64_entry.S      | 34 +++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 ++------
 4 files changed, 40 insertions(+), 16 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_64_entry.S

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e02ad6fefa46..65659ea3cec4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -212,7 +212,6 @@ do_define_int n
 .endm
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * All interrupts which set HSRR registers, as well as SRESET and MCE and
  * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
@@ -242,13 +241,8 @@ do_define_int n
 
 /*
  * If an interrupt is taken while a guest is running, it is immediately routed
- * to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first
- * to kvmppc_interrupt_hv, which handles the PR guest case.
+ * to KVM to handle.
  */
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
 
 .macro KVMTEST name
 	lbz	r10,HSTATE_IN_GUEST(r13)
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2bfeaa13befb..cdd119028f64 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -59,6 +59,9 @@ kvm-pr-y := \
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	tm.o
 
+kvm-book3s_64-builtin-objs-y += \
+	book3s_64_entry.o
+
 ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	book3s_rmhandlers.o
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
new file mode 100644
index 000000000000..22e34b95f478
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -0,0 +1,34 @@
+#include <asm/cache.h>
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm_book3s_asm.h>
+
+/*
+ * We come here from the first-level interrupt handlers.
+ */
+.global	kvmppc_interrupt
+.balign IFETCH_ALIGN_BYTES
+kvmppc_interrupt:
+	/*
+	 * Register contents:
+	 * R12		= (guest CR << 32) | interrupt vector
+	 * R13		= PACA
+	 * guest R12 saved in shadow VCPU SCRATCH0
+	 * guest R13 saved in SPRN_SCRATCH0
+	 */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	std	r9, HSTATE_SCRATCH2(r13)
+	lbz	r9, HSTATE_IN_GUEST(r13)
+	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
+	beq	kvmppc_bad_host_intr
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	cmpwi	r9, KVM_GUEST_MODE_GUEST
+	ld	r9, HSTATE_SCRATCH2(r13)
+	beq	kvmppc_interrupt_pr
+#endif
+	b	kvmppc_interrupt_hv
+#else
+	b	kvmppc_interrupt_pr
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8cf1f69f442e..b9c4acd747f7 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1255,16 +1255,8 @@ kvmppc_interrupt_hv:
 	 * R13		= PACA
 	 * guest R12 saved in shadow VCPU SCRATCH0
 	 * guest R13 saved in SPRN_SCRATCH0
+	 * guest R9 saved in HSTATE_SCRATCH2
 	 */
-	std	r9, HSTATE_SCRATCH2(r13)
-	lbz	r9, HSTATE_IN_GUEST(r13)
-	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
-	beq	kvmppc_bad_host_intr
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-	cmpwi	r9, KVM_GUEST_MODE_GUEST
-	ld	r9, HSTATE_SCRATCH2(r13)
-	beq	kvmppc_interrupt_pr
-#endif
 	/* We're now back in the host but in guest MMU context */
 	li	r9, KVM_GUEST_MODE_HOST_HV
 	stb	r9, HSTATE_IN_GUEST(r13)
@@ -3253,6 +3245,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
  * cfar is saved in HSTATE_CFAR(r13)
  * ppr is saved in HSTATE_PPR(r13)
  */
+.global kvmppc_bad_host_intr
 kvmppc_bad_host_intr:
 	/*
 	 * Switch to the emergency stack, but start half-way down in
-- 
2.23.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox