LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v8 4/6] KVM: PPC: Book3S HV: Nested support in H_RPT_INVALIDATE
From: Bharata B Rao @ 2021-06-22  7:12 UTC (permalink / raw)
  To: Nathan Chancellor, kvm-ppc, linuxppc-dev, farosas, aneesh.kumar,
	npiggin, david
In-Reply-To: <YNFom3Ojb4TGsKj2@in.ibm.com>

On Tue, Jun 22, 2021 at 10:05:45AM +0530, Bharata B Rao wrote:
> On Mon, Jun 21, 2021 at 10:12:42AM -0700, Nathan Chancellor wrote:
> > I have not seen this reported yet so apologies if it has and there is a
> > fix I am missing:
> > 
> > arch/powerpc/kvm/book3s_hv_nested.c:1334:11: error: variable 'ap' is uninitialized when used here [-Werror,-Wuninitialized]
> >                                                            ap, start, end);
> >                                                            ^~
> > arch/powerpc/kvm/book3s_hv_nested.c:1276:25: note: initialize the variable 'ap' to silence this warning
> >         unsigned long psize, ap;
> >                                ^
> >                                 = 0
> 
> Thanks for catching this, this wasn't caught in my environment.
> 
> I will repost the series with proper initialization to ap.

Michael,

Here is the fix for this on top of powerpc/next. If it is easier
and cleaner to fold this into the original series and re-post
the whole series against any updated tree, let me know.


From 2e7198e28c0d1137f3230d4645e9cfddaccf4987 Mon Sep 17 00:00:00 2001
From: Bharata B Rao <bharata@linux.ibm.com>
Date: Tue, 22 Jun 2021 12:07:01 +0530
Subject: [PATCH 1/1] KVM: PPC: Book3S HV: Use proper ap value in
 H_RPT_INVALIDATE

The ap value that is used when performing range based partition
scoped invalidations for the nested guests wasn't initialized
correctly.

Fix this and while we are here, reorganize the routine that does
this invalidation for better readability.

Fixes: 0e67d866cb32 ("KVM: PPC: Book3S HV: Nested support in H_RPT_INVALIDATE")
Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 arch/powerpc/kvm/book3s_hv_nested.c | 90 +++++++++++++----------------
 1 file changed, 40 insertions(+), 50 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index d78efb5f5bb3..3a06ac0b53e2 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1222,27 +1222,6 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
 	return H_SUCCESS;
 }
 
-static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
-					 unsigned long lpid,
-					 unsigned long page_size,
-					 unsigned long ap,
-					 unsigned long start,
-					 unsigned long end)
-{
-	unsigned long addr = start;
-	int ret;
-
-	do {
-		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
-						   get_epn(addr));
-		if (ret)
-			return ret;
-		addr += page_size;
-	} while (addr < end);
-
-	return ret;
-}
-
 static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
 					 unsigned long lpid, unsigned long ric)
 {
@@ -1263,6 +1242,42 @@ static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
  */
 static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
 
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+					 unsigned long lpid,
+					 unsigned long pg_sizes,
+					 unsigned long start,
+					 unsigned long end)
+{
+	int ret = H_P4;
+	unsigned long addr, nr_pages;
+	struct mmu_psize_def *def;
+	unsigned long psize, ap, page_size;
+	bool flush_lpid;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		def = &mmu_psize_defs[psize];
+		if (!(pg_sizes & def->h_rpt_pgsize))
+			continue;
+
+		nr_pages = (end - start) >> def->shift;
+		flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
+		if (flush_lpid)
+			return do_tlb_invalidate_nested_all(vcpu, lpid,
+							RIC_FLUSH_TLB);
+		addr = start;
+		ap = mmu_get_ap(psize);
+		page_size = 1UL << def->shift;
+		do {
+			ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+						   get_epn(addr));
+			if (ret)
+				return H_P4;
+			addr += page_size;
+		} while (addr < end);
+	}
+	return ret;
+}
+
 /*
  * Performs partition-scoped invalidations for nested guests
  * as part of H_RPT_INVALIDATE hcall.
@@ -1271,10 +1286,6 @@ long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
 			     unsigned long type, unsigned long pg_sizes,
 			     unsigned long start, unsigned long end)
 {
-	struct kvm_nested_guest *gp;
-	long ret;
-	unsigned long psize, ap;
-
 	/*
 	 * If L2 lpid isn't valid, we need to return H_PARAMETER.
 	 *
@@ -1284,8 +1295,7 @@ long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
 	 * H_ENTER_NESTED call. Since we can't differentiate this case from
 	 * the invalid case, we ignore such flush requests and return success.
 	 */
-	gp = kvmhv_find_nested(vcpu->kvm, lpid);
-	if (!gp)
+	if (!kvmhv_find_nested(vcpu->kvm, lpid))
 		return H_SUCCESS;
 
 	/*
@@ -1313,29 +1323,9 @@ long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
 	if (start == 0 && end == -1)
 		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
 
-	if (type & H_RPTI_TYPE_TLB) {
-		struct mmu_psize_def *def;
-		bool flush_lpid;
-		unsigned long nr_pages;
-
-		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
-			def = &mmu_psize_defs[psize];
-			if (!(pg_sizes & def->h_rpt_pgsize))
-				continue;
-
-			nr_pages = (end - start) >> def->shift;
-			flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
-			if (flush_lpid)
-				return do_tlb_invalidate_nested_all(vcpu, lpid,
-								RIC_FLUSH_TLB);
-
-			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
-							   (1UL << def->shift),
-							   ap, start, end);
-			if (ret)
-				return H_P4;
-		}
-	}
+	if (type & H_RPTI_TYPE_TLB)
+		return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes,
+						    start, end);
 	return H_SUCCESS;
 }
 
-- 
2.31.1


^ permalink raw reply related

* [powerpc][next-20210621] WARNING at kernel/sched/fair.c:3277 during boot
From: Sachin Sant @ 2021-06-22  7:39 UTC (permalink / raw)
  To: open list, linux-next; +Cc: Vincent Guittot, linuxppc-dev, Odin Ugedal

While booting 5.13.0-rc7-next-20210621 on a PowerVM LPAR, the following warning
is seen:

[   30.922154] ------------[ cut here ]------------
[   30.922201] cfs_rq->avg.load_avg || cfs_rq->avg.util_avg || cfs_rq->avg.runnable_avg
[   30.922219] WARNING: CPU: 6 PID: 762 at kernel/sched/fair.c:3277 update_blocked_averages+0x758/0x780
[   30.922259] Modules linked in: pseries_rng xts vmx_crypto uio_pdrv_genirq uio sch_fq_codel ip_tables sd_mod t10_pi sg fuse
[   30.922309] CPU: 6 PID: 762 Comm: augenrules Not tainted 5.13.0-rc7-next-20210621 #1
[   30.922329] NIP:  c0000000001b27e8 LR: c0000000001b27e4 CTR: c0000000007cfda0
[   30.922344] REGS: c000000023fcb660 TRAP: 0700   Not tainted  (5.13.0-rc7-next-20210621)
[   30.922359] MSR:  8000000000029033 <SF,EE,ME,IR,DR,RI,LE>  CR: 48488224  XER: 00000005
[   30.922394] CFAR: c00000000014d120 IRQMASK: 1 
               GPR00: c0000000001b27e4 c000000023fcb900 c000000002a08400 0000000000000048 
               GPR04: 00000000ffff7fff c000000023fcb5c0 0000000000000027 c000000f6fdd7e18 
               GPR08: 0000000000000023 0000000000000001 0000000000000027 c0000000028a6650 
               GPR12: 0000000000008000 c000000f6fff7680 c000000f6fe62600 0000000000000032 
               GPR16: 00000007331a989a c000000f6fe62600 c0000000238a6800 0000000000000001 
               GPR20: 0000000000000000 c000000002a4dfe0 0000000000000000 0000000000000006 
               GPR24: 0000000000000000 c000000f6fe63010 0000000000000001 c000000f6fe62680 
               GPR28: 0000000000000006 c0000000238a69c0 0000000000000000 c000000f6fe62600 
[   30.922569] NIP [c0000000001b27e8] update_blocked_averages+0x758/0x780
[   30.922599] LR [c0000000001b27e4] update_blocked_averages+0x754/0x780
[   30.922624] Call Trace:
[   30.922631] [c000000023fcb900] [c0000000001b27e4] update_blocked_averages+0x754/0x780 (unreliable)
[   30.922653] [c000000023fcba20] [c0000000001bd668] newidle_balance+0x258/0x5c0
[   30.922674] [c000000023fcbab0] [c0000000001bdaac] pick_next_task_fair+0x7c/0x4d0
[   30.922692] [c000000023fcbb10] [c000000000dcd31c] __schedule+0x15c/0x1780
[   30.922708] [c000000023fcbc50] [c0000000001a5a04] do_task_dead+0x64/0x70
[   30.922726] [c000000023fcbc80] [c000000000156338] do_exit+0x848/0xcc0
[   30.922743] [c000000023fcbd50] [c000000000156884] do_group_exit+0x64/0xe0
[   30.922758] [c000000023fcbd90] [c000000000156924] sys_exit_group+0x24/0x30
[   30.922774] [c000000023fcbdb0] [c0000000000310c0] system_call_exception+0x150/0x2d0
[   30.922792] [c000000023fcbe10] [c00000000000cc5c] system_call_common+0xec/0x278
[   30.922808] --- interrupt: c00 at 0x7fffb3acddcc
[   30.922821] NIP:  00007fffb3acddcc LR: 00007fffb3a27f04 CTR: 0000000000000000
[   30.922833] REGS: c000000023fcbe80 TRAP: 0c00   Not tainted  (5.13.0-rc7-next-20210621)
[   30.922847] MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28444202  XER: 00000000
[   30.922882] IRQMASK: 0 
               GPR00: 00000000000000ea 00007fffc8f21780 00007fffb3bf7100 0000000000000000 
               GPR04: 0000000000000000 0000000155f142f0 0000000000000000 00007fffb3d23740 
               GPR08: fffffffffbad2a87 0000000000000000 0000000000000000 0000000000000000 
               GPR12: 0000000000000000 00007fffb3d2aeb0 0000000116be95e0 0000000000000032 
               GPR16: 0000000000000000 00007fffc8f21cd8 000000000000002d 0000000000000024 
               GPR20: 00007fffc8f21cd4 00007fffb3bf4f98 0000000000000001 0000000000000001 
               GPR24: 00007fffb3bf0950 0000000000000000 0000000000000000 0000000000000001 
               GPR28: 0000000000000000 0000000000000000 00007fffb3d23ec0 0000000000000000 
[   30.923023] NIP [00007fffb3acddcc] 0x7fffb3acddcc
[   30.923035] LR [00007fffb3a27f04] 0x7fffb3a27f04
[   30.923045] --- interrupt: c00
[   30.923052] Instruction dump:
[   30.923061] 3863be48 9be97ae6 4bf9a8f9 60000000 0fe00000 4bfff980 e9210070 e8610088 
[   30.923088] 39400001 99490003 4bf9a8d9 60000000 <0fe00000> 4bfffc24 3d22fff5 89297ae3 
[   30.923113] ---[ end trace ed07974d2149c499 ]---

This warning was introduced with commit 9e077b52d86a
sched/pelt: Check that *_avg are null when *_sum are

next-20210618 was good.

Thanks
-Sachin

^ permalink raw reply

* Re: [powerpc][next-20210621] WARNING at kernel/sched/fair.c:3277 during boot
From: Vincent Guittot @ 2021-06-22  7:49 UTC (permalink / raw)
  To: Sachin Sant; +Cc: Odin Ugedal, Linux Next Mailing List, linuxppc-dev, open list
In-Reply-To: <2ED1BDF5-BC0C-47CD-8F33-9A46C738F8CF@linux.vnet.ibm.com>

Hi Sachin,

On Tue, 22 Jun 2021 at 09:39, Sachin Sant <sachinp@linux.vnet.ibm.com> wrote:
>
> While booting 5.13.0-rc7-next-20210621 on a PowerVM LPAR following warning
> is seen
>
> [   30.922154] ------------[ cut here ]------------
> [   30.922201] cfs_rq->avg.load_avg || cfs_rq->avg.util_avg || cfs_rq->avg.runnable_avg
> [   30.922219] WARNING: CPU: 6 PID: 762 at kernel/sched/fair.c:3277 update_blocked_averages+0x758/0x780
> [   30.922259] Modules linked in: pseries_rng xts vmx_crypto uio_pdrv_genirq uio sch_fq_codel ip_tables sd_mod t10_pi sg fuse
> [   30.922309] CPU: 6 PID: 762 Comm: augenrules Not tainted 5.13.0-rc7-next-20210621 #1
> [   30.922329] NIP:  c0000000001b27e8 LR: c0000000001b27e4 CTR: c0000000007cfda0
> [   30.922344] REGS: c000000023fcb660 TRAP: 0700   Not tainted  (5.13.0-rc7-next-20210621)
> [   30.922359] MSR:  8000000000029033 <SF,EE,ME,IR,DR,RI,LE>  CR: 48488224  XER: 00000005
> [   30.922394] CFAR: c00000000014d120 IRQMASK: 1
>                GPR00: c0000000001b27e4 c000000023fcb900 c000000002a08400 0000000000000048
>                GPR04: 00000000ffff7fff c000000023fcb5c0 0000000000000027 c000000f6fdd7e18
>                GPR08: 0000000000000023 0000000000000001 0000000000000027 c0000000028a6650
>                GPR12: 0000000000008000 c000000f6fff7680 c000000f6fe62600 0000000000000032
>                GPR16: 00000007331a989a c000000f6fe62600 c0000000238a6800 0000000000000001
>                GPR20: 0000000000000000 c000000002a4dfe0 0000000000000000 0000000000000006
>                GPR24: 0000000000000000 c000000f6fe63010 0000000000000001 c000000f6fe62680
>                GPR28: 0000000000000006 c0000000238a69c0 0000000000000000 c000000f6fe62600
> [   30.922569] NIP [c0000000001b27e8] update_blocked_averages+0x758/0x780
> [   30.922599] LR [c0000000001b27e4] update_blocked_averages+0x754/0x780
> [   30.922624] Call Trace:
> [   30.922631] [c000000023fcb900] [c0000000001b27e4] update_blocked_averages+0x754/0x780 (unreliable)
> [   30.922653] [c000000023fcba20] [c0000000001bd668] newidle_balance+0x258/0x5c0
> [   30.922674] [c000000023fcbab0] [c0000000001bdaac] pick_next_task_fair+0x7c/0x4d0
> [   30.922692] [c000000023fcbb10] [c000000000dcd31c] __schedule+0x15c/0x1780
> [   30.922708] [c000000023fcbc50] [c0000000001a5a04] do_task_dead+0x64/0x70
> [   30.922726] [c000000023fcbc80] [c000000000156338] do_exit+0x848/0xcc0
> [   30.922743] [c000000023fcbd50] [c000000000156884] do_group_exit+0x64/0xe0
> [   30.922758] [c000000023fcbd90] [c000000000156924] sys_exit_group+0x24/0x30
> [   30.922774] [c000000023fcbdb0] [c0000000000310c0] system_call_exception+0x150/0x2d0
> [   30.922792] [c000000023fcbe10] [c00000000000cc5c] system_call_common+0xec/0x278
> [   30.922808] --- interrupt: c00 at 0x7fffb3acddcc
> [   30.922821] NIP:  00007fffb3acddcc LR: 00007fffb3a27f04 CTR: 0000000000000000
> [   30.922833] REGS: c000000023fcbe80 TRAP: 0c00   Not tainted  (5.13.0-rc7-next-20210621)
> [   30.922847] MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28444202  XER: 00000000
> [   30.922882] IRQMASK: 0
>                GPR00: 00000000000000ea 00007fffc8f21780 00007fffb3bf7100 0000000000000000
>                GPR04: 0000000000000000 0000000155f142f0 0000000000000000 00007fffb3d23740
>                GPR08: fffffffffbad2a87 0000000000000000 0000000000000000 0000000000000000
>                GPR12: 0000000000000000 00007fffb3d2aeb0 0000000116be95e0 0000000000000032
>                GPR16: 0000000000000000 00007fffc8f21cd8 000000000000002d 0000000000000024
>                GPR20: 00007fffc8f21cd4 00007fffb3bf4f98 0000000000000001 0000000000000001
>                GPR24: 00007fffb3bf0950 0000000000000000 0000000000000000 0000000000000001
>                GPR28: 0000000000000000 0000000000000000 00007fffb3d23ec0 0000000000000000
> [   30.923023] NIP [00007fffb3acddcc] 0x7fffb3acddcc
> [   30.923035] LR [00007fffb3a27f04] 0x7fffb3a27f04
> [   30.923045] --- interrupt: c00
> [   30.923052] Instruction dump:
> [   30.923061] 3863be48 9be97ae6 4bf9a8f9 60000000 0fe00000 4bfff980 e9210070 e8610088
> [   30.923088] 39400001 99490003 4bf9a8d9 60000000 <0fe00000> 4bfffc24 3d22fff5 89297ae3
> [   30.923113] ---[ end trace ed07974d2149c499 ]---
>
> This warning was introduced with commit 9e077b52d86a
> sched/pelt: Check that *_avg are null when *_sum are

Yes. That was exactly the purpose of the patch. There is one last
remaining part which could generate this. I'm going to prepare a patch.

Thanks

>
> next-20210618 was good.
>
> Thanks
> -Sachin

^ permalink raw reply

* Re: [PATCH 2/2] powerpc/64s/interrupt: Check and fix srr_valid without crashing
From: Nicholas Piggin @ 2021-06-22  8:54 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev
In-Reply-To: <21088c39-77fb-97d5-6fe9-76ae4ef4b439@csgroup.eu>

Excerpts from Christophe Leroy's message of June 22, 2021 4:47 pm:
> 
> 
> Le 22/06/2021 à 08:04, Nicholas Piggin a écrit :
>> The PPC_RFI_SRR_DEBUG check added by patch "powerpc/64s: avoid reloading
>> (H)SRR registers if they are still valid" has a few deficiencies. It
>> does not fix the actual problem, it's not enabled by default, and it
>> causes a program check interrupt which can cause more difficulties.
>> 
>> However there are a lot of paths which may clobber SRRs or change return
>> regs, and difficult to have a high confidence that all paths are covered
>> without wider testing.
>> 
>> Add a relatively low overhead always-enabled check that catches most
>> such cases, reports once, and fixes it so the kernel can continue.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/kernel/interrupt.c | 58 +++++++++++++++++++++++++++++++++
>>   1 file changed, 58 insertions(+)
>> 
>> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
>> index 05fa3ae56e25..5920a3e8d1d5 100644
>> --- a/arch/powerpc/kernel/interrupt.c
>> +++ b/arch/powerpc/kernel/interrupt.c
>> @@ -231,6 +231,56 @@ static notrace void booke_load_dbcr0(void)
>>   #endif
>>   }
>>   
>> +#include <linux/sched/debug.h> /* for show_regs */
>> +static void check_return_regs_valid(struct pt_regs *regs)
>> +{
>> +#ifdef CONFIG_PPC_BOOK3S_64
>> +	static bool warned = false;
>> +
>> +	if (regs->trap == 0x980 || regs->trap == 0xe00 || regs->trap == 0xe20 ||
>> +	    regs->trap == 0xe40 || regs->trap == 0xe60 || regs->trap == 0xe80 ||
>> +	    regs->trap == 0xea0 || regs->trap == 0xf80 || regs->trap == 0x1200 ||
>> +	    regs->trap == 0x1500 || regs->trap == 0x1600 || regs->trap == 0x1800) {
> 
> Can you use names defined in asm/interrupt.h instead of raw values ?
> Some are already there, others can be added.

Good idea. Could go into a helper too actually.

I wanted to clean up the KVM definitions and unify them with interrupt.h 
defs but that's a bit of churn. Can I get to that in the next merge or 
so?

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH 2/2] powerpc/64s/interrupt: Check and fix srr_valid without crashing
From: Christophe Leroy @ 2021-06-22  8:55 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev
In-Reply-To: <1624351871.ezysew4q8c.astroid@bobo.none>



Le 22/06/2021 à 10:54, Nicholas Piggin a écrit :
> Excerpts from Christophe Leroy's message of June 22, 2021 4:47 pm:
>>
>>
>> Le 22/06/2021 à 08:04, Nicholas Piggin a écrit :
>>> The PPC_RFI_SRR_DEBUG check added by patch "powerpc/64s: avoid reloading
>>> (H)SRR registers if they are still valid" has a few deficiencies. It
>>> does not fix the actual problem, it's not enabled by default, and it
>>> causes a program check interrupt which can cause more difficulties.
>>>
>>> However there are a lot of paths which may clobber SRRs or change return
>>> regs, and difficult to have a high confidence that all paths are covered
>>> without wider testing.
>>>
>>> Add a relatively low overhead always-enabled check that catches most
>>> such cases, reports once, and fixes it so the kernel can continue.
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>>    arch/powerpc/kernel/interrupt.c | 58 +++++++++++++++++++++++++++++++++
>>>    1 file changed, 58 insertions(+)
>>>
>>> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
>>> index 05fa3ae56e25..5920a3e8d1d5 100644
>>> --- a/arch/powerpc/kernel/interrupt.c
>>> +++ b/arch/powerpc/kernel/interrupt.c
>>> @@ -231,6 +231,56 @@ static notrace void booke_load_dbcr0(void)
>>>    #endif
>>>    }
>>>    
>>> +#include <linux/sched/debug.h> /* for show_regs */
>>> +static void check_return_regs_valid(struct pt_regs *regs)
>>> +{
>>> +#ifdef CONFIG_PPC_BOOK3S_64
>>> +	static bool warned = false;
>>> +
>>> +	if (regs->trap == 0x980 || regs->trap == 0xe00 || regs->trap == 0xe20 ||
>>> +	    regs->trap == 0xe40 || regs->trap == 0xe60 || regs->trap == 0xe80 ||
>>> +	    regs->trap == 0xea0 || regs->trap == 0xf80 || regs->trap == 0x1200 ||
>>> +	    regs->trap == 0x1500 || regs->trap == 0x1600 || regs->trap == 0x1800) {
>>
>> Can you use names defined in asm/interrupt.h instead of raw values ?
>> Some are already there, others can be added.
> 
> Good idea. Could go into a helper too actually.
> 
> I wanted to clean up the KVM definitions and unify them with interrupt.h
> defs but that's a bit of churn. Can I get to that in the next merge or
> so?
> 


Sure

Christophe

^ permalink raw reply

* Re: [PATCH 4/4] powerpc: Enable KFENCE on BOOK3S/64
From: Michael Ellerman @ 2021-06-22  8:57 UTC (permalink / raw)
  To: Jordan Niethe, linuxppc-dev; +Cc: aneesh.kumar, npiggin, Jordan Niethe
In-Reply-To: <20210517061658.194708-5-jniethe5@gmail.com>

Jordan Niethe <jniethe5@gmail.com> writes:
> From: Christophe Leroy <christophe.leroy@csgroup.eu>
>
> This reuses the DEBUG_PAGEALLOC logic.
>
> Tested with CONFIG_KFENCE + CONFIG_KUNIT + CONFIG_KFENCE_KUNIT_TEST on
> radix and hash.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> [jpn: Handle radix]
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
>  arch/powerpc/Kconfig                         |  2 +-
>  arch/powerpc/include/asm/book3s/64/pgtable.h |  2 +-
>  arch/powerpc/include/asm/kfence.h            | 19 +++++++++++++++++++
>  arch/powerpc/mm/book3s64/hash_utils.c        | 12 ++++++------
>  arch/powerpc/mm/book3s64/radix_pgtable.c     |  8 +++++---
>  5 files changed, 32 insertions(+), 11 deletions(-)

This makes lockdep very unhappy :(

  [   24.016750][    C0] ================================
  [   24.017145][    C0] WARNING: inconsistent lock state
  [   24.017600][    C0] 5.13.0-rc2-00196-g8bf29f9c76e2 #1 Not tainted
  [   24.018222][    C0] --------------------------------
  [   24.018612][    C0] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
  [   24.019146][    C0] S55runtest/104 [HC0[0]:SC1[1]:HE1:SE0] takes:
  [   24.019695][    C0] c00000000278bf50 (init_mm.page_table_lock){+.?.}-{2:2}, at: change_page_attr+0x54/0x290
  [   24.021847][    C0] {SOFTIRQ-ON-W} state was registered at:
  [   24.022353][    C0]   lock_acquire+0x128/0x600
  [   24.022941][    C0]   _raw_spin_lock+0x54/0x80
  [   24.023301][    C0]   change_page_attr+0x54/0x290
  [   24.023667][    C0]   __apply_to_page_range+0x550/0xa70
  [   24.024070][    C0]   change_memory_attr+0x7c/0x140
  [   24.024445][    C0]   bpf_prog_select_runtime+0x230/0x2a0
  [   24.024911][    C0]   bpf_migrate_filter+0x18c/0x1e0
  [   24.025310][    C0]   bpf_prog_create+0x178/0x1d0
  [   24.025681][    C0]   ptp_classifier_init+0x4c/0x80
  [   24.026090][    C0]   sock_init+0xe0/0x100
  [   24.026422][    C0]   do_one_initcall+0x88/0x4b0
  [   24.026790][    C0]   kernel_init_freeable+0x364/0x40c
  [   24.027196][    C0]   kernel_init+0x24/0x188
  [   24.027539][    C0]   ret_from_kernel_thread+0x5c/0x70
  [   24.027987][    C0] irq event stamp: 1322
  [   24.028315][    C0] hardirqs last  enabled at (1322): [<c0000000010996f4>] _raw_spin_unlock_irqrestore+0x94/0xd0
  [   24.029084][    C0] hardirqs last disabled at (1321): [<c000000001099268>] _raw_spin_lock_irqsave+0xa8/0xc0
  [   24.029813][    C0] softirqs last  enabled at (738): [<c00000000109a9b8>] __do_softirq+0x5f8/0x668
  [   24.030531][    C0] softirqs last disabled at (1271): [<c000000000159b84>] __irq_exit_rcu+0x1c4/0x1d0
  [   24.031271][    C0]
  [   24.031271][    C0] other info that might help us debug this:
  [   24.031917][    C0]  Possible unsafe locking scenario:
  [   24.031917][    C0]
  [   24.032460][    C0]        CPU0
  [   24.032720][    C0]        ----
  [   24.032980][    C0]   lock(init_mm.page_table_lock);
  [   24.033400][    C0]   <Interrupt>
  [   24.033668][    C0]     lock(init_mm.page_table_lock);
  [   24.034102][    C0]
  [   24.034102][    C0]  *** DEADLOCK ***
  [   24.034102][    C0]
  [   24.034735][    C0] 5 locks held by S55runtest/104:
  [   24.035162][    C0]  #0: c00000000a9ef098 (&tty->ldisc_sem){++++}-{0:0}, at: tty_ldisc_ref_wait+0x3c/0xa0
  [   24.035998][    C0]  #1: c00000000a9ef130 (&tty->atomic_write_lock){+.+.}-{3:3}, at: file_tty_write.constprop.0+0xd8/0x3b0
  [   24.036849][    C0]  #2: c00000000a9ef2e8 (&tty->termios_rwsem){++++}-{3:3}, at: n_tty_write+0xd0/0x6b0
  [   24.037591][    C0]  #3: c0080000001d2378 (&ldata->output_lock){+.+.}-{3:3}, at: n_tty_write+0x248/0x6b0
  [   24.038342][    C0]  #4: c000000002618448 (rcu_callback){....}-{0:0}, at: rcu_core+0x450/0x1360
  [   24.039093][    C0]
  [   24.039093][    C0] stack backtrace:
  [   24.039727][    C0] CPU: 0 PID: 104 Comm: S55runtest Not tainted 5.13.0-rc2-00196-g8bf29f9c76e2 #1
  [   24.040790][    C0] Call Trace:
  [   24.041120][    C0] [c00000000adc2be0] [c000000000940868] dump_stack+0xec/0x144 (unreliable)
  [   24.041925][    C0] [c00000000adc2c30] [c0000000001f1b38] print_usage_bug.part.0+0x24c/0x278
  [   24.042611][    C0] [c00000000adc2cd0] [c0000000001eb0c0] mark_lock+0x950/0xc00
  [   24.043186][    C0] [c00000000adc2df0] [c0000000001ebb74] __lock_acquire+0x494/0x28b0
  [   24.043794][    C0] [c00000000adc2f20] [c0000000001eeba8] lock_acquire+0x128/0x600
  [   24.044384][    C0] [c00000000adc3020] [c000000001098f64] _raw_spin_lock+0x54/0x80
  [   24.044976][    C0] [c00000000adc3050] [c00000000008aa14] change_page_attr+0x54/0x290
  [   24.045586][    C0] [c00000000adc30b0] [c0000000004347e0] __apply_to_page_range+0x550/0xa70
  [   24.046238][    C0] [c00000000adc31a0] [c00000000008accc] change_memory_attr+0x7c/0x140
  [   24.046857][    C0] [c00000000adc31e0] [c000000000099f78] radix__kernel_map_pages+0x68/0x80
  [   24.047501][    C0] [c00000000adc3200] [c0000000004a8028] kfence_protect+0x48/0x80
  [   24.048091][    C0] [c00000000adc3230] [c0000000004a84a8] kfence_guarded_free+0x448/0x590
  [   24.048718][    C0] [c00000000adc3290] [c00000000049e1b0] __slab_free+0x400/0x6c0
  [   24.049307][    C0] [c00000000adc3390] [c0000000004a471c] kmem_cache_free+0x1ac/0x4e0
  [   24.049917][    C0] [c00000000adc3450] [c000000000147a10] free_task+0x70/0xe0
  [   24.050491][    C0] [c00000000adc3480] [c000000000154084] delayed_put_task_struct+0x134/0x250
  [   24.051149][    C0] [c00000000adc34c0] [c00000000022bb94] rcu_core+0x4b4/0x1360
  [   24.051727][    C0] [c00000000adc3580] [c00000000109a5cc] __do_softirq+0x20c/0x668
  [   24.052331][    C0] [c00000000adc3680] [c000000000159b84] __irq_exit_rcu+0x1c4/0x1d0
  [   24.052937][    C0] [c00000000adc36b0] [c000000000159dd0] irq_exit+0x20/0x50
  [   24.053496][    C0] [c00000000adc36d0] [c000000000028478] timer_interrupt+0x1a8/0x520
  [   24.054111][    C0] [c00000000adc3730] [c0000000000098c4] decrementer_common_virt+0x1a4/0x1b0
  [   24.054790][    C0] --- interrupt: 900 at arch_local_irq_restore+0x118/0x180
  [   24.055373][    C0] NIP:  c0000000000164b8 LR: c000000001099700 CTR: 0000000000000000
  [   24.055991][    C0] REGS: c00000000adc37a0 TRAP: 0900   Not tainted  (5.13.0-rc2-00196-g8bf29f9c76e2)
  [   24.056678][    C0] MSR:  9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 28004202  XER: 20040000
  [   24.057665][    C0] CFAR: c0000000000163c4 IRQMASK: 0
  [   24.057665][    C0] GPR00: c0000000010996f4 c00000000adc3a40 c0000000027dce00 0000000000000000
  [   24.057665][    C0] GPR04: c00000000ae26dc0 0000000000000006 c00000000adc39f4 0000000000000001
  [   24.057665][    C0] GPR08: 0000000079ce0000 0000000000008002 0000000000000001 9000000000001033
  [   24.057665][    C0] GPR12: 0000000000004000 c0000000034c0000 0000000000000000 0000000000000000
  [   24.057665][    C0] GPR16: 0000000000000000 0000000000000013 c0080000001d0000 c0080000001d2310
  [   24.057665][    C0] GPR20: c00000000a9ef000 7fffffffffffffff c00000000a9ef510 c00000000ae26300
  [   24.057665][    C0] GPR24: 0000000000000000 c00000000a75d410 0000000000000010 c00000000a683b80
  [   24.057665][    C0] GPR28: 0000000000000000 0000000000000000 c00000000a683b80 0000000000000000
  [   24.063433][    C0] NIP [c0000000000164b8] arch_local_irq_restore+0x118/0x180
  [   24.063978][    C0] LR [c000000001099700] _raw_spin_unlock_irqrestore+0xa0/0xd0
  [   24.064546][    C0] --- interrupt: 900
  [   24.064854][    C0] [c00000000adc3a40] [c0000000010996f4] _raw_spin_unlock_irqrestore+0x94/0xd0 (unreliable)
  [   24.065595][    C0] [c00000000adc3a70] [c000000000a1a944] hvc_write+0xb4/0x230
  [   24.066165][    C0] [c00000000adc3ad0] [c0000000009eeb20] n_tty_write+0x1a0/0x6b0
  [   24.066744][    C0] [c00000000adc3ba0] [c0000000009e64e0] file_tty_write.constprop.0+0x190/0x3b0
  [   24.067405][    C0] [c00000000adc3c60] [c0000000004e087c] new_sync_write+0x12c/0x1d0
  [   24.067997][    C0] [c00000000adc3d00] [c0000000004e3fa0] vfs_write+0x2a0/0x4b0
  [   24.068567][    C0] [c00000000adc3d60] [c0000000004e44b4] ksys_write+0x84/0x140
  [   24.069141][    C0] [c00000000adc3db0] [c0000000000300a4] system_call_exception+0x174/0x2e0
  [   24.069785][    C0] [c00000000adc3e10] [c00000000000cb5c] system_call_common+0xec/0x278
  [   24.070404][    C0] --- interrupt: c00 at 0x7fffa8f9067c
  [   24.071243][    C0] NIP:  00007fffa8f9067c LR: 00007fffa8f8218c CTR: 0000000000000000
  [   24.071834][    C0] REGS: c00000000adc3e80 TRAP: 0c00   Not tainted  (5.13.0-rc2-00196-g8bf29f9c76e2)
  [   24.072510][    C0] MSR:  900000000280f033 <SF,HV,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 20002202  XER: 00000000
  [   24.073428][    C0] IRQMASK: 0
  [   24.073428][    C0] GPR00: 0000000000000004 00007fffee2147b0 00007fffa8fd7c00 0000000000000001
  [   24.073428][    C0] GPR04: 00007fffa8fd6860 0000000000000013 0000000000000000 0000000000000000
  [   24.073428][    C0] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
  [   24.073428][    C0] GPR12: 0000000000000000 00007fffa8fd9960 0000000000000000 0000000000000000
  [   24.073428][    C0] GPR16: 0000000000000000 0000000000000000 0000000000000000 00000000100a3c28
  [   24.073428][    C0] GPR20: 00007fffee21ff98 00000000100a3be8 0000000000000000 00000000100e0d60
  [   24.073428][    C0] GPR24: 0000000000000000 00007fffa8fd6468 0000000000000020 0000000000000013
  [   24.073428][    C0] GPR28: 00007fffa8fd6860 0000000000000001 00007fffa8fd9960 0000000000000004
  [   24.079055][    C0] NIP [00007fffa8f9067c] 0x7fffa8f9067c
  [   24.079482][    C0] LR [00007fffa8f8218c] 0x7fffa8f8218c
  [   24.079901][    C0] --- interrupt: c00


AFAICS caused by kfence calling into change_page_attr() from softirq
context, and taking init_mm.page_table_lock. But elsewhere we take
init_mm.page_table_lock with interrupts enabled.

To fix it I guess we either need to make change_page_attr() safe to call
from anywhere, or do something kfence specific.

cheers

^ permalink raw reply

* Re: [PATCH] KVM: PPC: Book3S HV: Workaround high stack usage with clang
From: Nicholas Piggin @ 2021-06-22  8:57 UTC (permalink / raw)
  To: Michael Ellerman, Nathan Chancellor, Paul Mackerras
  Cc: kernel test robot, Arnd Bergmann, Nick Desaulniers, linux-kernel,
	kvm-ppc, clang-built-linux, linuxppc-dev
In-Reply-To: <20210621182440.990242-1-nathan@kernel.org>

Excerpts from Nathan Chancellor's message of June 22, 2021 4:24 am:
> LLVM does not emit optimal byteswap assembly, which results in high
> stack usage in kvmhv_enter_nested_guest() due to the inlining of
> byteswap_pt_regs(). With LLVM 12.0.0:
> 
> arch/powerpc/kvm/book3s_hv_nested.c:289:6: error: stack frame size of
> 2512 bytes in function 'kvmhv_enter_nested_guest' [-Werror,-Wframe-larger-than=]
> long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
>      ^
> 1 error generated.
> 
> While this gets fixed in LLVM, mark byteswap_pt_regs() as
> noinline_for_stack so that it does not get inlined and break the build
> due to -Werror by default in arch/powerpc/. Not inlining saves
> approximately 800 bytes with LLVM 12.0.0:
> 
> arch/powerpc/kvm/book3s_hv_nested.c:290:6: warning: stack frame size of
> 1728 bytes in function 'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
> long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
>      ^
> 1 warning generated.
> 
> Link: https://github.com/ClangBuiltLinux/linux/issues/1292
> Link: https://bugs.llvm.org/show_bug.cgi?id=49610
> Link: https://lore.kernel.org/r/202104031853.vDT0Qjqj-lkp@intel.com/
> Link: https://gist.github.com/ba710e3703bf45043a31e2806c843ffd
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Nathan Chancellor <nathan@kernel.org>

Seems okay to me. If it was something where performance might be 
significant I guess you could ifdef on CC_IS_CLANG, but for this
it shouldn't matter.

Acked-by: Nicholas Piggin <npiggin@gmail.com>

Thanks,
Nick

> ---
>  arch/powerpc/kvm/book3s_hv_nested.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 60724f674421..1b3ff0af1264 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -53,7 +53,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
>  	hr->dawrx1 = vcpu->arch.dawrx1;
>  }
>  
> -static void byteswap_pt_regs(struct pt_regs *regs)
> +/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
> +static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
>  {
>  	unsigned long *addr = (unsigned long *) regs;
>  
> 
> base-commit: 4a21192e2796c3338c4b0083b494a84a61311aaf
> -- 
> 2.32.0.93.g670b81a890
> 
> 

^ permalink raw reply

* [RFC PATCH 00/43] KVM: PPC: Book3S HV P9: entry/exit optimisations round 1
From: Nicholas Piggin @ 2021-06-22 10:56 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin

This series applies to powerpc topic/ppc-kvm branch (KVM Cify
series in particular), plus "KVM: PPC: Book3S HV Nested: Reflect L2 PMU
in-use to L0 when L2 SPRs are live" posted to kvm-ppc.

This reduces radix guest full entry/exit latency on POWER9 and POWER10
by almost 2x (hash is similar but it's still significantly slower than
the P7/8 real mode handler). Nested HV guests should see speedups with
some smaller improvements in the L1, plus the L0 switching sees many
of the same speedups as a direct guest.

It does this in several main ways:

- Rearrange code to optimise SPR accesses. Mainly, avoid scoreboard
  stalls.

- Test SPR values to avoid mtSPRs where possible. mtSPRs are expensive.

- Reduce mftb. mftb is expensive.

- Demand fault certain facilities to avoid saving and/or restoring them
  (at the cost of a fault when they are used, but this is mitigated over
  a number of entries, as is done for the facilities when context
  switching processes). PM, TM, and EBB so far.

- Defer some sequences that are made just in case a guest is interrupted
  in the middle of a critical section to the case where the guest is
  scheduled on a different CPU, rather than every time (at the cost of
  an extra IPI in this case). Namely the tlbsync sequence for radix with
  GTSE, which is very expensive.

Some of the numbers quoted in changelogs may have changed a bit with
patches being updated, reordered, etc. They give a bit of a guide, but
I might remove them from the final submission because they're too much
to maintain.

Thanks,
Nick

Nicholas Piggin (43):
  powerpc/64s: Remove WORT SPR from POWER9/10
  KVM: PPC: Book3S HV P9: Use set_dec to set decrementer to host
  KVM: PPC: Book3S HV P9: Use host timer accounting to avoid decrementer
    read
  KVM: PPC: Book3S HV P9: Use large decrementer for HDEC
  KVM: PPC: Book3S HV P9: Reduce mftb per guest entry/exit
  powerpc/time: add API for KVM to re-arm the host timer/decrementer
  KVM: PPC: Book3S HV: POWER10 enable HAIL when running radix guests
  powerpc/64s: Keep AMOR SPR a constant ~0 at runtime
  KVM: PPC: Book3S HV: Don't always save PMU for guest capable of
    nesting
  powerpc/64s: Always set PMU control registers to frozen/disabled when
    not in use
  KVM: PPC: Book3S HV P9: Implement PMU save/restore in C
  KVM: PPC: Book3S HV P9: Factor out yield_count increment
  KVM: PPC: Book3S HV P9: Factor PMU save/load into context switch
    functions
  KVM: PPC: Book3S HV P9: Demand fault PMU SPRs when marked not inuse
  KVM: PPC: Book3S HV: CTRL SPR does not require read-modify-write
  KVM: PPC: Book3S HV P9: Move SPRG restore to restore_p9_host_os_sprs
  KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save
    host SPRs
  KVM: PPC: Book3S HV P9: Improve mtmsrd scheduling by delaying MSR[EE]
    disable
  KVM: PPC: Book3S HV P9: Add kvmppc_stop_thread to match
    kvmppc_start_thread
  KVM: PPC: Book3S HV: Change dec_expires to be relative to guest
    timebase
  KVM: PPC: Book3S HV P9: Move TB updates
  KVM: PPC: Book3S HV P9: Optimise timebase reads
  KVM: PPC: Book3S HV P9: Avoid SPR scoreboard stalls
  KVM: PPC: Book3S HV P9: Only execute mtSPR if the value changed
  KVM: PPC: Book3S HV P9: Juggle SPR switching around
  KVM: PPC: Book3S HV P9: Move vcpu register save/restore into functions
  KVM: PPC: Book3S HV P9: Move host OS save/restore functions to
    built-in
  KVM: PPC: Book3S HV P9: Move nested guest entry into its own function
  KVM: PPC: Book3S HV P9: Move remaining SPR and MSR access into low
    level entry
  KVM: PPC: Book3S HV P9: Implement TM fastpath for guest entry/exit
  KVM: PPC: Book3S HV P9: Switch PMU to guest as late as possible
  KVM: PPC: Book3S HV P9: Restrict DSISR canary workaround to processors
    that require it
  KVM: PPC: Book3S HV P9: More SPR speed improvements
  KVM: PPC: Book3S HV P9: Demand fault EBB facility registers
  KVM: PPC: Book3S HV P9: Demand fault TM facility registers
  KVM: PPC: Book3S HV P9: Use Linux SPR save/restore to manage some host
    SPRs
  KVM: PPC: Book3S HV P9: Comment and fix MMU context switching code
  KVM: PPC: Book3S HV P9: Test dawr_enabled() before saving host DAWR
    SPRs
  KVM: PPC: Book3S HV P9: Don't restore PSSCR if not needed
  KVM: PPC: Book3S HV P9: Avoid tlbsync sequence on radix guest exit
  KVM: PPC: Book3S HV Nested: Avoid extra mftb() in nested entry
  KVM: PPC: Book3S HV P9: Improve mfmsr performance on entry
  KVM: PPC: Book3S HV P9: Optimise hash guest SLB saving

 arch/powerpc/include/asm/asm-prototypes.h |   5 -
 arch/powerpc/include/asm/kvm_asm.h        |   1 +
 arch/powerpc/include/asm/kvm_book3s.h     |   6 +
 arch/powerpc/include/asm/kvm_book3s_64.h  |   4 +-
 arch/powerpc/include/asm/kvm_host.h       |   5 +-
 arch/powerpc/include/asm/switch_to.h      |   2 +
 arch/powerpc/include/asm/time.h           |  19 +-
 arch/powerpc/kernel/cpu_setup_power.c     |  12 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c         |   8 +-
 arch/powerpc/kernel/process.c             |  30 +
 arch/powerpc/kernel/time.c                |  54 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c    |   4 +
 arch/powerpc/kvm/book3s_hv.c              | 628 +++++++++----------
 arch/powerpc/kvm/book3s_hv.h              |  36 ++
 arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
 arch/powerpc/kvm/book3s_hv_nested.c       |  21 +-
 arch/powerpc/kvm/book3s_hv_p9_entry.c     | 725 +++++++++++++++++++---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  74 +--
 arch/powerpc/mm/book3s64/radix_pgtable.c  |  15 -
 arch/powerpc/perf/core-book3s.c           |   7 +
 arch/powerpc/platforms/powernv/idle.c     |  10 +-
 21 files changed, 1143 insertions(+), 536 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv.h

-- 
2.23.0


^ permalink raw reply

* [RFC PATCH 01/43] powerpc/64s: Remove WORT SPR from POWER9/10
From: Nicholas Piggin @ 2021-06-22 10:56 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

This register is not architected and not implemented in POWER9 or 10,
it just reads back zeroes for compatibility.

-78 cycles (9255) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c          | 3 ---
 arch/powerpc/platforms/powernv/idle.c | 2 --
 2 files changed, 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9228042bd54f..97f3d6d54b61 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3640,7 +3640,6 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
 	mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
 	mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
 	mtspr(SPRN_BESCR, vcpu->arch.bescr);
-	mtspr(SPRN_WORT, vcpu->arch.wort);
 	mtspr(SPRN_TIDR, vcpu->arch.tid);
 	mtspr(SPRN_AMR, vcpu->arch.amr);
 	mtspr(SPRN_UAMOR, vcpu->arch.uamor);
@@ -3667,7 +3666,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
 	vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
 	vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
 	vcpu->arch.bescr = mfspr(SPRN_BESCR);
-	vcpu->arch.wort = mfspr(SPRN_WORT);
 	vcpu->arch.tid = mfspr(SPRN_TIDR);
 	vcpu->arch.amr = mfspr(SPRN_AMR);
 	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
@@ -3699,7 +3697,6 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
 				    struct p9_host_os_sprs *host_os_sprs)
 {
 	mtspr(SPRN_PSPB, 0);
-	mtspr(SPRN_WORT, 0);
 	mtspr(SPRN_UAMOR, 0);
 
 	mtspr(SPRN_DSCR, host_os_sprs->dscr);
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 528a7e0cf83a..180baecad914 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -667,7 +667,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
 		sprs.purr	= mfspr(SPRN_PURR);
 		sprs.spurr	= mfspr(SPRN_SPURR);
 		sprs.dscr	= mfspr(SPRN_DSCR);
-		sprs.wort	= mfspr(SPRN_WORT);
 		sprs.ciabr	= mfspr(SPRN_CIABR);
 
 		sprs.mmcra	= mfspr(SPRN_MMCRA);
@@ -785,7 +784,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
 	mtspr(SPRN_PURR,	sprs.purr);
 	mtspr(SPRN_SPURR,	sprs.spurr);
 	mtspr(SPRN_DSCR,	sprs.dscr);
-	mtspr(SPRN_WORT,	sprs.wort);
 	mtspr(SPRN_CIABR,	sprs.ciabr);
 
 	mtspr(SPRN_MMCRA,	sprs.mmcra);
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 02/43] KVM: PPC: Book3S HV P9: Use set_dec to set decrementer to host
From: Nicholas Piggin @ 2021-06-22 10:56 UTC (permalink / raw)
  To: kvm-ppc; +Cc: Alexey Kardashevskiy, linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

The host Linux timer code arms the decrementer with the value
'decrementers_next_tb - current_tb' using set_dec(), which stores
val - 1 on Book3S-64, which is not quite the same as what KVM does
to re-arm the host decrementer when exiting the guest.

This shouldn't be a significant change, but it makes the logic match
and avoids this small extra change being brought into the next patch.

Suggested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 97f3d6d54b61..d19b4ae01642 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3914,7 +3914,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc->entry_exit_map = 0x101;
 	vc->in_guest = 0;
 
-	mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+	set_dec(local_paca->kvm_hstate.dec_expires - mftb());
 	/* We may have raced with new irq work */
 	if (test_irq_work_pending())
 		set_dec(1);
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 03/43] KVM: PPC: Book3S HV P9: Use host timer accounting to avoid decrementer read
From: Nicholas Piggin @ 2021-06-22 10:56 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

There is no need to save away the host DEC value, as it is derived
from the host timer subsystem which maintains the next timer time,
so it can be restored from there.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/time.h |  5 +++++
 arch/powerpc/kernel/time.c      |  1 +
 arch/powerpc/kvm/book3s_hv.c    | 14 +++++++-------
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 8c2c3dd4ddba..fd09b4797fd7 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -111,6 +111,11 @@ static inline unsigned long test_irq_work_pending(void)
 
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
+static inline u64 timer_get_next_tb(void)
+{
+	return __this_cpu_read(decrementers_next_tb);
+}
+
 /* Convert timebase ticks to nanoseconds */
 unsigned long long tb_to_ns(unsigned long long tb_ticks);
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index da995c5fb97d..98bdd96141f2 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -108,6 +108,7 @@ struct clock_event_device decrementer_clockevent = {
 EXPORT_SYMBOL(decrementer_clockevent);
 
 DEFINE_PER_CPU(u64, decrementers_next_tb);
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
 static DEFINE_PER_CPU(struct clock_event_device, decrementers);
 
 #define XSEC_PER_SEC (1024*1024)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d19b4ae01642..a413377aafb5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3729,18 +3729,17 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	struct p9_host_os_sprs host_os_sprs;
 	s64 dec;
-	u64 tb;
+	u64 tb, next_timer;
 	int trap, save_pmu;
 
 	WARN_ON_ONCE(vcpu->arch.ceded);
 
-	dec = mfspr(SPRN_DEC);
 	tb = mftb();
-	if (dec < 0)
+	next_timer = timer_get_next_tb();
+	if (tb >= next_timer)
 		return BOOK3S_INTERRUPT_HV_DECREMENTER;
-	local_paca->kvm_hstate.dec_expires = dec + tb;
-	if (local_paca->kvm_hstate.dec_expires < time_limit)
-		time_limit = local_paca->kvm_hstate.dec_expires;
+	if (next_timer < time_limit)
+		time_limit = next_timer;
 
 	save_p9_host_os_sprs(&host_os_sprs);
 
@@ -3914,7 +3913,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc->entry_exit_map = 0x101;
 	vc->in_guest = 0;
 
-	set_dec(local_paca->kvm_hstate.dec_expires - mftb());
+	next_timer = timer_get_next_tb();
+	set_dec(next_timer - mftb());
 	/* We may have raced with new irq work */
 	if (test_irq_work_pending())
 		set_dec(1);
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 04/43] KVM: PPC: Book3S HV P9: Use large decrementer for HDEC
From: Nicholas Piggin @ 2021-06-22 10:56 UTC (permalink / raw)
  To: kvm-ppc; +Cc: Alexey Kardashevskiy, linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

On processors that don't suppress the HDEC exceptions when LPCR[HDICE]=0,
this could help reduce needless guest exits due to leftover exceptions on
entering the guest.

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/time.h       | 2 ++
 arch/powerpc/kernel/time.c            | 1 +
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 3 ++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index fd09b4797fd7..69b6be617772 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -18,6 +18,8 @@
 #include <asm/vdso/timebase.h>
 
 /* time.c */
+extern u64 decrementer_max;
+
 extern unsigned long tb_ticks_per_jiffy;
 extern unsigned long tb_ticks_per_usec;
 extern unsigned long tb_ticks_per_sec;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 98bdd96141f2..026b3c0b648c 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -89,6 +89,7 @@ static struct clocksource clocksource_timebase = {
 
 #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
 u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
 
 static int decrementer_set_next_event(unsigned long evt,
 				      struct clock_event_device *dev);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 83f592eadcd2..63afd277c5f3 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -489,7 +489,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 		vc->tb_offset_applied = 0;
 	}
 
-	mtspr(SPRN_HDEC, 0x7fffffff);
+	/* HDEC must be at least as large as DEC, so decrementer_max fits */
+	mtspr(SPRN_HDEC, decrementer_max);
 
 	save_clear_guest_mmu(kvm, vcpu);
 	switch_mmu_to_host(kvm, host_pidr);
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 05/43] KVM: PPC: Book3S HV P9: Reduce mftb per guest entry/exit
From: Nicholas Piggin @ 2021-06-22 10:56 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin, Fabiano Rosas
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

mftb is serialising (dispatch next-to-complete) so it is heavy weight
for a mfspr. Avoid reading it multiple times in the entry or exit paths.
A small number of cycles delay to timers is tolerable.

-118 cycles (9137) POWER9 virt-mode NULL hcall

Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c          | 4 ++--
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a413377aafb5..5ec534620e07 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3794,7 +3794,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	 *
 	 * XXX: Another day's problem.
 	 */
-	mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
+	mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb);
 
 	if (kvmhv_on_pseries()) {
 		/*
@@ -3914,7 +3914,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc->in_guest = 0;
 
 	next_timer = timer_get_next_tb();
-	set_dec(next_timer - mftb());
+	set_dec(next_timer - tb);
 	/* We may have raced with new irq work */
 	if (test_irq_work_pending())
 		set_dec(1);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 63afd277c5f3..c4f3e066fcb4 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -203,7 +203,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	unsigned long host_dawr1;
 	unsigned long host_dawrx1;
 
-	hdec = time_limit - mftb();
+	tb = mftb();
+	hdec = time_limit - tb;
 	if (hdec < 0)
 		return BOOK3S_INTERRUPT_HV_DECREMENTER;
 
@@ -215,7 +216,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	vcpu->arch.ceded = 0;
 
 	if (vc->tb_offset) {
-		u64 new_tb = mftb() + vc->tb_offset;
+		u64 new_tb = tb + vc->tb_offset;
 		mtspr(SPRN_TBU40, new_tb);
 		tb = mftb();
 		if ((tb & 0xffffff) < (new_tb & 0xffffff))
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 06/43] powerpc/time: add API for KVM to re-arm the host timer/decrementer
From: Nicholas Piggin @ 2021-06-22 10:56 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

Rather than have KVM look up the host timer and fiddle with the
irq-work internal details, have the powerpc/time.c code provide a
function for KVM to re-arm the Linux timer code when exiting a
guest.

This implementation improves on the existing code by marking a
decrementer interrupt as soft-pending if a timer has expired, rather
than setting DEC to a negative value, which tended to cause host
timers to take two interrupts (first hdec to exit the guest, then the
immediate dec).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/time.h | 16 +++-------
 arch/powerpc/kernel/time.c      | 52 +++++++++++++++++++++++++++------
 arch/powerpc/kvm/book3s_hv.c    |  7 ++---
 3 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 69b6be617772..924b2157882f 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -99,18 +99,6 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
 extern void secondary_cpu_time_init(void);
 extern void __init time_init(void);
 
-#ifdef CONFIG_PPC64
-static inline unsigned long test_irq_work_pending(void)
-{
-	unsigned long x;
-
-	asm volatile("lbz %0,%1(13)"
-		: "=r" (x)
-		: "i" (offsetof(struct paca_struct, irq_work_pending)));
-	return x;
-}
-#endif
-
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
 static inline u64 timer_get_next_tb(void)
@@ -118,6 +106,10 @@ static inline u64 timer_get_next_tb(void)
 	return __this_cpu_read(decrementers_next_tb);
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now);
+#endif
+
 /* Convert timebase ticks to nanoseconds */
 unsigned long long tb_to_ns(unsigned long long tb_ticks);
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 026b3c0b648c..7c9de3498548 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -510,6 +510,16 @@ EXPORT_SYMBOL(profile_pc);
  * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  */
 #ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+	unsigned long x;
+
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, irq_work_pending)));
+	return x;
+}
+
 static inline void set_irq_work_pending_flag(void)
 {
 	asm volatile("stb %0,%1(13)" : :
@@ -553,13 +563,44 @@ void arch_irq_work_raise(void)
 	preempt_enable();
 }
 
+static void set_dec_or_work(u64 val)
+{
+	set_dec(val);
+	/* We may have raced with new irq work */
+	if (unlikely(test_irq_work_pending()))
+		set_dec(1);
+}
+
 #else  /* CONFIG_IRQ_WORK */
 
 #define test_irq_work_pending()	0
 #define clear_irq_work_pending()
 
+static void set_dec_or_work(u64 val)
+{
+	set_dec(val);
+}
 #endif /* CONFIG_IRQ_WORK */
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now)
+{
+	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+
+	WARN_ON_ONCE(!arch_irqs_disabled());
+	WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+	if (now >= *next_tb) {
+		local_paca->irq_happened |= PACA_IRQ_DEC;
+	} else {
+		now = *next_tb - now;
+		if (now <= decrementer_max)
+			set_dec_or_work(now);
+	}
+}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
+
 /*
  * timer_interrupt - gets called when the decrementer overflows,
  * with interrupts disabled.
@@ -620,10 +661,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
 	} else {
 		now = *next_tb - now;
 		if (now <= decrementer_max)
-			set_dec(now);
-		/* We may have raced with new irq work */
-		if (test_irq_work_pending())
-			set_dec(1);
+			set_dec_or_work(now);
 		__this_cpu_inc(irq_stat.timer_irqs_others);
 	}
 
@@ -865,11 +903,7 @@ static int decrementer_set_next_event(unsigned long evt,
 				      struct clock_event_device *dev)
 {
 	__this_cpu_write(decrementers_next_tb, get_tb() + evt);
-	set_dec(evt);
-
-	/* We may have raced with new irq work */
-	if (test_irq_work_pending())
-		set_dec(1);
+	set_dec_or_work(evt);
 
 	return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5ec534620e07..36e1db48fccf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3913,11 +3913,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc->entry_exit_map = 0x101;
 	vc->in_guest = 0;
 
-	next_timer = timer_get_next_tb();
-	set_dec(next_timer - tb);
-	/* We may have raced with new irq work */
-	if (test_irq_work_pending())
-		set_dec(1);
+	timer_rearm_host_dec(tb);
+
 	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
 
 	kvmhv_load_host_pmu();
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 07/43] KVM: PPC: Book3S HV: POWER10 enable HAIL when running radix guests
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

HV interrupts may be taken with the MMU enabled when radix guests are
running. Enable LPCR[HAIL] on ISA v3.1 processors for radix guests.
Make this depend on the host LPCR[HAIL] being enabled. Currently that is
always enabled, but having this test means any issue that might require
LPCR[HAIL] to be disabled in the host will not have to be duplicated in
KVM.

-1380 cycles on P10 NULL hcall entry+exit

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 36e1db48fccf..ed713f49fbd5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4896,6 +4896,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
  */
 int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
 {
+	unsigned long lpcr, lpcr_mask;
+
 	if (nesting_enabled(kvm))
 		kvmhv_release_all_nested(kvm);
 	kvmppc_rmap_reset(kvm);
@@ -4905,8 +4907,13 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
 	kvm->arch.radix = 0;
 	spin_unlock(&kvm->mmu_lock);
 	kvmppc_free_radix(kvm);
-	kvmppc_update_lpcr(kvm, LPCR_VPM1,
-			   LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+
+	lpcr = LPCR_VPM1;
+	lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		lpcr_mask |= LPCR_HAIL;
+	kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
 	return 0;
 }
 
@@ -4916,6 +4923,7 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
  */
 int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
 {
+	unsigned long lpcr, lpcr_mask;
 	int err;
 
 	err = kvmppc_init_vm_radix(kvm);
@@ -4927,8 +4935,17 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
 	kvm->arch.radix = 1;
 	spin_unlock(&kvm->mmu_lock);
 	kvmppc_free_hpt(&kvm->arch.hpt);
-	kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
-			   LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+
+	lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+	lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		lpcr_mask |= LPCR_HAIL;
+		if (cpu_has_feature(CPU_FTR_HVMODE) &&
+				(kvm->arch.host_lpcr & LPCR_HAIL))
+			lpcr |= LPCR_HAIL;
+	}
+	kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
 	return 0;
 }
 
@@ -5092,6 +5109,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 		kvm->arch.mmu_ready = 1;
 		lpcr &= ~LPCR_VPM1;
 		lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+		if (cpu_has_feature(CPU_FTR_HVMODE) &&
+		    cpu_has_feature(CPU_FTR_ARCH_31) &&
+		    (kvm->arch.host_lpcr & LPCR_HAIL))
+			lpcr |= LPCR_HAIL;
 		ret = kvmppc_init_vm_radix(kvm);
 		if (ret) {
 			kvmppc_free_lpid(kvm->arch.lpid);
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 08/43] powerpc/64s: Keep AMOR SPR a constant ~0 at runtime
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

This register controls supervisor SPR modifications, and as such is only
relevant for KVM. KVM always sets AMOR to ~0 on guest entry, and never
restores it coming back out to the host, so it can be kept constant and
avoid the mtSPR in KVM guest entry.

-21 cycles (9116) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/cpu_setup_power.c    |  8 ++++++++
 arch/powerpc/kernel/dt_cpu_ftrs.c        |  2 ++
 arch/powerpc/kvm/book3s_hv_p9_entry.c    |  2 --
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  2 --
 arch/powerpc/mm/book3s64/radix_pgtable.c | 15 ---------------
 arch/powerpc/platforms/powernv/idle.c    |  8 +++-----
 6 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
index 3cca88ee96d7..a29dc8326622 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
 		return;
 
 	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
 	mtspr(SPRN_PCR, PCR_MASK);
 	init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
 }
@@ -150,6 +151,7 @@ void __restore_cpu_power7(void)
 		return;
 
 	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
 	mtspr(SPRN_PCR, PCR_MASK);
 	init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
 }
@@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
 		return;
 
 	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
 	mtspr(SPRN_PCR, PCR_MASK);
 	init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
 	init_HFSCR();
@@ -184,6 +187,7 @@ void __restore_cpu_power8(void)
 		return;
 
 	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
 	mtspr(SPRN_PCR, PCR_MASK);
 	init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
 	init_HFSCR();
@@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
 	mtspr(SPRN_PSSCR, 0);
 	mtspr(SPRN_LPID, 0);
 	mtspr(SPRN_PID, 0);
+	mtspr(SPRN_AMOR, ~0);
 	mtspr(SPRN_PCR, PCR_MASK);
 	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
 			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -223,6 +228,7 @@ void __restore_cpu_power9(void)
 	mtspr(SPRN_PSSCR, 0);
 	mtspr(SPRN_LPID, 0);
 	mtspr(SPRN_PID, 0);
+	mtspr(SPRN_AMOR, ~0);
 	mtspr(SPRN_PCR, PCR_MASK);
 	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
 			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
 	mtspr(SPRN_PSSCR, 0);
 	mtspr(SPRN_LPID, 0);
 	mtspr(SPRN_PID, 0);
+	mtspr(SPRN_AMOR, ~0);
 	mtspr(SPRN_PCR, PCR_MASK);
 	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
 			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -264,6 +271,7 @@ void __restore_cpu_power10(void)
 	mtspr(SPRN_PSSCR, 0);
 	mtspr(SPRN_LPID, 0);
 	mtspr(SPRN_PID, 0);
+	mtspr(SPRN_AMOR, ~0);
 	mtspr(SPRN_PCR, PCR_MASK);
 	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
 			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 358aee7c2d79..0a6b36b4bda8 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void)
 	mtspr(SPRN_LPCR, system_registers.lpcr);
 	if (hv_mode) {
 		mtspr(SPRN_LPID, 0);
+		mtspr(SPRN_AMOR, ~0);
 		mtspr(SPRN_HFSCR, system_registers.hfscr);
 		mtspr(SPRN_PCR, system_registers.pcr);
 	}
@@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)
 	}
 
 	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
 
 	lpcr = mfspr(SPRN_LPCR);
 	lpcr &=  ~LPCR_LPES0; /* HV external interrupts */
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index c4f3e066fcb4..a3281f0c9214 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -286,8 +286,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
 	mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
 
-	mtspr(SPRN_AMOR, ~0UL);
-
 	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
 
 	/*
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8dd437d7a2c6..007f87b97184 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -772,10 +772,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	/* Restore AMR and UAMOR, set AMOR to all 1s */
 	ld	r5,VCPU_AMR(r4)
 	ld	r6,VCPU_UAMOR(r4)
-	li	r7,-1
 	mtspr	SPRN_AMR,r5
 	mtspr	SPRN_UAMOR,r6
-	mtspr	SPRN_AMOR,r7
 
 	/* Restore state of CTRL run bit; assume 1 on entry */
 	lwz	r5,VCPU_CTRL(r4)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index fe236c38ce00..b985cfead5d7 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -566,18 +566,6 @@ void __init radix__early_init_devtree(void)
 	return;
 }
 
-static void radix_init_amor(void)
-{
-	/*
-	* In HV mode, we init AMOR (Authority Mask Override Register) so that
-	* the hypervisor and guest can setup IAMR (Instruction Authority Mask
-	* Register), enable key 0 and set it to 1.
-	*
-	* AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
-	*/
-	mtspr(SPRN_AMOR, (3ul << 62));
-}
-
 void __init radix__early_init_mmu(void)
 {
 	unsigned long lpcr;
@@ -638,7 +626,6 @@ void __init radix__early_init_mmu(void)
 		lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
 		radix_init_partition_table();
-		radix_init_amor();
 	} else {
 		radix_init_pseries();
 	}
@@ -662,8 +649,6 @@ void radix__early_init_mmu_secondary(void)
 
 		set_ptcr_when_no_uv(__pa(partition_tb) |
 				    (PATB_SIZE_SHIFT - 12));
-
-		radix_init_amor();
 	}
 
 	radix__switch_mmu_context(NULL, &init_mm);
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 180baecad914..f791ca041854 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -306,8 +306,8 @@ struct p7_sprs {
 	/* per thread SPRs that get lost in shallow states */
 	u64 amr;
 	u64 iamr;
-	u64 amor;
 	u64 uamor;
+	/* amor is restored to constant ~0 */
 };
 
 static unsigned long power7_idle_insn(unsigned long type)
@@ -378,7 +378,6 @@ static unsigned long power7_idle_insn(unsigned long type)
 	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 		sprs.amr	= mfspr(SPRN_AMR);
 		sprs.iamr	= mfspr(SPRN_IAMR);
-		sprs.amor	= mfspr(SPRN_AMOR);
 		sprs.uamor	= mfspr(SPRN_UAMOR);
 	}
 
@@ -397,7 +396,7 @@ static unsigned long power7_idle_insn(unsigned long type)
 			 */
 			mtspr(SPRN_AMR,		sprs.amr);
 			mtspr(SPRN_IAMR,	sprs.iamr);
-			mtspr(SPRN_AMOR,	sprs.amor);
+			mtspr(SPRN_AMOR,	~0);
 			mtspr(SPRN_UAMOR,	sprs.uamor);
 		}
 	}
@@ -687,7 +686,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
 
 	sprs.amr	= mfspr(SPRN_AMR);
 	sprs.iamr	= mfspr(SPRN_IAMR);
-	sprs.amor	= mfspr(SPRN_AMOR);
 	sprs.uamor	= mfspr(SPRN_UAMOR);
 
 	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */
@@ -708,7 +706,7 @@ static unsigned long power9_idle_stop(unsigned long psscr)
 		 */
 		mtspr(SPRN_AMR,		sprs.amr);
 		mtspr(SPRN_IAMR,	sprs.iamr);
-		mtspr(SPRN_AMOR,	sprs.amor);
+		mtspr(SPRN_AMOR,	~0);
 		mtspr(SPRN_UAMOR,	sprs.uamor);
 
 		/*
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 09/43] KVM: PPC: Book3S HV: Don't always save PMU for guest capable of nesting
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

Revert the workaround added by commit 63279eeb7f93a ("KVM: PPC: Book3S
HV: Always save guest pmu for guest capable of nesting").

Nested capable guests running with the earlier commit ("KVM: PPC: Book3S
HV Nested: Indicate guest PMU in-use in VPA") will now indicate the PMU
in-use status of their guests, which means the parent does not need to
unconditionally save the PMU for nested capable guests.

This will cause the PMU to break for nested guests when running older
nested hypervisor guests under a kernel with this change. It's unclear
whether there's an easy way to avoid that, so this could wait for a
release or so for the fix to filter into stable kernels.

-134 cycles (8982) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ed713f49fbd5..1f30f98b09d1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3901,8 +3901,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		vcpu->arch.vpa.dirty = 1;
 		save_pmu = lp->pmcregs_in_use;
 	}
-	/* Must save pmu if this guest is capable of running nested guests */
-	save_pmu |= nesting_enabled(vcpu->kvm);
 
 	kvmhv_save_guest_pmu(vcpu, save_pmu);
 #ifdef CONFIG_PPC_PSERIES
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 10/43] powerpc/64s: Always set PMU control registers to frozen/disabled when not in use
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

KVM PMU management code looks for particular frozen/disabled bits in
the PMU registers so it knows whether it must clear them when coming
out of a guest or not. Setting this up helps KVM make these optimisations
without getting confused. Longer term the better approach might be to
move guest/host PMU switching to the perf subsystem.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
 arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
 arch/powerpc/kvm/book3s_hv.c          | 5 +++++
 arch/powerpc/perf/core-book3s.c       | 7 +++++++
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
index a29dc8326622..3dc61e203f37 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
 static void init_PMU(void)
 {
 	mtspr(SPRN_MMCRA, 0);
-	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR0, MMCR0_FC);
 	mtspr(SPRN_MMCR1, 0);
 	mtspr(SPRN_MMCR2, 0);
 }
@@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
 {
 	mtspr(SPRN_MMCR3, 0);
 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
-	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
 }
 
 /*
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 0a6b36b4bda8..06a089fbeaa7 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -353,7 +353,7 @@ static void init_pmu_power8(void)
 	}
 
 	mtspr(SPRN_MMCRA, 0);
-	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR0, MMCR0_FC);
 	mtspr(SPRN_MMCR1, 0);
 	mtspr(SPRN_MMCR2, 0);
 	mtspr(SPRN_MMCRS, 0);
@@ -392,7 +392,7 @@ static void init_pmu_power9(void)
 		mtspr(SPRN_MMCRC, 0);
 
 	mtspr(SPRN_MMCRA, 0);
-	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR0, MMCR0_FC);
 	mtspr(SPRN_MMCR1, 0);
 	mtspr(SPRN_MMCR2, 0);
 }
@@ -428,7 +428,7 @@ static void init_pmu_power10(void)
 
 	mtspr(SPRN_MMCR3, 0);
 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
-	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
 }
 
 static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1f30f98b09d1..f7349d150828 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2593,6 +2593,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 #endif
 #endif
 	vcpu->arch.mmcr[0] = MMCR0_FC;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
+		vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
+	}
+
 	vcpu->arch.ctrl = CTRL_RUNLATCH;
 	/* default to host PVR, since we can't spoof it */
 	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 51622411a7cc..e33b29ec1a65 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1361,6 +1361,13 @@ static void power_pmu_enable(struct pmu *pmu)
 		goto out;
 
 	if (cpuhw->n_events == 0) {
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+			mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
+		} else {
+			mtspr(SPRN_MMCRA, 0);
+			mtspr(SPRN_MMCR0, MMCR0_FC);
+		}
 		ppc_set_pmu_inuse(0);
 		goto out;
 	}
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 11/43] KVM: PPC: Book3S HV P9: Implement PMU save/restore in C
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

Implement the P9 path PMU save/restore code in C, and remove the
POWER9/10 code from the P7/8 path assembly.

-449 cycles (8533) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/asm-prototypes.h |   5 -
 arch/powerpc/kvm/book3s_hv.c              | 205 ++++++++++++++++++++--
 arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  43 +----
 4 files changed, 200 insertions(+), 66 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 02ee6f5ac9fe..928db8ef9a5a 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -136,11 +136,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
 					bool preserve_nv) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-void kvmhv_save_host_pmu(void);
-void kvmhv_load_host_pmu(void);
-void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
-void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-
 void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
 
 long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f7349d150828..b1b94b3563b7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3635,6 +3635,188 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	trace_kvmppc_run_core(vc, 1);
 }
 
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+	unsigned long dscr;
+	unsigned long tidr;
+	unsigned long iamr;
+	unsigned long amr;
+	unsigned long fscr;
+
+	unsigned int pmc1;
+	unsigned int pmc2;
+	unsigned int pmc3;
+	unsigned int pmc4;
+	unsigned int pmc5;
+	unsigned int pmc6;
+	unsigned long mmcr0;
+	unsigned long mmcr1;
+	unsigned long mmcr2;
+	unsigned long mmcr3;
+	unsigned long mmcra;
+	unsigned long siar;
+	unsigned long sier1;
+	unsigned long sier2;
+	unsigned long sier3;
+	unsigned long sdar;
+};
+
+static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
+{
+	if (!(mmcr0 & MMCR0_FC))
+		goto do_freeze;
+	if (mmcra & MMCRA_SAMPLE_ENABLE)
+		goto do_freeze;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		if (!(mmcr0 & MMCR0_PMCCEXT))
+			goto do_freeze;
+		if (!(mmcra & MMCRA_BHRB_DISABLE))
+			goto do_freeze;
+	}
+	return;
+
+do_freeze:
+	mmcr0 = MMCR0_FC;
+	mmcra = 0;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		mmcr0 |= MMCR0_PMCCEXT;
+		mmcra = MMCRA_BHRB_DISABLE;
+	}
+
+	mtspr(SPRN_MMCR0, mmcr0);
+	mtspr(SPRN_MMCRA, mmcra);
+	isync();
+}
+
+static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+{
+	if (ppc_get_pmu_inuse()) {
+		/*
+		 * It might be better to put PMU handling (at least for the
+		 * host) in the perf subsystem because it knows more about what
+		 * is being used.
+		 */
+
+		/* POWER9, POWER10 do not implement HPMC or SPMC */
+
+		host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
+		host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
+
+		freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
+
+		host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
+		host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
+		host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
+		host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
+		host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
+		host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
+		host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
+		host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
+		host_os_sprs->sdar = mfspr(SPRN_SDAR);
+		host_os_sprs->siar = mfspr(SPRN_SIAR);
+		host_os_sprs->sier1 = mfspr(SPRN_SIER);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
+			host_os_sprs->sier2 = mfspr(SPRN_SIER2);
+			host_os_sprs->sier3 = mfspr(SPRN_SIER3);
+		}
+	}
+}
+
+static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
+{
+	mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+	mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+	mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+	mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+	mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+	mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+	mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+	mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+	mtspr(SPRN_SDAR, vcpu->arch.sdar);
+	mtspr(SPRN_SIAR, vcpu->arch.siar);
+	mtspr(SPRN_SIER, vcpu->arch.sier[0]);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		mtspr(SPRN_MMCR3, vcpu->arch.mmcr[4]);
+		mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
+		mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
+	}
+
+	/* Set MMCRA then MMCR0 last */
+	mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
+	mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
+	/* No isync necessary because we're starting counters */
+}
+
+static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
+{
+	struct lppaca *lp;
+	int save_pmu = 1;
+
+	lp = vcpu->arch.vpa.pinned_addr;
+	if (lp)
+		save_pmu = lp->pmcregs_in_use;
+
+	if (save_pmu) {
+		vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
+		vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
+
+		freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
+
+		vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
+		vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
+		vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
+		vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
+		vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
+		vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
+		vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
+		vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
+		vcpu->arch.sdar = mfspr(SPRN_SDAR);
+		vcpu->arch.siar = mfspr(SPRN_SIAR);
+		vcpu->arch.sier[0] = mfspr(SPRN_SIER);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
+			vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
+			vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
+		}
+	} else {
+		freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
+	}
+}
+
+static void load_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+{
+	if (ppc_get_pmu_inuse()) {
+		mtspr(SPRN_PMC1, host_os_sprs->pmc1);
+		mtspr(SPRN_PMC2, host_os_sprs->pmc2);
+		mtspr(SPRN_PMC3, host_os_sprs->pmc3);
+		mtspr(SPRN_PMC4, host_os_sprs->pmc4);
+		mtspr(SPRN_PMC5, host_os_sprs->pmc5);
+		mtspr(SPRN_PMC6, host_os_sprs->pmc6);
+		mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
+		mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
+		mtspr(SPRN_SDAR, host_os_sprs->sdar);
+		mtspr(SPRN_SIAR, host_os_sprs->siar);
+		mtspr(SPRN_SIER, host_os_sprs->sier1);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
+			mtspr(SPRN_SIER2, host_os_sprs->sier2);
+			mtspr(SPRN_SIER3, host_os_sprs->sier3);
+		}
+
+		/* Set MMCRA then MMCR0 last */
+		mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
+		mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
+		isync();
+	}
+}
+
 static void load_spr_state(struct kvm_vcpu *vcpu)
 {
 	mtspr(SPRN_DSCR, vcpu->arch.dscr);
@@ -3677,17 +3859,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
 	vcpu->arch.dscr = mfspr(SPRN_DSCR);
 }
 
-/*
- * Privileged (non-hypervisor) host registers to save.
- */
-struct p9_host_os_sprs {
-	unsigned long dscr;
-	unsigned long tidr;
-	unsigned long iamr;
-	unsigned long amr;
-	unsigned long fscr;
-};
-
 static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
 {
 	host_os_sprs->dscr = mfspr(SPRN_DSCR);
@@ -3735,7 +3906,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	struct p9_host_os_sprs host_os_sprs;
 	s64 dec;
 	u64 tb, next_timer;
-	int trap, save_pmu;
+	int trap;
 
 	WARN_ON_ONCE(vcpu->arch.ceded);
 
@@ -3748,7 +3919,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	save_p9_host_os_sprs(&host_os_sprs);
 
-	kvmhv_save_host_pmu();		/* saves it to PACA kvm_hstate */
+	save_p9_host_pmu(&host_os_sprs);
 
 	kvmppc_subcore_enter_guest();
 
@@ -3776,7 +3947,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		}
 	}
 #endif
-	kvmhv_load_guest_pmu(vcpu);
+	load_p9_guest_pmu(vcpu);
 
 	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
 	load_fp_state(&vcpu->arch.fp);
@@ -3898,16 +4069,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
 		kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
-	save_pmu = 1;
 	if (vcpu->arch.vpa.pinned_addr) {
 		struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
 		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
 		lp->yield_count = cpu_to_be32(yield_count);
 		vcpu->arch.vpa.dirty = 1;
-		save_pmu = lp->pmcregs_in_use;
 	}
 
-	kvmhv_save_guest_pmu(vcpu, save_pmu);
+	save_p9_guest_pmu(vcpu);
 #ifdef CONFIG_PPC_PSERIES
 	if (kvmhv_on_pseries())
 		get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
@@ -3920,7 +4089,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
 
-	kvmhv_load_host_pmu();
+	load_p9_host_pmu(&host_os_sprs);
 
 	kvmppc_subcore_exit_guest();
 
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 4444f83cb133..59d89e4b154a 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtlr	r0
 	blr
 
-_GLOBAL(kvmhv_save_host_pmu)
+/*
+ * void kvmhv_save_host_pmu(void)
+ */
+kvmhv_save_host_pmu:
 BEGIN_FTR_SECTION
 	/* Work around P8 PMAE bug */
 	li	r3, -1
@@ -138,14 +141,6 @@ BEGIN_FTR_SECTION
 	std	r8, HSTATE_MMCR2(r13)
 	std	r9, HSTATE_SIER(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	mfspr	r5, SPRN_MMCR3
-	mfspr	r6, SPRN_SIER2
-	mfspr	r7, SPRN_SIER3
-	std	r5, HSTATE_MMCR3(r13)
-	std	r6, HSTATE_SIER2(r13)
-	std	r7, HSTATE_SIER3(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	mfspr	r3, SPRN_PMC1
 	mfspr	r5, SPRN_PMC2
 	mfspr	r6, SPRN_PMC3
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 007f87b97184..0eb06734bc26 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2780,10 +2780,11 @@ kvmppc_msr_interrupt:
 	blr
 
 /*
+ * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
+ *
  * Load up guest PMU state.  R3 points to the vcpu struct.
  */
-_GLOBAL(kvmhv_load_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
+kvmhv_load_guest_pmu:
 	mr	r4, r3
 	mflr	r0
 	li	r3, 1
@@ -2817,27 +2818,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
 	mtspr	SPRN_MMCRA, r6
 	mtspr	SPRN_SIAR, r7
 	mtspr	SPRN_SDAR, r8
-BEGIN_FTR_SECTION
-	ld      r5, VCPU_MMCR + 24(r4)
-	ld      r6, VCPU_SIER + 8(r4)
-	ld      r7, VCPU_SIER + 16(r4)
-	mtspr   SPRN_MMCR3, r5
-	mtspr   SPRN_SIER2, r6
-	mtspr   SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 BEGIN_FTR_SECTION
 	ld	r5, VCPU_MMCR + 16(r4)
 	ld	r6, VCPU_SIER(r4)
 	mtspr	SPRN_MMCR2, r5
 	mtspr	SPRN_SIER, r6
-BEGIN_FTR_SECTION_NESTED(96)
 	lwz	r7, VCPU_PMC + 24(r4)
 	lwz	r8, VCPU_PMC + 28(r4)
 	ld	r9, VCPU_MMCRS(r4)
 	mtspr	SPRN_SPMC1, r7
 	mtspr	SPRN_SPMC2, r8
 	mtspr	SPRN_MMCRS, r9
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_MMCR0, r3
 	isync
@@ -2845,10 +2836,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	blr
 
 /*
+ * void kvmhv_load_host_pmu(void)
+ *
  * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
  */
-_GLOBAL(kvmhv_load_host_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
+kvmhv_load_host_pmu:
 	mflr	r0
 	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
 	cmpwi	r4, 0
@@ -2886,25 +2878,18 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_MMCR2, r8
 	mtspr	SPRN_SIER, r9
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	ld      r5, HSTATE_MMCR3(r13)
-	ld      r6, HSTATE_SIER2(r13)
-	ld      r7, HSTATE_SIER3(r13)
-	mtspr   SPRN_MMCR3, r5
-	mtspr   SPRN_SIER2, r6
-	mtspr   SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	mtspr	SPRN_MMCR0, r3
 	isync
 	mtlr	r0
 23:	blr
 
 /*
+ * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
+ *
  * Save guest PMU state into the vcpu struct.
  * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
  */
-_GLOBAL(kvmhv_save_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
+kvmhv_save_guest_pmu:
 	mr	r9, r3
 	mr	r8, r4
 BEGIN_FTR_SECTION
@@ -2953,14 +2938,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
 	std	r10, VCPU_MMCR + 16(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	mfspr   r5, SPRN_MMCR3
-	mfspr   r6, SPRN_SIER2
-	mfspr   r7, SPRN_SIER3
-	std     r5, VCPU_MMCR + 24(r9)
-	std     r6, VCPU_SIER + 8(r9)
-	std     r7, VCPU_SIER + 16(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	std	r7, VCPU_SIAR(r9)
 	std	r8, VCPU_SDAR(r9)
 	mfspr	r3, SPRN_PMC1
@@ -2978,7 +2955,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_SIER
 	std	r5, VCPU_SIER(r9)
-BEGIN_FTR_SECTION_NESTED(96)
 	mfspr	r6, SPRN_SPMC1
 	mfspr	r7, SPRN_SPMC2
 	mfspr	r8, SPRN_MMCRS
@@ -2987,7 +2963,6 @@ BEGIN_FTR_SECTION_NESTED(96)
 	std	r8, VCPU_MMCRS(r9)
 	lis	r4, 0x8000
 	mtspr	SPRN_MMCRS, r4
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 22:	blr
 
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 12/43] KVM: PPC: Book3S HV P9: Factor out yield_count increment
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

Factor duplicated code into a helper function.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b1b94b3563b7..38d8afa16839 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3896,6 +3896,16 @@ static inline bool hcall_is_xics(unsigned long req)
 		req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
 }
 
+static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
+{
+	struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+	if (lp) {
+		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+		lp->yield_count = cpu_to_be32(yield_count);
+		vcpu->arch.vpa.dirty = 1;
+	}
+}
+
 /*
  * Guest entry for POWER9 and later CPUs.
  */
@@ -3926,12 +3936,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc->entry_exit_map = 1;
 	vc->in_guest = 1;
 
-	if (vcpu->arch.vpa.pinned_addr) {
-		struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
-		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
-		lp->yield_count = cpu_to_be32(yield_count);
-		vcpu->arch.vpa.dirty = 1;
-	}
+	vcpu_vpa_increment_dispatch(vcpu);
 
 	if (cpu_has_feature(CPU_FTR_TM) ||
 	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
@@ -4069,12 +4074,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
 		kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
-	if (vcpu->arch.vpa.pinned_addr) {
-		struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
-		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
-		lp->yield_count = cpu_to_be32(yield_count);
-		vcpu->arch.vpa.dirty = 1;
-	}
+	vcpu_vpa_increment_dispatch(vcpu);
 
 	save_p9_guest_pmu(vcpu);
 #ifdef CONFIG_PPC_PSERIES
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 13/43] KVM: PPC: Book3S HV P9: Factor PMU save/load into context switch functions
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

Rather than guest/host save/restore functions, implement context switch
functions that take care of details like the VPA update for nested.

The reason to split these kinds of helpers into explicit save/load
functions is mainly to schedule SPR access nicely, but PMU is a special
case where the load requires mtSPR (to stop counters) and other
difficulties, so there's less possibility to schedule those nicely. The
SPR accesses also have side-effects if the PMU is running, and in later
changes we keep the host PMU running as long as possible so this code
can be better profiled, which also complicates scheduling.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c | 51 ++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 38d8afa16839..13b8389b0479 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3690,7 +3690,8 @@ static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
 	isync();
 }
 
-static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+static void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+				struct p9_host_os_sprs *host_os_sprs)
 {
 	if (ppc_get_pmu_inuse()) {
 		/*
@@ -3724,10 +3725,19 @@ static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
 			host_os_sprs->sier3 = mfspr(SPRN_SIER3);
 		}
 	}
-}
 
-static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
-{
+#ifdef CONFIG_PPC_PSERIES
+	if (kvmhv_on_pseries()) {
+		if (vcpu->arch.vpa.pinned_addr) {
+			struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+			get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
+		} else {
+			get_lppaca()->pmcregs_in_use = 1;
+		}
+	}
+#endif
+
+	/* load guest */
 	mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
 	mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
 	mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
@@ -3752,7 +3762,8 @@ static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
 	/* No isync necessary because we're starting counters */
 }
 
-static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
+static void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+				struct p9_host_os_sprs *host_os_sprs)
 {
 	struct lppaca *lp;
 	int save_pmu = 1;
@@ -3787,10 +3798,12 @@ static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
 	} else {
 		freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
 	}
-}
 
-static void load_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
-{
+#ifdef CONFIG_PPC_PSERIES
+	if (kvmhv_on_pseries())
+		get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+#endif
+
 	if (ppc_get_pmu_inuse()) {
 		mtspr(SPRN_PMC1, host_os_sprs->pmc1);
 		mtspr(SPRN_PMC2, host_os_sprs->pmc2);
@@ -3929,8 +3942,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	save_p9_host_os_sprs(&host_os_sprs);
 
-	save_p9_host_pmu(&host_os_sprs);
-
 	kvmppc_subcore_enter_guest();
 
 	vc->entry_exit_map = 1;
@@ -3942,17 +3953,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
 		kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
-#ifdef CONFIG_PPC_PSERIES
-	if (kvmhv_on_pseries()) {
-		if (vcpu->arch.vpa.pinned_addr) {
-			struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
-			get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
-		} else {
-			get_lppaca()->pmcregs_in_use = 1;
-		}
-	}
-#endif
-	load_p9_guest_pmu(vcpu);
+	switch_pmu_to_guest(vcpu, &host_os_sprs);
 
 	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
 	load_fp_state(&vcpu->arch.fp);
@@ -4076,11 +4077,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	vcpu_vpa_increment_dispatch(vcpu);
 
-	save_p9_guest_pmu(vcpu);
-#ifdef CONFIG_PPC_PSERIES
-	if (kvmhv_on_pseries())
-		get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
-#endif
+	switch_pmu_to_host(vcpu, &host_os_sprs);
 
 	vc->entry_exit_map = 0x101;
 	vc->in_guest = 0;
@@ -4089,8 +4086,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
 
-	load_p9_host_pmu(&host_os_sprs);
-
 	kvmppc_subcore_exit_guest();
 
 	return trap;
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 14/43] KVM: PPC: Book3S HV P9: Demand fault PMU SPRs when marked not inuse
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

The pmcregs_in_use field in the guest VPA can not be trusted to reflect
what the guest is doing with PMU SPRs, so the PMU must always be managed
(stopped) when exiting the guest, and SPR values set when entering the
guest to ensure it can't cause a covert channel or otherwise cause other
guests or the host to misbehave.

So prevent guest access to the PMU with HFSCR[PM] if pmcregs_in_use is
clear, and avoid the PMU SPR access on every partition switch. Guests
that set pmcregs_in_use incorrectly or when first setting it and using
the PMU will take a hypervisor facility unavailable interrupt that will
bring in the PMU SPRs.

-774 cycles (7759) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/kvm_host.h |   1 +
 arch/powerpc/kvm/book3s_hv.c        | 122 ++++++++++++++++++++++------
 arch/powerpc/kvm/book3s_hv_nested.c |  12 ++-
 3 files changed, 105 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7e4c3a741951..5c003a5ff854 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -819,6 +819,7 @@ struct kvm_vcpu_arch {
 	/* For support of nested guests */
 	struct kvm_nested_guest *nested;
 	u32 nested_vcpu_id;
+	u64 nested_hfscr;
 	gpa_t nested_io_gpr;
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 13b8389b0479..0733bb95f439 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1349,6 +1349,20 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
+/*
+ * If the lppaca had pmcregs_in_use clear when we exited the guest, then
+ * HFSCR_PM is cleared for next entry. If the guest then tries to access
+ * the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM
+ * back in the guest HFSCR will cause the next entry to load the PMU SPRs and
+ * allow the guest access to continue.
+ */
+static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hfscr |= HFSCR_PM;
+
+	return RESUME_GUEST;
+}
+
 static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 				 struct task_struct *tsk)
 {
@@ -1618,16 +1632,22 @@ XXX benchmark guest exits
 	 * to emulate.
 	 * Otherwise, we just generate a program interrupt to the guest.
 	 */
-	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
 		r = EMULATE_FAIL;
-		if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
-		    cpu_has_feature(CPU_FTR_ARCH_300))
-			r = kvmppc_emulate_doorbell_instr(vcpu);
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			unsigned long cause = vcpu->arch.hfscr >> 56;
+
+			if (cause == FSCR_MSGP_LG)
+				r = kvmppc_emulate_doorbell_instr(vcpu);
+			if (cause == FSCR_PM_LG)
+				r = kvmppc_pmu_unavailable(vcpu);
+		}
 		if (r == EMULATE_FAIL) {
 			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 			r = RESUME_GUEST;
 		}
 		break;
+	}
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
@@ -1734,6 +1754,19 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
 		break;
 
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
+		unsigned long cause = vcpu->arch.hfscr >> 56;
+
+		r = EMULATE_FAIL;
+		if (cause == FSCR_PM_LG && (vcpu->arch.nested_hfscr & HFSCR_PM))
+			r = kvmppc_pmu_unavailable(vcpu);
+
+		if (r == EMULATE_FAIL)
+			r = RESUME_HOST;
+
+		break;
+	}
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
 		/*
@@ -3693,6 +3726,17 @@ static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
 static void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
 				struct p9_host_os_sprs *host_os_sprs)
 {
+	struct lppaca *lp;
+	int load_pmu = 1;
+
+	lp = vcpu->arch.vpa.pinned_addr;
+	if (lp)
+		load_pmu = lp->pmcregs_in_use;
+
+	if (load_pmu)
+	      vcpu->arch.hfscr |= HFSCR_PM;
+
+	/* Save host */
 	if (ppc_get_pmu_inuse()) {
 		/*
 		 * It might be better to put PMU handling (at least for the
@@ -3737,29 +3781,31 @@ static void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
 	}
 #endif
 
-	/* load guest */
-	mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
-	mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
-	mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
-	mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
-	mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
-	mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
-	mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
-	mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
-	mtspr(SPRN_SDAR, vcpu->arch.sdar);
-	mtspr(SPRN_SIAR, vcpu->arch.siar);
-	mtspr(SPRN_SIER, vcpu->arch.sier[0]);
+	/* Load guest */
+	if (vcpu->arch.hfscr & HFSCR_PM) {
+		mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+		mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+		mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+		mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+		mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+		mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+		mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+		mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+		mtspr(SPRN_SDAR, vcpu->arch.sdar);
+		mtspr(SPRN_SIAR, vcpu->arch.siar);
+		mtspr(SPRN_SIER, vcpu->arch.sier[0]);
 
-	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
-		mtspr(SPRN_MMCR3, vcpu->arch.mmcr[4]);
-		mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
-		mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
-	}
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			mtspr(SPRN_MMCR3, vcpu->arch.mmcr[4]);
+			mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
+			mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
+		}
 
-	/* Set MMCRA then MMCR0 last */
-	mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
-	mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
-	/* No isync necessary because we're starting counters */
+		/* Set MMCRA then MMCR0 last */
+		mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
+		mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
+		/* No isync necessary because we're starting counters */
+	}
 }
 
 static void switch_pmu_to_host(struct kvm_vcpu *vcpu,
@@ -3795,9 +3841,31 @@ static void switch_pmu_to_host(struct kvm_vcpu *vcpu,
 			vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
 			vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
 		}
-	} else {
+
+	} else if (vcpu->arch.hfscr & HFSCR_PM) {
+		/*
+		 * The guest accessed PMC SPRs without specifying they should
+		 * be preserved. Stop them from counting if the guest had
+		 * started anything.
+		 */
 		freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
-	}
+
+		/*
+		 * Demand-fault PMU register access in the guest.
+		 *
+		 * This is used to grab the guest's VPA pmcregs_in_use value
+		 * and reflect it into the host's VPA in the case of a nested
+		 * hypervisor.
+		 *
+		 * It also avoids having to zero-out SPRs after each guest
+		 * exit to avoid side-channels.
+		 *
+		 * This is cleared here when we exit the guest, so later HFSCR
+		 * interrupt handling can add it back to run the guest with
+		 * PM enabled next time.
+		 */
+		vcpu->arch.hfscr &= ~HFSCR_PM;
+	} /* otherwise the PMU should still be frozen from guest entry */
 
 #ifdef CONFIG_PPC_PSERIES
 	if (kvmhv_on_pseries())
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 60724f674421..6add13a22f56 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -103,7 +103,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	hr->dpdes = vc->dpdes;
-	hr->hfscr = vcpu->arch.hfscr;
+	hr->hfscr = vcpu->arch.nested_hfscr;
 	hr->purr = vcpu->arch.purr;
 	hr->spurr = vcpu->arch.spurr;
 	hr->ic = vcpu->arch.ic;
@@ -126,6 +126,10 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
 	case BOOK3S_INTERRUPT_H_INST_STORAGE:
 		hr->asdr = vcpu->arch.fault_gpa;
 		break;
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+		hr->hfscr &= ~HFSCR_INTR_CAUSE;
+		hr->hfscr |= vcpu->arch.hfscr & HFSCR_INTR_CAUSE;
+		break;
 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
 		hr->heir = vcpu->arch.emul_inst;
 		break;
@@ -161,9 +165,10 @@ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 
 	/*
 	 * Don't let L1 enable features for L2 which we've disabled for L1,
-	 * but preserve the interrupt cause field.
+	 * but preserve the interrupt cause field and facilities that might
+	 * be disabled for demand faulting in the L1.
 	 */
-	hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
+	hr->hfscr &= (HFSCR_INTR_CAUSE | HFSCR_PM | vcpu->arch.hfscr);
 
 	/* Don't let data address watchpoint match in hypervisor state */
 	hr->dawrx0 &= ~DAWRX_HYP;
@@ -342,6 +347,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	/* set L1 state to L2 state */
 	vcpu->arch.nested = l2;
 	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+	vcpu->arch.nested_hfscr = l2_hv.hfscr;
 	vcpu->arch.regs = l2_regs;
 
 	/* Guest must always run with ME enabled, HV disabled. */
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 15/43] KVM: PPC: Book3S HV: CTRL SPR does not require read-modify-write
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

Processors that support KVM HV do not require read-modify-write of
the CTRL SPR to set/clear their thread's runlatch. Just write 1 or 0
to it.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c            |  2 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 15 ++++++---------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0733bb95f439..f0298b286c42 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3920,7 +3920,7 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
 	 */
 
 	if (!(vcpu->arch.ctrl & 1))
-		mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+		mtspr(SPRN_CTRLT, 0);
 }
 
 static void store_spr_state(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0eb06734bc26..488a1e07958c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -775,12 +775,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_AMR,r5
 	mtspr	SPRN_UAMOR,r6
 
-	/* Restore state of CTRL run bit; assume 1 on entry */
+	/* Restore state of CTRL run bit; the host currently has it set to 1 */
 	lwz	r5,VCPU_CTRL(r4)
 	andi.	r5,r5,1
 	bne	4f
-	mfspr	r6,SPRN_CTRLF
-	clrrdi	r6,r6,1
+	li	r6,0
 	mtspr	SPRN_CTRLT,r6
 4:
 	/* Secondary threads wait for primary to have done partition switch */
@@ -1209,12 +1208,12 @@ guest_bypass:
 	stw	r0, VCPU_CPU(r9)
 	stw	r0, VCPU_THREAD_CPU(r9)
 
-	/* Save guest CTRL register, set runlatch to 1 */
+	/* Save guest CTRL register, set runlatch to 1 if it was clear */
 	mfspr	r6,SPRN_CTRLF
 	stw	r6,VCPU_CTRL(r9)
 	andi.	r0,r6,1
 	bne	4f
-	ori	r6,r6,1
+	li	r6,1
 	mtspr	SPRN_CTRLT,r6
 4:
 	/*
@@ -2220,8 +2219,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 	 * Also clear the runlatch bit before napping.
 	 */
 kvm_do_nap:
-	mfspr	r0, SPRN_CTRLF
-	clrrdi	r0, r0, 1
+	li	r0,0
 	mtspr	SPRN_CTRLT, r0
 
 	li	r0,1
@@ -2240,8 +2238,7 @@ kvm_nap_sequence:		/* desired LPCR value in r5 */
 
 	bl	isa206_idle_insn_mayloss
 
-	mfspr	r0, SPRN_CTRLF
-	ori	r0, r0, 1
+	li	r0,1
 	mtspr	SPRN_CTRLT, r0
 
 	mtspr	SPRN_SRR1, r3
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 16/43] KVM: PPC: Book3S HV P9: Move SPRG restore to restore_p9_host_os_sprs
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

Move the SPR update into its relevant helper function. This will
help with SPR scheduling improvements in later changes.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f0298b286c42..73a8b45249e8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3953,6 +3953,8 @@ static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
 static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
 				    struct p9_host_os_sprs *host_os_sprs)
 {
+	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
+
 	mtspr(SPRN_PSPB, 0);
 	mtspr(SPRN_UAMOR, 0);
 
@@ -4152,8 +4154,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	timer_rearm_host_dec(tb);
 
-	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
-
 	kvmppc_subcore_exit_guest();
 
 	return trap;
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 17/43] KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save host SPRs
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

This reduces the number of mtmsrd required to enable facility bits when
saving/restoring registers, by having the KVM code set all bits up front
rather than using individual facility functions that set their particular
MSR bits.

-42 cycles (7803) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/process.c         | 24 +++++++++++
 arch/powerpc/kvm/book3s_hv.c          | 57 ++++++++++++++++++---------
 arch/powerpc/kvm/book3s_hv_p9_entry.c |  1 +
 3 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 89e34aa273e2..dfce089ac424 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -592,6 +592,30 @@ static void save_all(struct task_struct *tsk)
 	msr_check_and_clear(msr_all_available);
 }
 
+void save_user_regs_kvm(void)
+{
+	unsigned long usermsr;
+
+	if (!current->thread.regs)
+		return;
+
+	usermsr = current->thread.regs->msr;
+
+	if (usermsr & MSR_FP)
+		save_fpu(current);
+
+	if (usermsr & MSR_VEC)
+		save_altivec(current);
+
+	if (usermsr & MSR_TM) {
+                current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+                current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+                current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+                current->thread.regs->msr &= ~MSR_TM;
+	}
+}
+EXPORT_SYMBOL_GPL(save_user_regs_kvm);
+
 void flush_all_to_thread(struct task_struct *tsk)
 {
 	if (tsk->thread.regs) {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 73a8b45249e8..3ac5dbdb59f8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3999,6 +3999,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	struct p9_host_os_sprs host_os_sprs;
 	s64 dec;
 	u64 tb, next_timer;
+	unsigned long msr;
 	int trap;
 
 	WARN_ON_ONCE(vcpu->arch.ceded);
@@ -4010,8 +4011,23 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	if (next_timer < time_limit)
 		time_limit = next_timer;
 
+	vcpu->arch.ceded = 0;
+
 	save_p9_host_os_sprs(&host_os_sprs);
 
+	/* MSR bits may have been cleared by context switch */
+	msr = 0;
+	if (IS_ENABLED(CONFIG_PPC_FPU))
+		msr |= MSR_FP;
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		msr |= MSR_VEC;
+	if (cpu_has_feature(CPU_FTR_VSX))
+		msr |= MSR_VSX;
+	if (cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+		msr |= MSR_TM;
+	msr = msr_check_and_set(msr);
+
 	kvmppc_subcore_enter_guest();
 
 	vc->entry_exit_map = 1;
@@ -4025,7 +4041,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	switch_pmu_to_guest(vcpu, &host_os_sprs);
 
-	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
 	load_fp_state(&vcpu->arch.fp);
 #ifdef CONFIG_ALTIVEC
 	load_vr_state(&vcpu->arch.vr);
@@ -4134,7 +4149,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	restore_p9_host_os_sprs(vcpu, &host_os_sprs);
 
-	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
 	store_fp_state(&vcpu->arch.fp);
 #ifdef CONFIG_ALTIVEC
 	store_vr_state(&vcpu->arch.vr);
@@ -4663,6 +4677,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 	goto done;
 }
 
+void save_user_regs_kvm(void);
+
 static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
@@ -4672,19 +4688,24 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 	unsigned long user_tar = 0;
 	unsigned int user_vrsave;
 	struct kvm *kvm;
+	unsigned long msr;
 
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		return -EINVAL;
 	}
 
+	/* No need to go into the guest when all we'll do is come back out */
+	if (signal_pending(current)) {
+		run->exit_reason = KVM_EXIT_INTR;
+		return -EINTR;
+	}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	/*
 	 * Don't allow entry with a suspended transaction, because
 	 * the guest entry/exit code will lose it.
-	 * If the guest has TM enabled, save away their TM-related SPRs
-	 * (they will get restored by the TM unavailable interrupt).
 	 */
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
 	    (current->thread.regs->msr & MSR_TM)) {
 		if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
@@ -4692,12 +4713,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 			run->fail_entry.hardware_entry_failure_reason = 0;
 			return -EINVAL;
 		}
-		/* Enable TM so we can read the TM SPRs */
-		mtmsr(mfmsr() | MSR_TM);
-		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
-		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
-		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
-		current->thread.regs->msr &= ~MSR_TM;
 	}
 #endif
 
@@ -4712,18 +4727,24 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 
 	kvmppc_core_prepare_to_enter(vcpu);
 
-	/* No need to go into the guest when all we'll do is come back out */
-	if (signal_pending(current)) {
-		run->exit_reason = KVM_EXIT_INTR;
-		return -EINTR;
-	}
-
 	kvm = vcpu->kvm;
 	atomic_inc(&kvm->arch.vcpus_running);
 	/* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
 	smp_mb();
 
-	flush_all_to_thread(current);
+	msr = 0;
+	if (IS_ENABLED(CONFIG_PPC_FPU))
+		msr |= MSR_FP;
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		msr |= MSR_VEC;
+	if (cpu_has_feature(CPU_FTR_VSX))
+		msr |= MSR_VSX;
+	if (cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+		msr |= MSR_TM;
+	msr = msr_check_and_set(msr);
+
+	save_user_regs_kvm();
 
 	/* Save userspace EBB and other register values */
 	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index a3281f0c9214..065bfd4d2c63 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -224,6 +224,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 		vc->tb_offset_applied = vc->tb_offset;
 	}
 
+	/* Could avoid mfmsr by passing around, but probably no big deal */
 	msr = mfmsr();
 
 	host_hfscr = mfspr(SPRN_HFSCR);
-- 
2.23.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox