All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 07/23] Add SLB switching code for entry/exit
@ 2009-07-07 14:17 Alexander Graf
  2009-07-08  4:38 ` Benjamin Herrenschmidt
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Alexander Graf @ 2009-07-07 14:17 UTC (permalink / raw)
  To: kvm-ppc

This is the really low level of guest entry/exit code.

Usually the Linux kernel resides in virtual memory 0xc000000000000000 to
0xffffffffffffffff. These addresses are mapped into every userspace
application.

When going into a 32 bit guest, this is perfectly fine. That one can't
access memory that high anyways.

Going into a 64 bit guest, the guest kernel probably is in the same
virtual memory region as the host, so we need to switch between those two.

During normal entry code we're in those virtual addresses though. So
we need a small wrapper in real memory that switches from host to guest
high SLB state and vice versa.

To store both host and guest state in the SLB, we store guest kernel SLB
entries in a different range (0x4000000000000000 - 0x7fffffffffffffff).

For details on which entries go where, please see the patch itself.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/970_slb.S |  456 ++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 456 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/kvm/970_slb.S

diff --git a/arch/powerpc/kvm/970_slb.S b/arch/powerpc/kvm/970_slb.S
new file mode 100644
index 0000000..6fd5740
--- /dev/null
+++ b/arch/powerpc/kvm/970_slb.S
@@ -0,0 +1,456 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+/* Switch the SLB to the guest's context
+ *
+ * This function switches the SLB's contents to the guest's. This is achieved
+ * by switching Linux's kernel segments to unused segments and the guest's kernel
+ * segments from unused to kernel segments.
+ *
+ * Looking at the most significant nybble, we do the following on entry:
+ *
+ * 0  0000
+ * 1  0001
+ * 2  0010
+ * 3  0011
+ *
+ * 4  0100		->	c 1100
+ * 5  0101		->	d 1101
+ * 6  0110		->	e 1110
+ * 7  0111		->	f 1111
+ *
+ * 8  1000
+ * 9  1001
+ * a  1010
+ * b  1011
+ *
+ * c  1100		->	8 1000
+ * d  1101		->	9 1001
+ * e  1110		->	a 1010
+ * f  1111		->	b 1011
+ *
+ *
+ * That way we can run Linux as a guest, even though Linux is occupying the
+ * segments in question as host already.
+ *
+ * Required state:
+ *
+ * MSR = ~(IR|DR)	(instruction and data translation both off)
+ * R13 = PACA
+ * R0 = free
+ * R9 = guest IP
+ * R10 = guest MSR
+ * R11 = free
+ * R12 = free
+ * PACA[PACA_EXMC + EX_R9] = guest R9
+ * PACA[PACA_EXMC + EX_R10] = guest R10
+ * PACA[PACA_EXMC + EX_R11] = guest R11
+ * PACA[PACA_EXMC + EX_R12] = guest R12
+ * PACA[PACA_EXMC + EX_R13] = guest R13
+ * PACA[PACA_EXMC + EX_CCR] = guest CR
+ * PACA[PACA_EXMC + EX_R3] = guest XER
+ * SPRG1 = guest R0
+ */
+
+.global kvmppc_handler_trampoline_enter
+kvmppc_handler_trampoline_enter:
+	mtsrr0	r9	/* SRR0 = guest IP (r9 on entry), used by the RFI at the end */
+	mtsrr1	r10	/* SRR1 = guest MSR (r10 on entry), used by the RFI at the end */
+
+	/* Don't mess with SLB magic for 32 bit guests */
+	rldicl.	r10, r10, 1, 63	/* r10 = most significant bit of the guest MSR */
+	beq	slb_do_enter	/* bit clear: skip both SLB rewrite loops */
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		GUEST 1100
+	 * 5  0101		GUEST 1101
+	 * 6  0110		GUEST 1110
+	 * 7  0111		GUEST 1111
+	 *
+	 * 8  1000		-
+	 * 9  1001		-
+	 * a  1010		-
+	 * b  1011		-
+	 *
+	 * c  1100		HOST 1100
+	 * d  1101		HOST 1101
+	 * e  1110		HOST 1110
+	 * f  1111		HOST 1111
+	 */
+
+	/* Replace 11xx -> 10xx: move the host's kernel segments to the unused 8-b range */
+
+	/* for (r11 = 0; r11 < slb_entries; r11++) */
+	li	r11, 0	/* r11 = SLB entry index */
+slb_loop_11xx_10xx:
+
+	/* r10 = esid(r11) */
+	slbmfee	r10, r11
+	/* r0 = leftmost 2 bits of esid */
+	rldicl	r0, r10, 2, 62
+	/* both top bits set, i.e. (esid & 0xc...) == 0xc... ? */
+	cmpwi	r0, 3
+	/* no? skip it then */
+	bne+	slb_loop_11xx_10xx_skip
+	/* Skip invalid entries (V=0); r0 = bit 36 of the slbmfee result */
+	rldicl. r0, r10, 37, 63
+	beq	slb_loop_11xx_10xx_skip
+	/* r9 = VSID */
+	slbmfev r9, r11
+	/* r0 = esid & ESID_MASK (keep bits 0..35 only) */
+	rldicr	r0, r10, 0, 35
+	/* r0 |= CLASS_BIT(VSID); the rldic mask is 36..43, so VSID bits 57..63 come along too */
+	rldic	r12, r9, 56 - 36, 36
+	or	r0, r0, r12
+	/* slbie(r0): invalidate the old 11xx translation before rewriting it */
+	slbie	r0
+	/* r0 = esid & 0xb... (rotate, drop the bit that was bit 1, rotate back) */
+	rldicr	r0, r10, 2, 62
+	rotrdi	r0, r0, 2
+	/* r0 |= r11 (entry index goes into the low bits for slbmte) */
+	or	r0, r0, r11
+	/* slbmte(r9, r0): same VSID, relocated ESID, same entry index */
+	slbmte	r9, r0
+slb_loop_11xx_10xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_1)
+	cmpwi	r11, 0	/* NOTE(review): limit 0 looks like a placeholder patched at this label - confirm */
+	blt	slb_loop_11xx_10xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		GUEST 1100
+	 * 5  0101		GUEST 1101
+	 * 6  0110		GUEST 1110
+	 * 7  0111		GUEST 1111
+	 *
+	 * 8  1000		HOST 1100
+	 * 9  1001		HOST 1101
+	 * a  1010		HOST 1110
+	 * b  1011		HOST 1111
+	 *
+	 * c  1100		-
+	 * d  1101		-
+	 * e  1110		-
+	 * f  1111		-
+	 */
+
+	/* Replace 01xx -> 11xx: move the guest's kernel segments up (no slbie is issued in this loop) */
+
+	lis	r12, 0x8000000000000000@highest	/* r12 = 0xffffffff80000000 (lis sign-extends) */
+	rldicr  r12, r12, 32, 31	/* r12 = 0x8000000000000000, the bit to set in each ESID */
+	li	r11, 0	/* r11 = SLB entry index */
+slb_loop_01xx_11xx:
+	slbmfee	r10, r11	/* r10 = esid(r11) */
+	rldicl	r0, r10, 2, 62	/* r0 = leftmost 2 bits of esid */
+	cmpwi	r0, 1	/* top bits == 01 ? */
+	bne+	slb_loop_01xx_11xx_skip
+	rldicl. r0, r10, 37, 63	/* skip invalid entries (V=0) */
+	beq	slb_loop_01xx_11xx_skip
+	slbmfev	r9, r11	/* r9 = VSID */
+	/* r0 = esid | 0x8... (V bit from r10 is kept) */
+	or	r0, r12, r10
+	or	r0, r0, r11	/* r0 |= entry index */
+	slbmte	r9, r0	/* rewrite entry r11 with the relocated ESID */
+slb_loop_01xx_11xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_2)
+	cmpwi	r11, 0	/* NOTE(review): limit 0 looks like a placeholder patched at this label - confirm */
+	blt	slb_loop_01xx_11xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		-
+	 * 5  0101		-
+	 * 6  0110		-
+	 * 7  0111		-
+	 *
+	 * 8  1000		HOST 1100
+	 * 9  1001		HOST 1101
+	 * a  1010		HOST 1110
+	 * b  1011		HOST 1111
+	 *
+	 * c  1100		GUEST 1100
+	 * d  1101		GUEST 1101
+	 * e  1110		GUEST 1110
+	 * f  1111		GUEST 1111
+	 */
+
+slb_do_enter:
+
+	/* Enter guest: reload the guest registers listed under "Required state" above */
+
+	mfspr	r0, SPRN_SPRG1	/* r0 = guest R0 */
+
+	ld	r9, (PACA_EXMC+EX_R9)(r13)
+	ld	r10, (PACA_EXMC+EX_R10)(r13)
+	ld	r12, (PACA_EXMC+EX_R12)(r13)
+
+	lwz	r11, (PACA_EXMC+EX_CCR)(r13)	/* r11 is scratch until its own reload below */
+	mtcr	r11	/* CR = guest CR */
+
+	ld	r11, (PACA_EXMC+EX_R3)(r13)	/* the EX_R3 slot carries the guest XER */
+	mtxer	r11
+
+	ld	r11, (PACA_EXMC+EX_R11)(r13)
+	ld	r13, (PACA_EXMC+EX_R13)(r13)	/* last load: r13 was the PACA base until here */
+
+	RFI	/* continue at SRR0 with MSR = SRR1, both set at the top of the trampoline */
+kvmppc_handler_trampoline_enter_end:
+
+
+
+/******************************************************************************
+ *                                                                            *
+ *                               Exit code                                    *
+ *                                                                            *
+ *****************************************************************************/
+
+.global kvmppc_handler_trampoline_exit
+kvmppc_handler_trampoline_exit:
+
+	/* Register usage at this point:
+	 *
+	 * SPRG0 = reserved
+	 * SPRG1 = guest R13
+	 * SPRG2 = guest CR
+	 * SPRG3 = virt. PACA
+	 * R01   = host R1
+	 * R02   = host R2
+	 * R10   = guest PC
+	 * R11   = guest MSR
+	 * R12   = exit handler id
+	 * R13   = PACA
+	 * PACA.exmc.R9  = guest R1
+	 * PACA.exmc.R10 = guest R10
+	 * PACA.exmc.R11 = guest R11
+	 * PACA.exmc.R12 = guest R12
+	 * PACA.exmc.R13 = guest R2
+	 *
+	 */
+
+	/* Save the registers this trampoline clobbers; slb_do_exit reloads them */
+
+	std	r0, (PACA_EXMC+EX_SRR0)(r13)
+	std	r8, (PACA_EXMC+EX_DSISR)(r13)
+	std	r9, (PACA_EXMC+EX_R3)(r13)
+	std	r10, (PACA_EXMC+EX_LR)(r13)
+	std	r11, (PACA_EXMC+EX_DAR)(r13)
+
+	/*
+	 * To easily get the instruction the #vmexit happened
+	 * at, we exploit the fact that the virtual layout is
+	 * still the same here, so we can just load the word
+	 * at the guest's PC address (r10)
+	 */
+
+	/* We only load the last instruction when it's safe */
+	cmpwi	r12, PPC970_INTERRUPT_DATA_STORAGE
+	beq	ld_last_inst
+	cmpwi	r12, PPC970_INTERRUPT_PROGRAM
+	beq	ld_last_inst
+
+	b	no_ld_last_inst	/* any other exit id: r8 is left untouched */
+
+ld_last_inst:
+	/* Save off the guest instruction we're at */
+	/*    1) enable paging for data */
+	mfmsr	r0	/* r0 = current MSR, written back in step 3 */
+	ori	r8, r0, MSR_DR			/* Enable paging for data */
+	mtmsr	r8
+	/*    2) fetch the instruction */
+	lwz	r8, 0(r10)	/* r8 = 32 bit word at the guest PC */
+	/*    3) disable paging again */
+	mtmsr	r0
+
+no_ld_last_inst:
+
+	/* Don't mess with SLB magic for 32 bit guests */
+	rldicl.	r11, r11, 1, 63	/* r11 = most significant bit of the guest MSR */
+	beq	slb_do_exit	/* bit clear: skip both SLB rewrite loops */
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		-
+	 * 5  0101		-
+	 * 6  0110		-
+	 * 7  0111		-
+	 *
+	 * 8  1000		HOST 1100
+	 * 9  1001		HOST 1101
+	 * a  1010		HOST 1110
+	 * b  1011		HOST 1111
+	 *
+	 * c  1100		GUEST 1100
+	 * d  1101		GUEST 1101
+	 * e  1110		GUEST 1110
+	 * f  1111		GUEST 1111
+	 */
+
+	/* Replace 11xx -> 01xx: move the guest's kernel segments back to the 4-7 range */
+
+	li	r11, 0	/* r11 = SLB entry index */
+slb_loop_11xx_01xx:
+
+	slbmfee	r10, r11	/* r10 = esid(r11) */
+	/* both top bits set, i.e. (esid & 0xc...) == 0xc... ? */
+	rldicl	r0, r10, 2, 62
+	cmpwi	r0, 3
+	bne+	slb_loop_11xx_01xx_skip
+	rldicl. r0, r10, 37, 63	/* skip invalid entries (V=0) */
+	beq	slb_loop_11xx_01xx_skip
+	slbmfev r9, r11	/* r9 = VSID */
+	/* r0 = esid & ESID_MASK (keep bits 0..35 only) */
+	rldicr	r0, r10, 0, 35
+	/* r0 |= CLASS_BIT(VSID); the rldimi mask is 36..43, so VSID bits 57..63 come along too */
+	rldimi	r0, r9, 56 - 36, 36
+	/* slbie(r0): invalidate the old 11xx translation before rewriting it */
+	slbie	r0
+	/* r0 = esid & 0x7... (clear the most significant bit) */
+	rldicl	r0, r10, 0, 1
+	or	r0, r0, r11	/* r0 |= entry index */
+	/* slbmte(r9, r0): same VSID, relocated ESID, same entry index */
+	slbmte	r9, r0
+slb_loop_11xx_01xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_3)
+	cmpwi	r11, 0	/* NOTE(review): limit 0 looks like a placeholder patched at this label - confirm */
+	blt	slb_loop_11xx_01xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		GUEST 1100
+	 * 5  0101		GUEST 1101
+	 * 6  0110		GUEST 1110
+	 * 7  0111		GUEST 1111
+	 *
+	 * 8  1000		HOST 1100
+	 * 9  1001		HOST 1101
+	 * a  1010		HOST 1110
+	 * b  1011		HOST 1111
+	 *
+	 * c  1100		-
+	 * d  1101		-
+	 * e  1110		-
+	 * f  1111		-
+	 */
+
+
+	/* Replace 10xx -> 11xx: move the host's kernel segments back (no slbie is issued in this loop) */
+
+	li	r11, 0	/* r11 = SLB entry index */
+slb_loop_10xx_11xx:
+	slbmfee	r10, r11	/* r10 = esid(r11) */
+	rldicl	r0, r10, 2, 62	/* r0 = leftmost 2 bits of esid */
+	cmpwi	r0, 2	/* top bits == 10 ? */
+	bne+	slb_loop_10xx_11xx_skip
+	rldicl. r0, r10, 37, 63	/* skip invalid entries (V=0) */
+	beq	slb_loop_10xx_11xx_skip
+	slbmfev	r9, r11	/* r9 = VSID */
+	/* r0 = esid | 0x4... (constant is rebuilt per entry because r0 is also the scratch reg) */
+	lis	r0, 0x4000000000000000@highest
+	rldicr  r0, r0, 32, 31	/* r0 = 0x4000000000000000 */
+	or	r0, r0, r10
+	or	r0, r0, r11	/* r0 |= entry index */
+	slbmte	r9, r0	/* rewrite entry r11 with the relocated ESID */
+slb_loop_10xx_11xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_4)
+	cmpwi	r11, 0	/* NOTE(review): limit 0 looks like a placeholder patched at this label - confirm */
+	blt	slb_loop_10xx_11xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		GUEST 1100
+	 * 5  0101		GUEST 1101
+	 * 6  0110		GUEST 1110
+	 * 7  0111		GUEST 1111
+	 *
+	 * 8  1000		-
+	 * 9  1001		-
+	 * a  1010		-
+	 * b  1011		-
+	 *
+	 * c  1100		HOST 1100
+	 * d  1101		HOST 1101
+	 * e  1110		HOST 1110
+	 * f  1111		HOST 1111
+	 */
+
+slb_do_exit:
+
+	/* Restore the registers saved at the top of the exit trampoline */
+
+	ld	r11, (PACA_EXMC+EX_DAR)(r13)
+	ld	r10, (PACA_EXMC+EX_LR)(r13)
+	ld	r9, (PACA_EXMC+EX_R3)(r13)
+	/* Save last inst (EX_LR is free now that r10 is reloaded; r8 is only an instruction if ld_last_inst ran) */
+	stw	r8, (PACA_EXMC+EX_LR)(r13)
+	/* Restore r8 to the value it had on entry */
+	ld	r8, (PACA_EXMC+EX_DSISR)(r13)
+
+	/* RFI into the highmem handler */
+	mfmsr	r0
+	ori	r0, r0, MSR_IR|MSR_DR|MSR_RI	/* Enable paging */
+	mtsrr1	r0	/* SRR1 = MSR the handler will run with */
+	ld	r0, PACASAVEDMSR(r13)		/* Highmem handler address */
+	mtsrr0	r0	/* SRR0 = RFI target */
+
+	ld	r0, (PACA_EXMC+EX_SRR0)(r13)	/* r0 = value it had on entry */
+
+	RFI
+kvmppc_handler_trampoline_exit_end:
+
-- 
1.6.0.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 07/23] Add SLB switching code for entry/exit
  2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
@ 2009-07-08  4:38 ` Benjamin Herrenschmidt
  2009-07-08  7:23 ` Alexander Graf
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Benjamin Herrenschmidt @ 2009-07-08  4:38 UTC (permalink / raw)
  To: kvm-ppc

On Tue, 2009-07-07 at 16:17 +0200, Alexander Graf wrote:
> This is the really low level of guest entry/exit code.
> 
> Usually the Linux kernel resides in virtual memory 0xc000000000000000 to
> 0xffffffffffffffff. These addresses are mapped into every userspace
> application.
> 
> When going into a 32 bit guest, this is perfectly fine. That one can't
> access memory that high anyways.
> 
> Going into a 64 bit guest, the guest kernel probably is in the same
> virtual memory region as the host, so we need to switch between those two.
> 
> During normal entry code we're in those virtual addresses though. So
> we need a small wrapper in real memory that switches from host to guest
> high SLB state and vice versa.
> 
> To store both host and guest state in the SLB, we store guest kernel SLB
> entries in a different range (0x40000000000000000 - 0x7ffffffffffffffff).
> 
> For details on which entries go where, please see the patch itself.

Note that we have an unused VSID bit at the moment on 64-bit afaik. We
could probably use that to differenciate guest kernel VSIDs from host
kernel VSIDs. That would avoid having to muck around with the EAs
themselves that much no ?

I'm not sure I understand exactly what you are doing here, we should
discuss this on IRC one of these days I suppose. But you should be able
to just get rid of the host kernel SLBs completely with some care, as
there are some critical code path where taking an exception without
having the SLB entry around for entry 0 and the kernel stack will
blow...

But just blow them off, and when returning to the kernel, just put back
the ones that are needed (aka slb_flush_and_rebolt). You will need to
play carefully with that though, look at the code in slb.c, as the real
pHyp hypervisor that may lie underneath will potentially muck around the
SLBs and will restore them occasionally from the special in-memory
shadows, so you probably want to switch the content of those too.

Of course none of that will work on legacy iSeries or Power3 but I think
we can safely say we don't care :-)

Cheers,
Ben.



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 07/23] Add SLB switching code for entry/exit
  2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
  2009-07-08  4:38 ` Benjamin Herrenschmidt
@ 2009-07-08  7:23 ` Alexander Graf
  2009-07-08  7:43 ` Benjamin Herrenschmidt
  2009-07-16 13:30 ` Alexander Graf
  3 siblings, 0 replies; 5+ messages in thread
From: Alexander Graf @ 2009-07-08  7:23 UTC (permalink / raw)
  To: kvm-ppc


On 08.07.2009, at 06:38, Benjamin Herrenschmidt wrote:

> On Tue, 2009-07-07 at 16:17 +0200, Alexander Graf wrote:
>> This is the really low level of guest entry/exit code.
>>
>> Usually the Linux kernel resides in virtual memory  
>> 0xc000000000000000 to
>> 0xffffffffffffffff. These addresses are mapped into every userspace
>> application.
>>
>> When going into a 32 bit guest, this is perfectly fine. That one  
>> can't
>> access memory that high anyways.
>>
>> Going into a 64 bit guest, the guest kernel probably is in the same
>> virtual memory region as the host, so we need to switch between  
>> those two.
>>
>> During normal entry code we're in those virtual addresses though. So
>> we need a small wrapper in real memory that switches from host to  
>> guest
>> high SLB state and vice versa.
>>
>> To store both host and guest state in the SLB, we store guest  
>> kernel SLB
>> entries in a different range (0x40000000000000000 -  
>> 0x7ffffffffffffffff).
>>
>> For details on which entries go where, please see the patch itself.
>
> Note that we have an unused VSID bit at the moment on 64-bit afaik. We
> could probably use that to differenciate guest kernel VSIDs from host
> kernel VSIDs. That would avoid having to muck around with the EAs
> themselves that much no ?

Well, the problem is that we can't have two ESIDs for the same EA in  
the SLB. So what I tried was to have guest ESIDs and host ESIDs  
(PAGE_OFFSET+) live in the same SLB by removing the most significant  
bit of the guest ESID.

>
> I'm not sure I understand exactly what you are doing here, we should
> discuss this on IRC one of these days I suppose. But you should be  
> able
> to just get rid of the host kernel SLBs completely with some care, as
> there are some critical code path where taking an exception without
> having the SLB entry around for entry 0 and the kernel stack will
> blow...

Yeah, I've encountered quite a bunch of those :-).

> But just blow them off, and when returning to the kernel, just put  
> back
> the ones that are needed (aka slb_flush_and_rebolt). You will need to
> play carefully with that though, look at the code in slb.c, as the  
> real
> pHyp hypervisor that may lie underneath will potentially muck around  
> the
> SLBs and will restore them occasionally from the special in-memory
> shadows, so you probably want to switch the content of those too.

Yikes. So pHyp restores SLB entries from a shadow? Sounds like I need  
to mess with that one too :-(.

I'm not really fond of all the SLB switching code in general. Best  
case would probably be to have a host and guest shadow SLB in the RMA  
that the real mode code can take to switch the _full_ SLB.

That way we'd also get rid of the CONTEXT_GUEST stuff in the kernel  
module, where we are in Linux, but have guest SLB entries active  
already.

> Of course none of that will work on legacy iSeries or Power3 but I  
> think
> we can safely say we don't care :-)

Any reason it doesn't work on Power3? :-). It definitely does not work  
on iSeries, though the code could be made to work there FWIW.

Alex

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 07/23] Add SLB switching code for entry/exit
  2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
  2009-07-08  4:38 ` Benjamin Herrenschmidt
  2009-07-08  7:23 ` Alexander Graf
@ 2009-07-08  7:43 ` Benjamin Herrenschmidt
  2009-07-16 13:30 ` Alexander Graf
  3 siblings, 0 replies; 5+ messages in thread
From: Benjamin Herrenschmidt @ 2009-07-08  7:43 UTC (permalink / raw)
  To: kvm-ppc

On Wed, 2009-07-08 at 09:23 +0200, Alexander Graf wrote:
> Well, the problem is that we can't have two ESIDs for the same EA in  
> the SLB. So what I tried was to have guest ESIDs and host ESIDs  
> (PAGE_OFFSET+) live in the same SLB by removing the most significant  
> bit of the guest ESID.

I'll reply to that later when I understand what you are doing better.

> Yikes. So pHyp restores SLB entries from a shadow? Sounds like I need  
> to mess with that one too :-(.

Right.

> I'm not really fond of all the SLB switching code in general. Best  
> case would probably be to have a host and guest shadow SLB in the RMA  
> that the real mode code can take to switch the _full_ SLB.
> 
> That way we'd also get rid of the CONTEXT_GUEST stuff in the kernel  
> module, where we are in Linux, but have guest SLB entries active  
> already.

Same comment as above :-) I'm sure together we can come up with
something quite optimal but I need to catch up a bit first :-)

> Any reason it doesn't work on Power3? :-). It definitely does not work  
> on iSeries, though the code could be made to work there FWIW.

On Power3 you'd have to implement STAB support instead of SLB (get
yourself a PowerPC Book3 pre-2.0 if that's available publically and you
are masochist :-)

On legacy iSeries, it's harder, you don't have access to real mode and
the hypervisor is more invasive. I wouldn't bother with these. Any
recent iSeries (POWER5 or later) uses pHyp and so shouldn't be a
problem.

Cheers
Ben.


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 07/23] Add SLB switching code for entry/exit
  2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
                   ` (2 preceding siblings ...)
  2009-07-08  7:43 ` Benjamin Herrenschmidt
@ 2009-07-16 13:30 ` Alexander Graf
  3 siblings, 0 replies; 5+ messages in thread
From: Alexander Graf @ 2009-07-16 13:30 UTC (permalink / raw)
  To: kvm-ppc

This is the really low level of guest entry/exit code.

Usually the Linux kernel resides in virtual memory 0xc000000000000000 to
0xffffffffffffffff. These addresses are mapped into every userspace
application.

When going into a 32 bit guest, this is perfectly fine. That one can't
access memory that high anyways.

Going into a 64 bit guest, the guest kernel probably is in the same
virtual memory region as the host, so we need to switch between those two.

During normal entry code we're in those virtual addresses though. So
we need a small wrapper in real memory that switches from host to guest
high SLB state and vice versa.

To store both host and guest state in the SLB, we store guest kernel SLB
entries in a different range (0x4000000000000000 - 0x7fffffffffffffff).

For details on which entries go where, please see the patch itself.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_64_slb.S |  456 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 456 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_64_slb.S

diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
new file mode 100644
index 0000000..c5d2bf3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -0,0 +1,456 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+/* Switch the SLB to the guest's context
+ *
+ * This function switches the SLB's contents to the guest's. This is achieved
+ * by switching Linux's kernel segments to unused segments and the guest's kernel
+ * segments from unused to kernel segments.
+ *
+ * Looking at the most significant nybble, we do the following on entry:
+ *
+ * 0  0000
+ * 1  0001
+ * 2  0010
+ * 3  0011
+ *
+ * 4  0100		->	c 1100
+ * 5  0101		->	d 1101
+ * 6  0110		->	e 1110
+ * 7  0111		->	f 1111
+ *
+ * 8  1000
+ * 9  1001
+ * a  1010
+ * b  1011
+ *
+ * c  1100		->	8 1000
+ * d  1101		->	9 1001
+ * e  1110		->	a 1010
+ * f  1111		->	b 1011
+ *
+ *
+ * That way we can run Linux as a guest, even though Linux is occupying the
+ * segments in question as host already.
+ *
+ * Required state:
+ *
+ * MSR = ~(IR|DR)	(instruction and data translation both off)
+ * R13 = PACA
+ * R0 = free
+ * R9 = guest IP
+ * R10 = guest MSR
+ * R11 = free
+ * R12 = free
+ * PACA[PACA_EXMC + EX_R9] = guest R9
+ * PACA[PACA_EXMC + EX_R10] = guest R10
+ * PACA[PACA_EXMC + EX_R11] = guest R11
+ * PACA[PACA_EXMC + EX_R12] = guest R12
+ * PACA[PACA_EXMC + EX_R13] = guest R13
+ * PACA[PACA_EXMC + EX_CCR] = guest CR
+ * PACA[PACA_EXMC + EX_R3] = guest XER
+ * SPRG1 = guest R0
+ */
+
+.global kvmppc_handler_trampoline_enter
+kvmppc_handler_trampoline_enter:
+	mtsrr0	r9	/* SRR0 = guest IP (r9 on entry), used by the RFI at the end */
+	mtsrr1	r10	/* SRR1 = guest MSR (r10 on entry), used by the RFI at the end */
+
+	/* Don't mess with SLB magic for 32 bit guests */
+	rldicl.	r10, r10, 1, 63	/* r10 = most significant bit of the guest MSR */
+	beq	slb_do_enter	/* bit clear: skip both SLB rewrite loops */
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		GUEST 1100
+	 * 5  0101		GUEST 1101
+	 * 6  0110		GUEST 1110
+	 * 7  0111		GUEST 1111
+	 *
+	 * 8  1000		-
+	 * 9  1001		-
+	 * a  1010		-
+	 * b  1011		-
+	 *
+	 * c  1100		HOST 1100
+	 * d  1101		HOST 1101
+	 * e  1110		HOST 1110
+	 * f  1111		HOST 1111
+	 */
+
+	/* Replace 11xx -> 10xx: move the host's kernel segments to the unused 8-b range */
+
+	/* for (r11 = 0; r11 < slb_entries; r11++) */
+	li	r11, 0	/* r11 = SLB entry index */
+slb_loop_11xx_10xx:
+
+	/* r10 = esid(r11) */
+	slbmfee	r10, r11
+	/* r0 = leftmost 2 bits of esid */
+	rldicl	r0, r10, 2, 62
+	/* both top bits set, i.e. (esid & 0xc...) == 0xc... ? */
+	cmpwi	r0, 3
+	/* no? skip it then */
+	bne+	slb_loop_11xx_10xx_skip
+	/* Skip invalid entries (V=0); r0 = bit 36 of the slbmfee result */
+	rldicl. r0, r10, 37, 63
+	beq	slb_loop_11xx_10xx_skip
+	/* r9 = VSID */
+	slbmfev r9, r11
+	/* r0 = esid & ESID_MASK (keep bits 0..35 only) */
+	rldicr	r0, r10, 0, 35
+	/* r0 |= CLASS_BIT(VSID); the rldic mask is 36..43, so VSID bits 57..63 come along too */
+	rldic	r12, r9, 56 - 36, 36
+	or	r0, r0, r12
+	/* slbie(r0): invalidate the old 11xx translation before rewriting it */
+	slbie	r0
+	/* r0 = esid & 0xb... (rotate, drop the bit that was bit 1, rotate back) */
+	rldicr	r0, r10, 2, 62
+	rotrdi	r0, r0, 2
+	/* r0 |= r11 (entry index goes into the low bits for slbmte) */
+	or	r0, r0, r11
+	/* slbmte(r9, r0): same VSID, relocated ESID, same entry index */
+	slbmte	r9, r0
+slb_loop_11xx_10xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_1)
+	cmpwi	r11, 0	/* NOTE(review): limit 0 looks like a placeholder patched at this label - confirm */
+	blt	slb_loop_11xx_10xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		GUEST 1100
+	 * 5  0101		GUEST 1101
+	 * 6  0110		GUEST 1110
+	 * 7  0111		GUEST 1111
+	 *
+	 * 8  1000		HOST 1100
+	 * 9  1001		HOST 1101
+	 * a  1010		HOST 1110
+	 * b  1011		HOST 1111
+	 *
+	 * c  1100		-
+	 * d  1101		-
+	 * e  1110		-
+	 * f  1111		-
+	 */
+
+	/* Replace 01xx -> 11xx: move the guest's kernel segments up (no slbie is issued in this loop) */
+
+	lis	r12, 0x8000000000000000@highest	/* r12 = 0xffffffff80000000 (lis sign-extends) */
+	rldicr  r12, r12, 32, 31	/* r12 = 0x8000000000000000, the bit to set in each ESID */
+	li	r11, 0	/* r11 = SLB entry index */
+slb_loop_01xx_11xx:
+	slbmfee	r10, r11	/* r10 = esid(r11) */
+	rldicl	r0, r10, 2, 62	/* r0 = leftmost 2 bits of esid */
+	cmpwi	r0, 1	/* top bits == 01 ? */
+	bne+	slb_loop_01xx_11xx_skip
+	rldicl. r0, r10, 37, 63	/* skip invalid entries (V=0) */
+	beq	slb_loop_01xx_11xx_skip
+	slbmfev	r9, r11	/* r9 = VSID */
+	/* r0 = esid | 0x8... (V bit from r10 is kept) */
+	or	r0, r12, r10
+	or	r0, r0, r11	/* r0 |= entry index */
+	slbmte	r9, r0	/* rewrite entry r11 with the relocated ESID */
+slb_loop_01xx_11xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_2)
+	cmpwi	r11, 0	/* NOTE(review): limit 0 looks like a placeholder patched at this label - confirm */
+	blt	slb_loop_01xx_11xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		-
+	 * 5  0101		-
+	 * 6  0110		-
+	 * 7  0111		-
+	 *
+	 * 8  1000		HOST 1100
+	 * 9  1001		HOST 1101
+	 * a  1010		HOST 1110
+	 * b  1011		HOST 1111
+	 *
+	 * c  1100		GUEST 1100
+	 * d  1101		GUEST 1101
+	 * e  1110		GUEST 1110
+	 * f  1111		GUEST 1111
+	 */
+
+slb_do_enter:
+
+	/* Enter guest: reload the guest registers listed under "Required state" above */
+
+	mfspr	r0, SPRN_SPRG1	/* r0 = guest R0 */
+
+	ld	r9, (PACA_EXMC+EX_R9)(r13)
+	ld	r10, (PACA_EXMC+EX_R10)(r13)
+	ld	r12, (PACA_EXMC+EX_R12)(r13)
+
+	lwz	r11, (PACA_EXMC+EX_CCR)(r13)	/* r11 is scratch until its own reload below */
+	mtcr	r11	/* CR = guest CR */
+
+	ld	r11, (PACA_EXMC+EX_R3)(r13)	/* the EX_R3 slot carries the guest XER */
+	mtxer	r11
+
+	ld	r11, (PACA_EXMC+EX_R11)(r13)
+	ld	r13, (PACA_EXMC+EX_R13)(r13)	/* last load: r13 was the PACA base until here */
+
+	RFI	/* continue at SRR0 with MSR = SRR1, both set at the top of the trampoline */
+kvmppc_handler_trampoline_enter_end:
+
+
+
+/******************************************************************************
+ *                                                                            *
+ *                               Exit code                                    *
+ *                                                                            *
+ *****************************************************************************/
+
+.global kvmppc_handler_trampoline_exit
+kvmppc_handler_trampoline_exit:
+
+	/* Register usage at this point:
+	 *
+	 * SPRG0 = reserved
+	 * SPRG1 = guest R13
+	 * SPRG2 = guest CR
+	 * SPRG3 = virt. PACA
+	 * R01   = host R1
+	 * R02   = host R2
+	 * R10   = guest PC
+	 * R11   = guest MSR
+	 * R12   = exit handler id
+	 * R13   = PACA
+	 * PACA.exmc.R9  = guest R1
+	 * PACA.exmc.R10 = guest R10
+	 * PACA.exmc.R11 = guest R11
+	 * PACA.exmc.R12 = guest R12
+	 * PACA.exmc.R13 = guest R2
+	 *
+	 */
+
+	/* Save the registers this trampoline clobbers; slb_do_exit reloads them */
+
+	std	r0, (PACA_EXMC+EX_SRR0)(r13)
+	std	r8, (PACA_EXMC+EX_DSISR)(r13)
+	std	r9, (PACA_EXMC+EX_R3)(r13)
+	std	r10, (PACA_EXMC+EX_LR)(r13)
+	std	r11, (PACA_EXMC+EX_DAR)(r13)
+
+	/*
+	 * To easily get the instruction the #vmexit happened
+	 * at, we exploit the fact that the virtual layout is
+	 * still the same here, so we can just load the word
+	 * at the guest's PC address (r10)
+	 */
+
+	/* We only load the last instruction when it's safe */
+	cmpwi	r12, BOOK3S_INTERRUPT_DATA_STORAGE
+	beq	ld_last_inst
+	cmpwi	r12, BOOK3S_INTERRUPT_PROGRAM
+	beq	ld_last_inst
+
+	b	no_ld_last_inst	/* any other exit id: r8 is left untouched */
+
+ld_last_inst:
+	/* Save off the guest instruction we're at */
+	/*    1) enable paging for data */
+	mfmsr	r0	/* r0 = current MSR, written back in step 3 */
+	ori	r8, r0, MSR_DR			/* Enable paging for data */
+	mtmsr	r8
+	/*    2) fetch the instruction */
+	lwz	r8, 0(r10)	/* r8 = 32 bit word at the guest PC */
+	/*    3) disable paging again */
+	mtmsr	r0
+
+no_ld_last_inst:
+
+	/* Don't mess with SLB magic for 32 bit guests */
+	rldicl.	r11, r11, 1, 63	/* r11 = most significant bit of the guest MSR */
+	beq	slb_do_exit	/* bit clear: skip both SLB rewrite loops */
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		-
+	 * 5  0101		-
+	 * 6  0110		-
+	 * 7  0111		-
+	 *
+	 * 8  1000		HOST 1100
+	 * 9  1001		HOST 1101
+	 * a  1010		HOST 1110
+	 * b  1011		HOST 1111
+	 *
+	 * c  1100		GUEST 1100
+	 * d  1101		GUEST 1101
+	 * e  1110		GUEST 1110
+	 * f  1111		GUEST 1111
+	 */
+
+	/* Replace 11xx -> 01xx: move the guest's kernel segments back to the 4-7 range */
+
+	li	r11, 0	/* r11 = SLB entry index */
+slb_loop_11xx_01xx:
+
+	slbmfee	r10, r11	/* r10 = esid(r11) */
+	/* both top bits set, i.e. (esid & 0xc...) == 0xc... ? */
+	rldicl	r0, r10, 2, 62
+	cmpwi	r0, 3
+	bne+	slb_loop_11xx_01xx_skip
+	rldicl. r0, r10, 37, 63	/* skip invalid entries (V=0) */
+	beq	slb_loop_11xx_01xx_skip
+	slbmfev r9, r11	/* r9 = VSID */
+	/* r0 = esid & ESID_MASK (keep bits 0..35 only) */
+	rldicr	r0, r10, 0, 35
+	/* r0 |= CLASS_BIT(VSID); the rldimi mask is 36..43, so VSID bits 57..63 come along too */
+	rldimi	r0, r9, 56 - 36, 36
+	/* slbie(r0): invalidate the old 11xx translation before rewriting it */
+	slbie	r0
+	/* r0 = esid & 0x7... (clear the most significant bit) */
+	rldicl	r0, r10, 0, 1
+	or	r0, r0, r11	/* r0 |= entry index */
+	/* slbmte(r9, r0): same VSID, relocated ESID, same entry index */
+	slbmte	r9, r0
+slb_loop_11xx_01xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_3)
+	cmpwi	r11, 0	/* NOTE(review): limit 0 looks like a placeholder patched at this label - confirm */
+	blt	slb_loop_11xx_01xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		GUEST 1100
+	 * 5  0101		GUEST 1101
+	 * 6  0110		GUEST 1110
+	 * 7  0111		GUEST 1111
+	 *
+	 * 8  1000		HOST 1100
+	 * 9  1001		HOST 1101
+	 * a  1010		HOST 1110
+	 * b  1011		HOST 1111
+	 *
+	 * c  1100		-
+	 * d  1101		-
+	 * e  1110		-
+	 * f  1111		-
+	 */
+
+
+	/* Replace 10xx -> 11xx: move the host's kernel segments back (no slbie is issued in this loop) */
+
+	li	r11, 0	/* r11 = SLB entry index */
+slb_loop_10xx_11xx:
+	slbmfee	r10, r11	/* r10 = esid(r11) */
+	rldicl	r0, r10, 2, 62	/* r0 = leftmost 2 bits of esid */
+	cmpwi	r0, 2	/* top bits == 10 ? */
+	bne+	slb_loop_10xx_11xx_skip
+	rldicl. r0, r10, 37, 63	/* skip invalid entries (V=0) */
+	beq	slb_loop_10xx_11xx_skip
+	slbmfev	r9, r11	/* r9 = VSID */
+	/* r0 = esid | 0x4... (constant is rebuilt per entry because r0 is also the scratch reg) */
+	lis	r0, 0x4000000000000000@highest
+	rldicr  r0, r0, 32, 31	/* r0 = 0x4000000000000000 */
+	or	r0, r0, r10
+	or	r0, r0, r11	/* r0 |= entry index */
+	slbmte	r9, r0	/* rewrite entry r11 with the relocated ESID */
+slb_loop_10xx_11xx_skip:
+	addi	r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_4)
+	cmpwi	r11, 0	/* NOTE(review): limit 0 looks like a placeholder patched at this label - confirm */
+	blt	slb_loop_10xx_11xx
+
+	/*
+	 * SLB most significant nybble
+	 *
+	 * 0  0000		GUEST 0000
+	 * 1  0001		-
+	 * 2  0010		-
+	 * 3  0011		-
+	 *
+	 * 4  0100		GUEST 1100
+	 * 5  0101		GUEST 1101
+	 * 6  0110		GUEST 1110
+	 * 7  0111		GUEST 1111
+	 *
+	 * 8  1000		-
+	 * 9  1001		-
+	 * a  1010		-
+	 * b  1011		-
+	 *
+	 * c  1100		HOST 1100
+	 * d  1101		HOST 1101
+	 * e  1110		HOST 1110
+	 * f  1111		HOST 1111
+	 */
+
+slb_do_exit:
+
+	/* Restore the registers saved at the top of the exit trampoline */
+
+	ld	r11, (PACA_EXMC+EX_DAR)(r13)
+	ld	r10, (PACA_EXMC+EX_LR)(r13)
+	ld	r9, (PACA_EXMC+EX_R3)(r13)
+	/* Save last inst (EX_LR is free now that r10 is reloaded; r8 is only an instruction if ld_last_inst ran) */
+	stw	r8, (PACA_EXMC+EX_LR)(r13)
+	/* Restore r8 to the value it had on entry */
+	ld	r8, (PACA_EXMC+EX_DSISR)(r13)
+
+	/* RFI into the highmem handler */
+	mfmsr	r0
+	ori	r0, r0, MSR_IR|MSR_DR|MSR_RI	/* Enable paging */
+	mtsrr1	r0	/* SRR1 = MSR the handler will run with */
+	ld	r0, PACASAVEDMSR(r13)		/* Highmem handler address */
+	mtsrr0	r0	/* SRR0 = RFI target */
+
+	ld	r0, (PACA_EXMC+EX_SRR0)(r13)	/* r0 = value it had on entry */
+
+	RFI
+kvmppc_handler_trampoline_exit_end:
+
-- 
1.6.0.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-07-16 13:30 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
2009-07-08  4:38 ` Benjamin Herrenschmidt
2009-07-08  7:23 ` Alexander Graf
2009-07-08  7:43 ` Benjamin Herrenschmidt
2009-07-16 13:30 ` Alexander Graf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.