* [PATCH 07/23] Add SLB switching code for entry/exit
@ 2009-07-07 14:17 Alexander Graf
2009-07-08 4:38 ` Benjamin Herrenschmidt
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Alexander Graf @ 2009-07-07 14:17 UTC (permalink / raw)
To: kvm-ppc
This is the really low level of guest entry/exit code.
Usually the Linux kernel resides in virtual memory 0xc000000000000000 to
0xffffffffffffffff. These addresses are mapped into every userspace
application.
When going into a 32 bit guest, this is perfectly fine. That one can't
access memory that high anyways.
Going into a 64 bit guest, the guest kernel probably is in the same
virtual memory region as the host, so we need to switch between those two.
During normal entry code we're in those virtual addresses though. So
we need a small wrapper in real memory that switches from host to guest
high SLB state and vice versa.
To store both host and guest state in the SLB, we store guest kernel SLB
entries in a different range (0x4000000000000000 - 0x7fffffffffffffff).
For details on which entries go where, please see the patch itself.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/970_slb.S | 456 ++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 456 insertions(+), 0 deletions(-)
create mode 100644 arch/powerpc/kvm/970_slb.S
diff --git a/arch/powerpc/kvm/970_slb.S b/arch/powerpc/kvm/970_slb.S
new file mode 100644
index 0000000..6fd5740
--- /dev/null
+++ b/arch/powerpc/kvm/970_slb.S
@@ -0,0 +1,456 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+/* Switch the SLB to the guest's context
+ *
+ * This function switches the SLB's contents to the guest's. This is achieved
+ * by switching Linux's kernel segments to unused segments and the guest's kernel
+ * segments from unused to kernel segments.
+ *
+ * Looking at the most significant nybble, we do the following on entry:
+ *
+ * 0 0000
+ * 1 0001
+ * 2 0010
+ * 3 0011
+ *
+ * 4 0100 -> c 1100
+ * 5 0101 -> d 1101
+ * 6 0110 -> e 1110
+ * 7 0111 -> f 1111
+ *
+ * 8 1000
+ * 9 1001
+ * a 1010
+ * b 1011
+ *
+ * c 1100 -> 8 1000
+ * d 1101 -> 9 1001
+ * e 1110 -> a 1010
+ * f 1111 -> b 1011
+ *
+ *
+ * That way we can run Linux as a guest, even though Linux is occupying the
+ * segments in question as host already.
+ *
+ * Required state:
+ *
+ * MSR = ~(IR|DR)
+ * R13 = PACA
+ * R0 = free
+ * R9 = guest IP
+ * R10 = guest MSR
+ * R11 = free
+ * R12 = free
+ * PACA[PACA_EXMC + EX_R9] = guest R9
+ * PACA[PACA_EXMC + EX_R10] = guest R10
+ * PACA[PACA_EXMC + EX_R11] = guest R11
+ * PACA[PACA_EXMC + EX_R12] = guest R12
+ * PACA[PACA_EXMC + EX_R13] = guest R13
+ * PACA[PACA_EXMC + EX_CCR] = guest CR
+ * PACA[PACA_EXMC + EX_R3] = guest XER
+ * SPRG1 = guest R0
+ */
+
+.global kvmppc_handler_trampoline_enter /* Guest entry trampoline: stage SRR0/SRR1, switch the SLB for 64 bit guests, then RFI */
+kvmppc_handler_trampoline_enter:
+ mtsrr0 r9 /* SRR0 = r9 (guest IP, see the required state above) */
+ mtsrr1 r10 /* SRR1 = r10 (guest MSR) */
+
+ /* Don't mess with SLB magic for 32 bit guests: go straight to the register restore */
+ rldicl. r10, r10, 1, 63 /* r10 = most significant bit of the guest MSR (MSR[SF]); also sets CR0 */
+ beq slb_do_enter /* bit clear -> 32 bit guest */
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 GUEST 1100
+ * 5 0101 GUEST 1101
+ * 6 0110 GUEST 1110
+ * 7 0111 GUEST 1111
+ *
+ * 8 1000 -
+ * 9 1001 -
+ * a 1010 -
+ * b 1011 -
+ *
+ * c 1100 HOST 1100
+ * d 1101 HOST 1101
+ * e 1110 HOST 1110
+ * f 1111 HOST 1111
+ */
+
+ /* Replace 11xx -> 10xx: park every valid host kernel segment (top ESID bits 11) at 10xx */
+
+ /* for (r11 = 0; r11 < slb_entries; r11++) */
+ li r11, 0
+slb_loop_11xx_10xx:
+
+ /* r10 = ESID word of SLB entry r11 */
+ slbmfee r10, r11
+ /* r0 = leftmost 2 bits of esid */
+ rldicl r0, r10, 2, 62
+ /* is this a 0xc..-0xf.. segment, i.e. (esid & 0xc...) == 0xc... ? */
+ cmpwi r0, 3
+ /* no? skip it then */
+ bne+ slb_loop_11xx_10xx_skip
+ /* Skip invalid entries (V=0); V is bit 36 of the ESID word */
+ rldicl. r0, r10, 37, 63
+ beq slb_loop_11xx_10xx_skip
+ /* r9 = VSID word of SLB entry r11 */
+ slbmfev r9, r11
+ /* r0 = esid & ESID_MASK (keeps bits 0..35, which drops V) */
+ rldicr r0, r10, 0, 35
+ /* r0 |= CLASS_BIT(VSID): VSID-word bits 56..63 land in bits 36..43 */
+ rldic r12, r9, 56 - 36, 36 /* NOTE(review): the mask is 8 bits wide; presumably bits 57..63 are zero - confirm */
+ or r0, r0, r12
+ /* slbie(r0): invalidate the entry under its old ESID */
+ slbie r0
+ /* r0 = esid & 0xb...: rotate bit 1 into bit 63, clear it, rotate back (V stays set) */
+ rldicr r0, r10, 2, 62
+ rotrdi r0, r0, 2
+ /* r0 |= r11 (the entry index goes into the low bits) */
+ or r0, r0, r11
+ /* slbmte(r9, r0): rewrite entry r11 with the same VSID and the moved ESID */
+ slbmte r9, r0
+slb_loop_11xx_10xx_skip:
+ addi r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_1)
+ cmpwi r11, 0 /* NOTE(review): with immediate 0 the loop body runs once; presumably patched with the SLB size via kvmppc_patch_slb_1 - confirm */
+ blt slb_loop_11xx_10xx
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 GUEST 1100
+ * 5 0101 GUEST 1101
+ * 6 0110 GUEST 1110
+ * 7 0111 GUEST 1111
+ *
+ * 8 1000 HOST 1100
+ * 9 1001 HOST 1101
+ * a 1010 HOST 1110
+ * b 1011 HOST 1111
+ *
+ * c 1100 -
+ * d 1101 -
+ * e 1110 -
+ * f 1111 -
+ */
+
+ /* Replace 01xx -> 11xx: move every valid parked guest kernel segment up into 11xx */
+
+ lis r12, 0x8000000000000000@highest /* r12 = 0x8000 << 16, sign-extended */
+ rldicr r12, r12, 32, 31 /* r12 = 0x8000000000000000 (loop-invariant top-bit mask) */
+ li r11, 0 /* r11 = SLB entry index */
+slb_loop_01xx_11xx:
+ slbmfee r10, r11 /* r10 = ESID word of entry r11 */
+ rldicl r0, r10, 2, 62 /* r0 = leftmost 2 bits of esid */
+ cmpwi r0, 1 /* only entries whose top bits are 01 are moved */
+ bne+ slb_loop_01xx_11xx_skip
+ rldicl. r0, r10, 37, 63 /* r0 = V (bit 36); skip invalid entries */
+ beq slb_loop_01xx_11xx_skip
+ slbmfev r9, r11 /* r9 = VSID word of entry r11 */
+ /* r0 = esid | 0x8...; unlike the 11xx -> 10xx loop no slbie is issued first */
+ or r0, r12, r10
+ or r0, r0, r11 /* r0 |= entry index */
+ slbmte r9, r0 /* rewrite entry r11 with the same VSID and the moved ESID */
+slb_loop_01xx_11xx_skip:
+ addi r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_2)
+ cmpwi r11, 0 /* NOTE(review): immediate presumably patched with the SLB size via kvmppc_patch_slb_2 - confirm */
+ blt slb_loop_01xx_11xx
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 -
+ * 5 0101 -
+ * 6 0110 -
+ * 7 0111 -
+ *
+ * 8 1000 HOST 1100
+ * 9 1001 HOST 1101
+ * a 1010 HOST 1110
+ * b 1011 HOST 1111
+ *
+ * c 1100 GUEST 1100
+ * d 1101 GUEST 1101
+ * e 1110 GUEST 1110
+ * f 1111 GUEST 1111
+ */
+
+slb_do_enter:
+
+ /* Enter guest: reload the guest registers staged in SPRG1 and PACA_EXMC, then RFI */
+
+ mfspr r0, SPRN_SPRG1 /* r0 = guest R0 */
+
+ ld r9, (PACA_EXMC+EX_R9)(r13) /* guest R9 */
+ ld r10, (PACA_EXMC+EX_R10)(r13) /* guest R10 */
+ ld r12, (PACA_EXMC+EX_R12)(r13) /* guest R12 */
+
+ lwz r11, (PACA_EXMC+EX_CCR)(r13) /* guest CR, with r11 as scratch */
+ mtcr r11
+
+ ld r11, (PACA_EXMC+EX_R3)(r13) /* the EX_R3 slot carries the guest XER */
+ mtxer r11
+
+ ld r11, (PACA_EXMC+EX_R11)(r13) /* guest R11, now that r11 is no longer needed as scratch */
+ ld r13, (PACA_EXMC+EX_R13)(r13) /* guest R13; must be last, it overwrites the PACA pointer */
+
+ RFI /* continue at SRR0/SRR1, which were set at the top of the trampoline */
+kvmppc_handler_trampoline_enter_end:
+
+
+
+/******************************************************************************
+ * *
+ * Exit code *
+ * *
+ *****************************************************************************/
+
+.global kvmppc_handler_trampoline_exit /* Guest exit trampoline: save scratch, fetch the last instruction, switch the SLB back, RFI to the handler */
+kvmppc_handler_trampoline_exit:
+
+ /* Register usage at this point:
+ *
+ * SPRG0 = reserved
+ * SPRG1 = guest R13
+ * SPRG2 = guest CR
+ * SPRG3 = virt. PACA
+ * R01 = host R1
+ * R02 = host R2
+ * R10 = guest PC
+ * R11 = guest MSR
+ * R12 = exit handler id
+ * R13 = PACA
+ * PACA.exmc.R9 = guest R1
+ * PACA.exmc.R10 = guest R10
+ * PACA.exmc.R11 = guest R11
+ * PACA.exmc.R12 = guest R12
+ * PACA.exmc.R13 = guest R2
+ *
+ */
+
+ /* Save the registers this trampoline uses as scratch; slb_do_exit reloads them */
+
+ std r0, (PACA_EXMC+EX_SRR0)(r13) /* r0 -> EX_SRR0 slot */
+ std r8, (PACA_EXMC+EX_DSISR)(r13) /* r8 -> EX_DSISR slot */
+ std r9, (PACA_EXMC+EX_R3)(r13) /* r9 -> EX_R3 slot */
+ std r10, (PACA_EXMC+EX_LR)(r13) /* r10 (guest PC) -> EX_LR slot */
+ std r11, (PACA_EXMC+EX_DAR)(r13) /* r11 (guest MSR) -> EX_DAR slot */
+
+ /*
+ * To get at the instruction that caused the #vmexit
+ * cheaply, we exploit the fact that the guest's
+ * virtual layout is still in place here, so we can just
+ * load from the guest's PC address
+ */
+
+ /* We only load the last instruction when it's safe: data storage and program interrupts */
+ cmpwi r12, PPC970_INTERRUPT_DATA_STORAGE
+ beq ld_last_inst
+ cmpwi r12, PPC970_INTERRUPT_PROGRAM
+ beq ld_last_inst
+
+ b no_ld_last_inst /* any other exit: r8 keeps its entry value */
+
+ld_last_inst:
+ /* Save off the guest instruction we're at */
+ /* 1) enable paging for data */
+ mfmsr r0 /* r0 = current MSR, kept for step 3 */
+ ori r8, r0, MSR_DR /* Enable paging for data */
+ mtmsr r8
+ /* 2) fetch the instruction word at the guest PC into r8 */
+ lwz r8, 0(r10)
+ /* 3) disable paging again by restoring the saved MSR */
+ mtmsr r0
+
+no_ld_last_inst:
+
+ /* Don't mess with SLB magic for 32 bit guests */
+ rldicl. r11, r11, 1, 63 /* r11 = most significant bit of the guest MSR (MSR[SF]); also sets CR0 */
+ beq slb_do_exit /* bit clear -> 32 bit guest */
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 -
+ * 5 0101 -
+ * 6 0110 -
+ * 7 0111 -
+ *
+ * 8 1000 HOST 1100
+ * 9 1001 HOST 1101
+ * a 1010 HOST 1110
+ * b 1011 HOST 1111
+ *
+ * c 1100 GUEST 1100
+ * d 1101 GUEST 1101
+ * e 1110 GUEST 1110
+ * f 1111 GUEST 1111
+ */
+
+ /* Replace 11xx -> 01xx: park every valid guest kernel segment (top ESID bits 11) at 01xx */
+
+ li r11, 0 /* r11 = SLB entry index */
+slb_loop_11xx_01xx:
+
+ slbmfee r10, r11 /* r10 = ESID word of entry r11 */
+ /* is this a 0xc..-0xf.. segment, i.e. (esid & 0xc...) == 0xc... ? */
+ rldicl r0, r10, 2, 62
+ cmpwi r0, 3
+ bne+ slb_loop_11xx_01xx_skip
+ rldicl. r0, r10, 37, 63 /* r0 = V (bit 36); skip invalid entries */
+ beq slb_loop_11xx_01xx_skip
+ slbmfev r9, r11 /* r9 = VSID word of entry r11 */
+ /* r0 = esid & ESID_MASK (keeps bits 0..35, which drops V) */
+ rldicr r0, r10, 0, 35
+ /* r0 |= CLASS_BIT(VSID): VSID-word bits 56..63 are inserted at bits 36..43 */
+ rldimi r0, r9, 56 - 36, 36 /* NOTE(review): the insert is 8 bits wide; presumably bits 57..63 are zero - confirm */
+ /* slbie(r0): invalidate the entry under its old ESID */
+ slbie r0
+ /* r0 = esid & 0x7...: clear the most significant bit (V stays set) */
+ rldicl r0, r10, 0, 1
+ or r0, r0, r11 /* r0 |= entry index */
+ /* slbmte(r9, r0): rewrite entry r11 with the same VSID and the moved ESID */
+ slbmte r9, r0
+slb_loop_11xx_01xx_skip:
+ addi r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_3)
+ cmpwi r11, 0 /* NOTE(review): immediate presumably patched with the SLB size via kvmppc_patch_slb_3 - confirm */
+ blt slb_loop_11xx_01xx
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 GUEST 1100
+ * 5 0101 GUEST 1101
+ * 6 0110 GUEST 1110
+ * 7 0111 GUEST 1111
+ *
+ * 8 1000 HOST 1100
+ * 9 1001 HOST 1101
+ * a 1010 HOST 1110
+ * b 1011 HOST 1111
+ *
+ * c 1100 -
+ * d 1101 -
+ * e 1110 -
+ * f 1111 -
+ */
+
+
+ /* Replace 10xx -> 11xx: move every valid parked host kernel segment back up into 11xx */
+
+ li r11, 0 /* r11 = SLB entry index */
+slb_loop_10xx_11xx:
+ slbmfee r10, r11 /* r10 = ESID word of entry r11 */
+ rldicl r0, r10, 2, 62 /* r0 = leftmost 2 bits of esid */
+ cmpwi r0, 2 /* only entries whose top bits are 10 are moved */
+ bne+ slb_loop_10xx_11xx_skip
+ rldicl. r0, r10, 37, 63 /* r0 = V (bit 36); skip invalid entries */
+ beq slb_loop_10xx_11xx_skip
+ slbmfev r9, r11 /* r9 = VSID word of entry r11 */
+ /* r0 = esid | 0x4...; the constant is rebuilt in r0 on every iteration */
+ lis r0, 0x4000000000000000@highest
+ rldicr r0, r0, 32, 31 /* r0 = 0x4000000000000000 */
+ or r0, r0, r10
+ or r0, r0, r11 /* r0 |= entry index */
+ slbmte r9, r0 /* rewrite entry r11 with the same VSID and the moved ESID; no slbie is issued first */
+slb_loop_10xx_11xx_skip:
+ addi r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_4)
+ cmpwi r11, 0 /* NOTE(review): immediate presumably patched with the SLB size via kvmppc_patch_slb_4 - confirm */
+ blt slb_loop_10xx_11xx
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 GUEST 1100
+ * 5 0101 GUEST 1101
+ * 6 0110 GUEST 1110
+ * 7 0111 GUEST 1111
+ *
+ * 8 1000 -
+ * 9 1001 -
+ * a 1010 -
+ * b 1011 -
+ *
+ * c 1100 HOST 1100
+ * d 1101 HOST 1101
+ * e 1110 HOST 1110
+ * f 1111 HOST 1111
+ */
+
+slb_do_exit:
+
+ /* Restore the registers saved at the top of the exit trampoline */
+
+ ld r11, (PACA_EXMC+EX_DAR)(r13) /* r11 = guest MSR again */
+ ld r10, (PACA_EXMC+EX_LR)(r13) /* r10 = guest PC again; must precede the stw below, which reuses the slot */
+ ld r9, (PACA_EXMC+EX_R3)(r13)
+ /* Save last inst: 32 bit store of r8 into the EX_LR slot */
+ stw r8, (PACA_EXMC+EX_LR)(r13) /* NOTE(review): r8 is only an instruction word if ld_last_inst ran - confirm the handler ignores it otherwise */
+ /* Restore r8 */
+ ld r8, (PACA_EXMC+EX_DSISR)(r13)
+
+ /* RFI into the highmem handler */
+ mfmsr r0
+ ori r0, r0, MSR_IR|MSR_DR|MSR_RI /* Enable paging */
+ mtsrr1 r0 /* SRR1 = current MSR with IR, DR and RI set */
+ ld r0, PACASAVEDMSR(r13) /* Highmem handler address */
+ mtsrr0 r0 /* SRR0 = handler address */
+
+ ld r0, (PACA_EXMC+EX_SRR0)(r13) /* restore r0 last; it was the scratch register above */
+
+ RFI
+kvmppc_handler_trampoline_exit_end:
+
--
1.6.0.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 07/23] Add SLB switching code for entry/exit
2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
@ 2009-07-08 4:38 ` Benjamin Herrenschmidt
2009-07-08 7:23 ` Alexander Graf
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Benjamin Herrenschmidt @ 2009-07-08 4:38 UTC (permalink / raw)
To: kvm-ppc
On Tue, 2009-07-07 at 16:17 +0200, Alexander Graf wrote:
> This is the really low level of guest entry/exit code.
>
> Usually the Linux kernel resides in virtual memory 0xc000000000000000 to
> 0xffffffffffffffff. These addresses are mapped into every userspace
> application.
>
> When going into a 32 bit guest, this is perfectly fine. That one can't
> access memory that high anyways.
>
> Going into a 64 bit guest, the guest kernel probably is in the same
> virtual memory region as the host, so we need to switch between those two.
>
> During normal entry code we're in those virtual addresses though. So
> we need a small wrapper in real memory that switches from host to guest
> high SLB state and vice versa.
>
> To store both host and guest state in the SLB, we store guest kernel SLB
> entries in a different range (0x40000000000000000 - 0x7ffffffffffffffff).
>
> For details on which entries go where, please see the patch itself.
Note that we have an unused VSID bit at the moment on 64-bit afaik. We
could probably use that to differentiate guest kernel VSIDs from host
kernel VSIDs. That would avoid having to muck around with the EAs
themselves that much no ?
I'm not sure I understand exactly what you are doing here, we should
discuss this on IRC one of these days I suppose. But you should be able
to just get rid of the host kernel SLBs completely with some care, as
there are some critical code path where taking an exception without
having the SLB entry around for entry 0 and the kernel stack will
blow...
But just blow them off, and when returning to the kernel, just put back
the ones that are needed (aka slb_flush_and_rebolt). You will need to
play carefully with that though, look at the code in slb.c, as the real
pHyp hypervisor that may lie underneath will potentially muck around the
SLBs and will restore them occasionally from the special in-memory
shadows, so you probably want to switch the content of those too.
Of course none of that will work on legacy iSeries or Power3 but I think
we can safely say we don't care :-)
Cheers,
Ben.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 07/23] Add SLB switching code for entry/exit
2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
2009-07-08 4:38 ` Benjamin Herrenschmidt
@ 2009-07-08 7:23 ` Alexander Graf
2009-07-08 7:43 ` Benjamin Herrenschmidt
2009-07-16 13:30 ` Alexander Graf
3 siblings, 0 replies; 5+ messages in thread
From: Alexander Graf @ 2009-07-08 7:23 UTC (permalink / raw)
To: kvm-ppc
On 08.07.2009, at 06:38, Benjamin Herrenschmidt wrote:
> On Tue, 2009-07-07 at 16:17 +0200, Alexander Graf wrote:
>> This is the really low level of guest entry/exit code.
>>
>> Usually the Linux kernel resides in virtual memory
>> 0xc000000000000000 to
>> 0xffffffffffffffff. These addresses are mapped into every userspace
>> application.
>>
>> When going into a 32 bit guest, this is perfectly fine. That one
>> can't
>> access memory that high anyways.
>>
>> Going into a 64 bit guest, the guest kernel probably is in the same
>> virtual memory region as the host, so we need to switch between
>> those two.
>>
>> During normal entry code we're in those virtual addresses though. So
>> we need a small wrapper in real memory that switches from host to
>> guest
>> high SLB state and vice versa.
>>
>> To store both host and guest state in the SLB, we store guest
>> kernel SLB
>> entries in a different range (0x40000000000000000 -
>> 0x7ffffffffffffffff).
>>
>> For details on which entries go where, please see the patch itself.
>
> Note that we have an unused VSID bit at the moment on 64-bit afaik. We
> could probably use that to differenciate guest kernel VSIDs from host
> kernel VSIDs. That would avoid having to muck around with the EAs
> themselves that much no ?
Well, the problem is that we can't have two ESIDs for the same EA in
the SLB. So what I tried was to have guest ESIDs and host ESIDs
(PAGE_OFFSET+) live in the same SLB by removing the most significant
bit of the guest ESID.
>
> I'm not sure I understand exactly what you are doing here, we should
> discuss this on IRC one of these days I suppose. But you should be
> able
> to just get rid of the host kernel SLBs completely with some care, as
> there are some critical code path where taking an exception without
> having the SLB entry around for entry 0 and the kernel stack will
> blow...
Yeah, I've encountered quite a bunch of those :-).
> But just blow them off, and when returning to the kernel, just put
> back
> the ones that are needed (aka slb_flush_and_rebolt). You will need to
> play carefully with that though, look at the code in slb.c, as the
> real
> pHyp hypervisor that may lie underneath will potentially muck around
> the
> SLBs and will restore them occasionally from the special in-memory
> shadows, so you probably want to switch the content of those too.
Yikes. So pHyp restores SLB entries from a shadow? Sounds like I need
to mess with that one too :-(.
I'm not really fond of all the SLB switching code in general. Best
case would probably be to have a host and guest shadow SLB in the RMA
that the real mode code can take to switch the _full_ SLB.
That way we'd also get rid of the CONTEXT_GUEST stuff in the kernel
module, where we are in Linux, but have guest SLB entries active
already.
> Of course none of that will work on legacy iSeries or Power3 but I
> think
> we can safely say we don't care :-)
Any reason it doesn't work on Power3? :-). It definitely does not work
on iSeries, though the code could be made to work there FWIW.
Alex
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 07/23] Add SLB switching code for entry/exit
2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
2009-07-08 4:38 ` Benjamin Herrenschmidt
2009-07-08 7:23 ` Alexander Graf
@ 2009-07-08 7:43 ` Benjamin Herrenschmidt
2009-07-16 13:30 ` Alexander Graf
3 siblings, 0 replies; 5+ messages in thread
From: Benjamin Herrenschmidt @ 2009-07-08 7:43 UTC (permalink / raw)
To: kvm-ppc
On Wed, 2009-07-08 at 09:23 +0200, Alexander Graf wrote:
> Well, the problem is that we can't have two ESIDs for the same EA in
> the SLB. So what I tried was to have guest ESIDs and host ESIDs
> (PAGE_OFFSET+) live in the same SLB by removing the most significant
> bit of the guest ESID.
I'll reply to that later when I understand what you are doing better.
> Yikes. So pHyp restores SLB entries from a shadow? Sounds like I need
> to mess with that one too :-(.
Right.
> I'm not really fond of all the SLB switching code in general. Best
> case would probably be to have a host and guest shadow SLB in the RMA
> that the real mode code can take to switch the _full_ SLB.
>
> That way we'd also get rid of the CONTEXT_GUEST stuff in the kernel
> module, where we are in Linux, but have guest SLB entries active
> already.
Same comment as above :-) I'm sure together we can come up with
something quite optimal but I need to catch up a bit first :-)
> Any reason it doesn't work on Power3? :-). It definitely does not work
> on iSeries, though the code could be made to work there FWIW.
On Power3 you'd have to implement STAB support instead of SLB (get
yourself a PowerPC Book3 pre-2.0 if that's available publicly and you
are a masochist :-)
On legacy iSeries, it's harder, you don't have access to real mode and
the hypervisor is more invasive. I wouldn't bother with these. Any
recent iSeries (POWER5 or later) uses pHyp and so shouldn't be a
problem.
Cheers
Ben.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 07/23] Add SLB switching code for entry/exit
2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
` (2 preceding siblings ...)
2009-07-08 7:43 ` Benjamin Herrenschmidt
@ 2009-07-16 13:30 ` Alexander Graf
3 siblings, 0 replies; 5+ messages in thread
From: Alexander Graf @ 2009-07-16 13:30 UTC (permalink / raw)
To: kvm-ppc
This is the really low level of guest entry/exit code.
Usually the Linux kernel resides in virtual memory 0xc000000000000000 to
0xffffffffffffffff. These addresses are mapped into every userspace
application.
When going into a 32 bit guest, this is perfectly fine. That one can't
access memory that high anyways.
Going into a 64 bit guest, the guest kernel probably is in the same
virtual memory region as the host, so we need to switch between those two.
During normal entry code we're in those virtual addresses though. So
we need a small wrapper in real memory that switches from host to guest
high SLB state and vice versa.
To store both host and guest state in the SLB, we store guest kernel SLB
entries in a different range (0x4000000000000000 - 0x7fffffffffffffff).
For details on which entries go where, please see the patch itself.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/book3s_64_slb.S | 456 ++++++++++++++++++++++++++++++++++++++
1 files changed, 456 insertions(+), 0 deletions(-)
create mode 100644 arch/powerpc/kvm/book3s_64_slb.S
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
new file mode 100644
index 0000000..c5d2bf3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -0,0 +1,456 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+/* Switch the SLB to the guest's context
+ *
+ * This function switches the SLB's contents to the guest's. This is achieved
+ * by switching Linux's kernel segments to unused segments and the guest's kernel
+ * segments from unused to kernel segments.
+ *
+ * Looking at the most significant nybble, we do the following on entry:
+ *
+ * 0 0000
+ * 1 0001
+ * 2 0010
+ * 3 0011
+ *
+ * 4 0100 -> c 1100
+ * 5 0101 -> d 1101
+ * 6 0110 -> e 1110
+ * 7 0111 -> f 1111
+ *
+ * 8 1000
+ * 9 1001
+ * a 1010
+ * b 1011
+ *
+ * c 1100 -> 8 1000
+ * d 1101 -> 9 1001
+ * e 1110 -> a 1010
+ * f 1111 -> b 1011
+ *
+ *
+ * That way we can run Linux as a guest, even though Linux is occupying the
+ * segments in question as host already.
+ *
+ * Required state:
+ *
+ * MSR = ~(IR|DR)
+ * R13 = PACA
+ * R0 = free
+ * R9 = guest IP
+ * R10 = guest MSR
+ * R11 = free
+ * R12 = free
+ * PACA[PACA_EXMC + EX_R9] = guest R9
+ * PACA[PACA_EXMC + EX_R10] = guest R10
+ * PACA[PACA_EXMC + EX_R11] = guest R11
+ * PACA[PACA_EXMC + EX_R12] = guest R12
+ * PACA[PACA_EXMC + EX_R13] = guest R13
+ * PACA[PACA_EXMC + EX_CCR] = guest CR
+ * PACA[PACA_EXMC + EX_R3] = guest XER
+ * SPRG1 = guest R0
+ */
+
+.global kvmppc_handler_trampoline_enter /* Guest entry trampoline: stage SRR0/SRR1, switch the SLB for 64 bit guests, then RFI */
+kvmppc_handler_trampoline_enter:
+ mtsrr0 r9 /* SRR0 = r9 (guest IP, see the required state above) */
+ mtsrr1 r10 /* SRR1 = r10 (guest MSR) */
+
+ /* Don't mess with SLB magic for 32 bit guests: go straight to the register restore */
+ rldicl. r10, r10, 1, 63 /* r10 = most significant bit of the guest MSR (MSR[SF]); also sets CR0 */
+ beq slb_do_enter /* bit clear -> 32 bit guest */
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 GUEST 1100
+ * 5 0101 GUEST 1101
+ * 6 0110 GUEST 1110
+ * 7 0111 GUEST 1111
+ *
+ * 8 1000 -
+ * 9 1001 -
+ * a 1010 -
+ * b 1011 -
+ *
+ * c 1100 HOST 1100
+ * d 1101 HOST 1101
+ * e 1110 HOST 1110
+ * f 1111 HOST 1111
+ */
+
+ /* Replace 11xx -> 10xx: park every valid host kernel segment (top ESID bits 11) at 10xx */
+
+ /* for (r11 = 0; r11 < slb_entries; r11++) */
+ li r11, 0
+slb_loop_11xx_10xx:
+
+ /* r10 = ESID word of SLB entry r11 */
+ slbmfee r10, r11
+ /* r0 = leftmost 2 bits of esid */
+ rldicl r0, r10, 2, 62
+ /* is this a 0xc..-0xf.. segment, i.e. (esid & 0xc...) == 0xc... ? */
+ cmpwi r0, 3
+ /* no? skip it then */
+ bne+ slb_loop_11xx_10xx_skip
+ /* Skip invalid entries (V=0); V is bit 36 of the ESID word */
+ rldicl. r0, r10, 37, 63
+ beq slb_loop_11xx_10xx_skip
+ /* r9 = VSID word of SLB entry r11 */
+ slbmfev r9, r11
+ /* r0 = esid & ESID_MASK (keeps bits 0..35, which drops V) */
+ rldicr r0, r10, 0, 35
+ /* r0 |= CLASS_BIT(VSID): VSID-word bits 56..63 land in bits 36..43 */
+ rldic r12, r9, 56 - 36, 36 /* NOTE(review): the mask is 8 bits wide; presumably bits 57..63 are zero - confirm */
+ or r0, r0, r12
+ /* slbie(r0): invalidate the entry under its old ESID */
+ slbie r0
+ /* r0 = esid & 0xb...: rotate bit 1 into bit 63, clear it, rotate back (V stays set) */
+ rldicr r0, r10, 2, 62
+ rotrdi r0, r0, 2
+ /* r0 |= r11 (the entry index goes into the low bits) */
+ or r0, r0, r11
+ /* slbmte(r9, r0): rewrite entry r11 with the same VSID and the moved ESID */
+ slbmte r9, r0
+slb_loop_11xx_10xx_skip:
+ addi r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_1)
+ cmpwi r11, 0 /* NOTE(review): with immediate 0 the loop body runs once; presumably patched with the SLB size via kvmppc_patch_slb_1 - confirm */
+ blt slb_loop_11xx_10xx
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 GUEST 1100
+ * 5 0101 GUEST 1101
+ * 6 0110 GUEST 1110
+ * 7 0111 GUEST 1111
+ *
+ * 8 1000 HOST 1100
+ * 9 1001 HOST 1101
+ * a 1010 HOST 1110
+ * b 1011 HOST 1111
+ *
+ * c 1100 -
+ * d 1101 -
+ * e 1110 -
+ * f 1111 -
+ */
+
+ /* Replace 01xx -> 11xx: move every valid parked guest kernel segment up into 11xx */
+
+ lis r12, 0x8000000000000000@highest /* r12 = 0x8000 << 16, sign-extended */
+ rldicr r12, r12, 32, 31 /* r12 = 0x8000000000000000 (loop-invariant top-bit mask) */
+ li r11, 0 /* r11 = SLB entry index */
+slb_loop_01xx_11xx:
+ slbmfee r10, r11 /* r10 = ESID word of entry r11 */
+ rldicl r0, r10, 2, 62 /* r0 = leftmost 2 bits of esid */
+ cmpwi r0, 1 /* only entries whose top bits are 01 are moved */
+ bne+ slb_loop_01xx_11xx_skip
+ rldicl. r0, r10, 37, 63 /* r0 = V (bit 36); skip invalid entries */
+ beq slb_loop_01xx_11xx_skip
+ slbmfev r9, r11 /* r9 = VSID word of entry r11 */
+ /* r0 = esid | 0x8...; unlike the 11xx -> 10xx loop no slbie is issued first */
+ or r0, r12, r10
+ or r0, r0, r11 /* r0 |= entry index */
+ slbmte r9, r0 /* rewrite entry r11 with the same VSID and the moved ESID */
+slb_loop_01xx_11xx_skip:
+ addi r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_2)
+ cmpwi r11, 0 /* NOTE(review): immediate presumably patched with the SLB size via kvmppc_patch_slb_2 - confirm */
+ blt slb_loop_01xx_11xx
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 -
+ * 5 0101 -
+ * 6 0110 -
+ * 7 0111 -
+ *
+ * 8 1000 HOST 1100
+ * 9 1001 HOST 1101
+ * a 1010 HOST 1110
+ * b 1011 HOST 1111
+ *
+ * c 1100 GUEST 1100
+ * d 1101 GUEST 1101
+ * e 1110 GUEST 1110
+ * f 1111 GUEST 1111
+ */
+
+slb_do_enter:
+
+ /* Enter guest: reload the guest registers staged in SPRG1 and PACA_EXMC, then RFI */
+
+ mfspr r0, SPRN_SPRG1 /* r0 = guest R0 */
+
+ ld r9, (PACA_EXMC+EX_R9)(r13) /* guest R9 */
+ ld r10, (PACA_EXMC+EX_R10)(r13) /* guest R10 */
+ ld r12, (PACA_EXMC+EX_R12)(r13) /* guest R12 */
+
+ lwz r11, (PACA_EXMC+EX_CCR)(r13) /* guest CR, with r11 as scratch */
+ mtcr r11
+
+ ld r11, (PACA_EXMC+EX_R3)(r13) /* the EX_R3 slot carries the guest XER */
+ mtxer r11
+
+ ld r11, (PACA_EXMC+EX_R11)(r13) /* guest R11, now that r11 is no longer needed as scratch */
+ ld r13, (PACA_EXMC+EX_R13)(r13) /* guest R13; must be last, it overwrites the PACA pointer */
+
+ RFI /* continue at SRR0/SRR1, which were set at the top of the trampoline */
+kvmppc_handler_trampoline_enter_end:
+
+
+
+/******************************************************************************
+ * *
+ * Exit code *
+ * *
+ *****************************************************************************/
+
+.global kvmppc_handler_trampoline_exit /* Guest exit trampoline: save scratch, fetch the last instruction, switch the SLB back, RFI to the handler */
+kvmppc_handler_trampoline_exit:
+
+ /* Register usage at this point:
+ *
+ * SPRG0 = reserved
+ * SPRG1 = guest R13
+ * SPRG2 = guest CR
+ * SPRG3 = virt. PACA
+ * R01 = host R1
+ * R02 = host R2
+ * R10 = guest PC
+ * R11 = guest MSR
+ * R12 = exit handler id
+ * R13 = PACA
+ * PACA.exmc.R9 = guest R1
+ * PACA.exmc.R10 = guest R10
+ * PACA.exmc.R11 = guest R11
+ * PACA.exmc.R12 = guest R12
+ * PACA.exmc.R13 = guest R2
+ *
+ */
+
+ /* Save the registers this trampoline uses as scratch; slb_do_exit reloads them */
+
+ std r0, (PACA_EXMC+EX_SRR0)(r13) /* r0 -> EX_SRR0 slot */
+ std r8, (PACA_EXMC+EX_DSISR)(r13) /* r8 -> EX_DSISR slot */
+ std r9, (PACA_EXMC+EX_R3)(r13) /* r9 -> EX_R3 slot */
+ std r10, (PACA_EXMC+EX_LR)(r13) /* r10 (guest PC) -> EX_LR slot */
+ std r11, (PACA_EXMC+EX_DAR)(r13) /* r11 (guest MSR) -> EX_DAR slot */
+
+ /*
+ * To get at the instruction that caused the #vmexit
+ * cheaply, we exploit the fact that the guest's
+ * virtual layout is still in place here, so we can just
+ * load from the guest's PC address
+ */
+
+ /* We only load the last instruction when it's safe: data storage and program interrupts */
+ cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE
+ beq ld_last_inst
+ cmpwi r12, BOOK3S_INTERRUPT_PROGRAM
+ beq ld_last_inst
+
+ b no_ld_last_inst /* any other exit: r8 keeps its entry value */
+
+ld_last_inst:
+ /* Save off the guest instruction we're at */
+ /* 1) enable paging for data */
+ mfmsr r0 /* r0 = current MSR, kept for step 3 */
+ ori r8, r0, MSR_DR /* Enable paging for data */
+ mtmsr r8
+ /* 2) fetch the instruction word at the guest PC into r8 */
+ lwz r8, 0(r10)
+ /* 3) disable paging again by restoring the saved MSR */
+ mtmsr r0
+
+no_ld_last_inst:
+
+ /* Don't mess with SLB magic for 32 bit guests */
+ rldicl. r11, r11, 1, 63 /* r11 = most significant bit of the guest MSR (MSR[SF]); also sets CR0 */
+ beq slb_do_exit /* bit clear -> 32 bit guest */
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 -
+ * 5 0101 -
+ * 6 0110 -
+ * 7 0111 -
+ *
+ * 8 1000 HOST 1100
+ * 9 1001 HOST 1101
+ * a 1010 HOST 1110
+ * b 1011 HOST 1111
+ *
+ * c 1100 GUEST 1100
+ * d 1101 GUEST 1101
+ * e 1110 GUEST 1110
+ * f 1111 GUEST 1111
+ */
+
+ /* Replace 11xx -> 01xx: park every valid guest kernel segment (top ESID bits 11) at 01xx */
+
+ li r11, 0 /* r11 = SLB entry index */
+slb_loop_11xx_01xx:
+
+ slbmfee r10, r11 /* r10 = ESID word of entry r11 */
+ /* is this a 0xc..-0xf.. segment, i.e. (esid & 0xc...) == 0xc... ? */
+ rldicl r0, r10, 2, 62
+ cmpwi r0, 3
+ bne+ slb_loop_11xx_01xx_skip
+ rldicl. r0, r10, 37, 63 /* r0 = V (bit 36); skip invalid entries */
+ beq slb_loop_11xx_01xx_skip
+ slbmfev r9, r11 /* r9 = VSID word of entry r11 */
+ /* r0 = esid & ESID_MASK (keeps bits 0..35, which drops V) */
+ rldicr r0, r10, 0, 35
+ /* r0 |= CLASS_BIT(VSID): VSID-word bits 56..63 are inserted at bits 36..43 */
+ rldimi r0, r9, 56 - 36, 36 /* NOTE(review): the insert is 8 bits wide; presumably bits 57..63 are zero - confirm */
+ /* slbie(r0): invalidate the entry under its old ESID */
+ slbie r0
+ /* r0 = esid & 0x7...: clear the most significant bit (V stays set) */
+ rldicl r0, r10, 0, 1
+ or r0, r0, r11 /* r0 |= entry index */
+ /* slbmte(r9, r0): rewrite entry r11 with the same VSID and the moved ESID */
+ slbmte r9, r0
+slb_loop_11xx_01xx_skip:
+ addi r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_3)
+ cmpwi r11, 0 /* NOTE(review): immediate presumably patched with the SLB size via kvmppc_patch_slb_3 - confirm */
+ blt slb_loop_11xx_01xx
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 GUEST 1100
+ * 5 0101 GUEST 1101
+ * 6 0110 GUEST 1110
+ * 7 0111 GUEST 1111
+ *
+ * 8 1000 HOST 1100
+ * 9 1001 HOST 1101
+ * a 1010 HOST 1110
+ * b 1011 HOST 1111
+ *
+ * c 1100 -
+ * d 1101 -
+ * e 1110 -
+ * f 1111 -
+ */
+
+
+ /* Replace 10xx -> 11xx: move every valid parked host kernel segment back up into 11xx */
+
+ li r11, 0 /* r11 = SLB entry index */
+slb_loop_10xx_11xx:
+ slbmfee r10, r11 /* r10 = ESID word of entry r11 */
+ rldicl r0, r10, 2, 62 /* r0 = leftmost 2 bits of esid */
+ cmpwi r0, 2 /* only entries whose top bits are 10 are moved */
+ bne+ slb_loop_10xx_11xx_skip
+ rldicl. r0, r10, 37, 63 /* r0 = V (bit 36); skip invalid entries */
+ beq slb_loop_10xx_11xx_skip
+ slbmfev r9, r11 /* r9 = VSID word of entry r11 */
+ /* r0 = esid | 0x4...; the constant is rebuilt in r0 on every iteration */
+ lis r0, 0x4000000000000000@highest
+ rldicr r0, r0, 32, 31 /* r0 = 0x4000000000000000 */
+ or r0, r0, r10
+ or r0, r0, r11 /* r0 |= entry index */
+ slbmte r9, r0 /* rewrite entry r11 with the same VSID and the moved ESID; no slbie is issued first */
+slb_loop_10xx_11xx_skip:
+ addi r11, r11, 1
+_GLOBAL(kvmppc_patch_slb_4)
+ cmpwi r11, 0 /* NOTE(review): immediate presumably patched with the SLB size via kvmppc_patch_slb_4 - confirm */
+ blt slb_loop_10xx_11xx
+
+ /*
+ * SLB most significant nybble
+ *
+ * 0 0000 GUEST 0000
+ * 1 0001 -
+ * 2 0010 -
+ * 3 0011 -
+ *
+ * 4 0100 GUEST 1100
+ * 5 0101 GUEST 1101
+ * 6 0110 GUEST 1110
+ * 7 0111 GUEST 1111
+ *
+ * 8 1000 -
+ * 9 1001 -
+ * a 1010 -
+ * b 1011 -
+ *
+ * c 1100 HOST 1100
+ * d 1101 HOST 1101
+ * e 1110 HOST 1110
+ * f 1111 HOST 1111
+ */
+
+slb_do_exit:
+
+ /* Restore the registers saved at the top of the exit trampoline */
+
+ ld r11, (PACA_EXMC+EX_DAR)(r13) /* r11 = guest MSR again */
+ ld r10, (PACA_EXMC+EX_LR)(r13) /* r10 = guest PC again; must precede the stw below, which reuses the slot */
+ ld r9, (PACA_EXMC+EX_R3)(r13)
+ /* Save last inst: 32 bit store of r8 into the EX_LR slot */
+ stw r8, (PACA_EXMC+EX_LR)(r13) /* NOTE(review): r8 is only an instruction word if ld_last_inst ran - confirm the handler ignores it otherwise */
+ /* Restore r8 */
+ ld r8, (PACA_EXMC+EX_DSISR)(r13)
+
+ /* RFI into the highmem handler */
+ mfmsr r0
+ ori r0, r0, MSR_IR|MSR_DR|MSR_RI /* Enable paging */
+ mtsrr1 r0 /* SRR1 = current MSR with IR, DR and RI set */
+ ld r0, PACASAVEDMSR(r13) /* Highmem handler address */
+ mtsrr0 r0 /* SRR0 = handler address */
+
+ ld r0, (PACA_EXMC+EX_SRR0)(r13) /* restore r0 last; it was the scratch register above */
+
+ RFI
+kvmppc_handler_trampoline_exit_end:
+
--
1.6.0.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2009-07-16 13:30 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-07 14:17 [PATCH 07/23] Add SLB switching code for entry/exit Alexander Graf
2009-07-08 4:38 ` Benjamin Herrenschmidt
2009-07-08 7:23 ` Alexander Graf
2009-07-08 7:43 ` Benjamin Herrenschmidt
2009-07-16 13:30 ` Alexander Graf
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.