All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 06/23] Add 970 highmem asm code
@ 2009-07-07 14:17 Alexander Graf
  2009-07-08  4:30 ` Benjamin Herrenschmidt
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Alexander Graf @ 2009-07-07 14:17 UTC (permalink / raw)
  To: kvm-ppc

This is the entry / exit code. In order to switch between host and guest
context, we need to switch register state and call the exit code handler on
exit.

This assembly file does exactly that. To finally enter the guest it calls
into 970_slb.S. On exit it gets jumped at from 970_slb.S too.

Add header definition for highmem handler

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_ppc.h |    1 +
 arch/powerpc/kvm/970_interrupts.S  |  422 ++++++++++++++++++++++++++++++++++++
 2 files changed, 423 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/kvm/970_interrupts.S

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2c6ee34..269ee46 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -39,6 +39,7 @@ enum emulation_result {
 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern char kvmppc_handlers_start[];
 extern unsigned long kvmppc_handler_len;
+extern void kvmppc_handler_highmem(void);
 
 extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
 extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/970_interrupts.S b/arch/powerpc/kvm/970_interrupts.S
new file mode 100644
index 0000000..14f4112
--- /dev/null
+++ b/arch/powerpc/kvm/970_interrupts.S
@@ -0,0 +1,422 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/kvm_970_asm.h>
+
+/*****************************************************************************
+ *                                                                           *
+ *     Guest entry / exit code that is in kernel module memory (highmem)     *
+ *                                                                           *
+ ****************************************************************************/
+
+/* Guest entry point. Registers on entry:
+ *  r3: kvm_run pointer
+ *  r4: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_entry)
+
+kvm_start_entry:
+
+	/* Push a stack frame and save host state (r3, r4, r14-r31, LR) in it */
+	stdu	r1, -HOST_STACK_SIZE(r1)
+	std	r3, HOST_STACK_RUN(r1)
+	std	r4, HOST_STACK_VCPU(r1)
+
+	std	r14, HOST_STACK_R14(r1)
+	std	r15, HOST_STACK_R15(r1)
+	std	r16, HOST_STACK_R16(r1)
+	std	r17, HOST_STACK_R17(r1)
+	std	r18, HOST_STACK_R18(r1)
+	std	r19, HOST_STACK_R19(r1)
+	std	r20, HOST_STACK_R20(r1)
+	std	r21, HOST_STACK_R21(r1)
+	std	r22, HOST_STACK_R22(r1)
+	std	r23, HOST_STACK_R23(r1)
+	std	r24, HOST_STACK_R24(r1)
+	std	r25, HOST_STACK_R25(r1)
+	std	r26, HOST_STACK_R26(r1)
+	std	r27, HOST_STACK_R27(r1)
+	std	r28, HOST_STACK_R28(r1)
+	std	r29, HOST_STACK_R29(r1)
+	std	r30, HOST_STACK_R30(r1)
+	std	r31, HOST_STACK_R31(r1)
+	mflr	r14			/* r14 is free as scratch: saved above */
+	std	r14, HOST_STACK_LR(r1)
+
+/* XXX optimize non-volatile loading away */
+kvm_start_lightweight:
+
+	DISABLE_INTERRUPTS
+
+	/* Save R1/R2 and the highmem handler address in the PACA */
+	std	r1, PACAR1(r13)
+	std	r2, (PACA_EXMC+EX_SRR0)(r13)	/* r2 goes into the EX_SRR0 slot */
+	ld	r3, VCPU_HIGHMEM_HANDLER(r4)
+	std	r3, PACASAVEDMSR(r13)		/* handler address goes into PACASAVEDMSR */
+
+	/* Load non-volatile guest state (r14-r31) from the vcpu */
+	ld	r14, VCPU_GPR(r14)(r4)
+	ld	r15, VCPU_GPR(r15)(r4)
+	ld	r16, VCPU_GPR(r16)(r4)
+	ld	r17, VCPU_GPR(r17)(r4)
+	ld	r18, VCPU_GPR(r18)(r4)
+	ld	r19, VCPU_GPR(r19)(r4)
+	ld	r20, VCPU_GPR(r20)(r4)
+	ld	r21, VCPU_GPR(r21)(r4)
+	ld	r22, VCPU_GPR(r22)(r4)
+	ld	r23, VCPU_GPR(r23)(r4)
+	ld	r24, VCPU_GPR(r24)(r4)
+	ld	r25, VCPU_GPR(r25)(r4)
+	ld	r26, VCPU_GPR(r26)(r4)
+	ld	r27, VCPU_GPR(r27)(r4)
+	ld	r28, VCPU_GPR(r28)(r4)
+	ld	r29, VCPU_GPR(r29)(r4)
+	ld	r30, VCPU_GPR(r30)(r4)
+	ld	r31, VCPU_GPR(r31)(r4)
+
+	ld	r9, VCPU_PC(r4)			/* r9 = vcpu->arch.pc */
+	ld	r10, VCPU_SHADOW_MSR(r4)	/* r10 = vcpu->arch.shadow_msr */
+
+	ld	r3, VCPU_TRAMPOLINE_ENTER(r4)
+	mtsrr0	r3
+
+	loadimm	r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)
+	mtsrr1	r3
+
+	/* Load guest state into registers, or park it in the PACA */
+	lwz	r3, VCPU_CR(r4)		/* r3 = vcpu->arch.cr */
+	stw	r3, (PACA_EXMC + EX_CCR)(r13)	/* guest CR goes into the EX_CCR slot */
+
+	ld	r3, VCPU_CTR(r4)	/* r3 = vcpu->arch.ctr */
+	mtctr	r3			/* CTR = r3 */
+
+	ld	r3, VCPU_LR(r4)		/* r3 = vcpu->arch.lr */
+	mtlr	r3			/* LR = r3 */
+
+	ld	r3, VCPU_XER(r4)	/* r3 = vcpu->arch.xer */
+	std	r3, (PACA_EXMC + EX_R3)(r13)	/* guest XER goes into the EX_R3 slot */
+
+	/* This sets the Magic value for the trampoline:
+	 *
+	 * PPC64: SPRG3 |= 1
+	 */
+	setmagc	r3
+
+	/* Some guests may need to have dcbz set to 32 byte length.
+	 *
+	 * Usually we ensure that by patching the guest's instructions
+	 * to trap on dcbz and emulate it in the hypervisor.
+	 *
+	 * If we can, we should tell the CPU to use 32 byte dcbz though,
+	 * because that's a lot faster.
+	 */
+
+	ld	r3, VCPU_HFLAGS(r4)
+	rldicl.	r3, r3, 0, 63		/* CR0.EQ = ((r3 & 1) == 0) */
+	beq	no_dcbz32_on
+
+	mfspr   r3,SPRN_HID5
+	ori     r3, r3, 0x80		/* XXX HID5_dcbz32 = 0x80 */
+	mtspr   SPRN_HID5,r3
+
+no_dcbz32_on:
+	/* Park guest r9-r13 in the PACA; r9/r10 currently hold guest PC/MSR */
+
+	ld	r3, VCPU_GPR(r9)(r4)
+	std	r3, (PACA_EXMC + EX_R9)(r13)
+	ld	r3, VCPU_GPR(r10)(r4)
+	std	r3, (PACA_EXMC + EX_R10)(r13)
+	ld	r3, VCPU_GPR(r11)(r4)
+	std	r3, (PACA_EXMC + EX_R11)(r13)
+	ld	r3, VCPU_GPR(r12)(r4)
+	std	r3, (PACA_EXMC + EX_R12)(r13)
+	ld	r3, VCPU_GPR(r13)(r4)
+	std	r3, (PACA_EXMC + EX_R13)(r13)
+
+	ld	r0, VCPU_GPR(r0)(r4)
+	mtspr	SPRN_SPRG1, r0		/* SPRG1 = guest r0 */
+
+	ld	r1, VCPU_GPR(r1)(r4)
+	ld	r2, VCPU_GPR(r2)(r4)
+	ld	r3, VCPU_GPR(r3)(r4)
+	ld	r5, VCPU_GPR(r5)(r4)
+	ld	r6, VCPU_GPR(r6)(r4)
+	ld	r7, VCPU_GPR(r7)(r4)
+	ld	r8, VCPU_GPR(r8)(r4)
+	ld	r4, VCPU_GPR(r4)(r4)	/* r4 last: it is the vcpu base pointer */
+
+	/* Jump to SLB patching handler and into our guest */
+	RFI
+
+/*
+ * This is the handler in module memory. It gets jumped to from the
+ * lowmem trampoline code, so it's basically the guest exit code.
+ *
+ */
+
+.global kvmppc_handler_highmem
+kvmppc_handler_highmem:
+
+	/* SPRG usage at this point:
+	 *
+	 * SPRG0 = reserved
+	 * SPRG1 = guest R13
+	 * SPRG2 = guest CR
+	 * SPRG3 = virt. PACA
+	 * R01   = host R1
+	 * R02   = host R2
+	 * R10   = guest PC
+	 * R11   = guest MSR
+	 * R12   = exit handler id
+	 * R13   = PACA
+	 * PACA.exgen.R9  = guest R1
+	 * PACA.exgen.R10 = guest R10
+	 * PACA.exgen.R11 = guest R11
+	 * PACA.exgen.R12 = guest R12
+	 * PACA.exgen.R13 = guest R2
+	 * PACA.exgen.LR  = guest instruction
+	 * NOTE(review): the code below reads PACA_EXMC slots, not exgen -- confirm
+	 */
+
+	std	r3, (PACA_EXMC+EX_R3)(r13)	/* park r3 so it can hold the exit id */
+
+	/* save the exit id in R3 */
+	mr	r3, r12
+
+	/* R12 = vcpu */
+	ld	r12, HOST_STACK_VCPU(r1)
+
+	/* Now save the guest state */
+
+	std	r0, VCPU_GPR(r0)(r12)
+	std	r4, VCPU_GPR(r4)(r12)
+	std	r5, VCPU_GPR(r5)(r12)
+	std	r6, VCPU_GPR(r6)(r12)
+	std	r7, VCPU_GPR(r7)(r12)
+	std	r8, VCPU_GPR(r8)(r12)
+	std	r9, VCPU_GPR(r9)(r12)
+
+	/* R13 is in SPRG1 */
+	mfspr	r5, SPRN_SPRG1
+	std	r5, VCPU_GPR(r13)(r12)
+
+	/* get registers from PACA (mfpaca is defined elsewhere; presumably PACA slot -> vcpu GPR via r5 -- confirm) */
+	mfpaca	r5, r3, EX_R3, r12
+	mfpaca	r5, r1, EX_R9, r12
+	mfpaca	r5, r10, EX_R10, r12
+	mfpaca	r5, r11, EX_R11, r12
+	mfpaca	r5, r12, EX_R12, r12
+	mfpaca	r5, r2, EX_R13, r12
+
+	lwz	r5, (PACA_EXMC+EX_LR)(r13)	/* 32-bit guest instruction, per the table above */
+	stw	r5, VCPU_LAST_INST(r12)
+
+	ld	r5, VCPU_HFLAGS(r12)
+	rldicl.	r5, r5, 0, 63		/* CR0.EQ = ((r5 & 1) == 0) */
+	beq	no_dcbz32_off
+
+	mfspr   r5,SPRN_HID5
+	rldimi  r5,r5,6,56		/* XXX presumably clears the 0x80 dcbz32 bit set on entry -- confirm */
+	mtspr   SPRN_HID5,r5
+
+no_dcbz32_off:
+
+	/* Save guest non-volatiles (r14-r31). XXX maybe skip on lightweight? */
+	std	r14, VCPU_GPR(r14)(r12)
+	std	r15, VCPU_GPR(r15)(r12)
+	std	r16, VCPU_GPR(r16)(r12)
+	std	r17, VCPU_GPR(r17)(r12)
+	std	r18, VCPU_GPR(r18)(r12)
+	std	r19, VCPU_GPR(r19)(r12)
+	std	r20, VCPU_GPR(r20)(r12)
+	std	r21, VCPU_GPR(r21)(r12)
+	std	r22, VCPU_GPR(r22)(r12)
+	std	r23, VCPU_GPR(r23)(r12)
+	std	r24, VCPU_GPR(r24)(r12)
+	std	r25, VCPU_GPR(r25)(r12)
+	std	r26, VCPU_GPR(r26)(r12)
+	std	r27, VCPU_GPR(r27)(r12)
+	std	r28, VCPU_GPR(r28)(r12)
+	std	r29, VCPU_GPR(r29)(r12)
+	std	r30, VCPU_GPR(r30)(r12)
+	std	r31, VCPU_GPR(r31)(r12)
+
+	/* Restore non-volatile host registers from the host stack frame */
+	ld	r14, HOST_STACK_R14(r1)
+	ld	r15, HOST_STACK_R15(r1)
+	ld	r16, HOST_STACK_R16(r1)
+	ld	r17, HOST_STACK_R17(r1)
+	ld	r18, HOST_STACK_R18(r1)
+	ld	r19, HOST_STACK_R19(r1)
+	ld	r20, HOST_STACK_R20(r1)
+	ld	r21, HOST_STACK_R21(r1)
+	ld	r22, HOST_STACK_R22(r1)
+	ld	r23, HOST_STACK_R23(r1)
+	ld	r24, HOST_STACK_R24(r1)
+	ld	r25, HOST_STACK_R25(r1)
+	ld	r26, HOST_STACK_R26(r1)
+	ld	r27, HOST_STACK_R27(r1)
+	ld	r28, HOST_STACK_R28(r1)
+	ld	r29, HOST_STACK_R29(r1)
+	ld	r30, HOST_STACK_R30(r1)
+	ld	r31, HOST_STACK_R31(r1)
+
+	/* Save guest PC (R10) */
+	std	r10, VCPU_PC(r12)
+
+	/* Save guest msr (R11) */
+	std	r11, VCPU_SHADOW_MSR(r12)
+
+	/* Save guest CR (SPRG2) */
+	mfspr	r5, SPRN_SPRG2
+	stw	r5, VCPU_CR(r12)
+
+	/* Save guest CTR (r12 is the vcpu pointer here) */
+	mfctr	r5
+	std	r5, VCPU_CTR(r12)
+
+	/* Save guest LR */
+	mflr	r5
+	std	r5, VCPU_LR(r12)
+
+	/* Save guest XER */
+	mfxer	r5
+	std	r5, VCPU_XER(r12)
+
+	/* Save guest DAR */
+	mfdar	r5
+	std	r5, VCPU_FAULT_DEAR(r12)
+
+	/* Save guest DSISR */
+	mfdsisr	r5
+	std	r5, VCPU_FAULT_DSISR(r12)
+
+	/* Restore host msr -> SRR1 */
+	ld	r7, VCPU_HOST_MSR(r12)
+	mtsrr1	r7
+
+	/* Restore host IP -> SRR0 */
+	ld	r6, VCPU_HOST_RETIP(r12)
+	mtsrr0	r6
+
+	/* For some interrupts, we need to call the real Linux */
+	/* handler, so it can do work for us. This has to happen */
+	/* as if the interrupt arrived from the kernel though, */
+	/* so let's fake it here where most state is restored. */
+
+	/* Call Linux for hardware interrupts/decrementer */
+	/* r3 = address of interrupt handler (exit reason) */
+
+	cmpwi	r3, PPC970_INTERRUPT_EXTERNAL
+	beq	call_linux_handler
+	cmpwi	r3, PPC970_INTERRUPT_DECREMENTER
+	beq	call_linux_handler
+
+	/* Back to Paged Mode! (goto kvm_return_point) with interrupts enabled */
+	RFI
+
+call_linux_handler:
+
+	/* If we land here we need to jump back to the handler we */
+	/* came from. */
+
+	/* We have a page that we can access from real mode, so let's */
+	/* jump back to that and use it as a trampoline to get back into the */
+	/* interrupt handler! */
+
+	/* Mark soft interrupts enabled (PACASOFTIRQEN = 1), so the handler acts */
+	li	r5, 1
+	stb	r5, PACASOFTIRQEN(r13)
+
+	/* R3 still contains the exit code, */
+	/* R6 VCPU_HOST_RETIP and */
+	/* R7 VCPU_HOST_MSR */
+
+	mtlr	r3			/* LR = exit code (r3) */
+
+	ld	r5, VCPU_TRAMPOLINE_LOWMEM(r12)
+	mtsrr0	r5			/* RFI target: lowmem trampoline */
+	loadimm	r5, MSR_KERNEL & ~(MSR_IR | MSR_DR)
+	mtsrr1	r5			/* RFI MSR: MSR_KERNEL with IR/DR cleared */
+
+	RFI
+
+.global kvm_return_point
+kvm_return_point:
+
+	/* Call the exit handler with r3 = kvm_run, r4 = vcpu and */
+	/* r5 = the value r3 held on arrival here */
+	mr	r5, r3
+	ld	r3, HOST_STACK_RUN(r1)
+	ld	r4, HOST_STACK_VCPU(r1)
+	bl	KVMPPC_HANDLE_EXIT
+
+#if 0 /* XXX get lightweight exits back */
+	cmpwi	r3, RESUME_GUEST
+	bne	kvm_exit_heavyweight
+
+	/* put VCPU and KVM_RUN back into place and roll again! */
+	ld	r3, HOST_STACK_RUN(r1)
+	ld	r4, HOST_STACK_VCPU(r1)
+	b	kvm_start_lightweight
+
+kvm_exit_heavyweight:
+	/* Restore non-volatile host registers */
+	ld	r14, HOST_STACK_LR(r1)
+	mtlr	r14
+	ld	r14, HOST_STACK_R14(r1)
+	ld	r15, HOST_STACK_R15(r1)
+	ld	r16, HOST_STACK_R16(r1)
+	ld	r17, HOST_STACK_R17(r1)
+	ld	r18, HOST_STACK_R18(r1)
+	ld	r19, HOST_STACK_R19(r1)
+	ld	r20, HOST_STACK_R20(r1)
+	ld	r21, HOST_STACK_R21(r1)
+	ld	r22, HOST_STACK_R22(r1)
+	ld	r23, HOST_STACK_R23(r1)
+	ld	r24, HOST_STACK_R24(r1)
+	ld	r25, HOST_STACK_R25(r1)
+	ld	r26, HOST_STACK_R26(r1)
+	ld	r27, HOST_STACK_R27(r1)
+	ld	r28, HOST_STACK_R28(r1)
+	ld	r29, HOST_STACK_R29(r1)
+	ld	r30, HOST_STACK_R30(r1)
+	ld	r31, HOST_STACK_R31(r1)
+
+	addi    r1, r1, HOST_STACK_SIZE
+#else
+	ld	r4, HOST_STACK_LR(r1)	/* restore the LR saved at kvm_start_entry */
+	mtlr	r4
+
+	cmpwi	r3, RESUME_GUEST	/* r3 = return value of KVMPPC_HANDLE_EXIT */
+	bne	kvm_exit_heavyweight
+
+	ld	r3, HOST_STACK_RUN(r1)
+	ld	r4, HOST_STACK_VCPU(r1)
+
+	addi    r1, r1, HOST_STACK_SIZE	/* pop our frame; kvm_start_entry pushes a new one */
+
+	b	kvm_start_entry
+
+kvm_exit_heavyweight:
+
+	addi    r1, r1, HOST_STACK_SIZE	/* pop the frame pushed at kvm_start_entry */
+#endif
+
+	blr
-- 
1.6.0.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 06/23] Add 970 highmem asm code
  2009-07-07 14:17 [PATCH 06/23] Add 970 highmem asm code Alexander Graf
@ 2009-07-08  4:30 ` Benjamin Herrenschmidt
  2009-07-08  7:14 ` Alexander Graf
  2009-07-08  7:37 ` Benjamin Herrenschmidt
  2 siblings, 0 replies; 4+ messages in thread
From: Benjamin Herrenschmidt @ 2009-07-08  4:30 UTC (permalink / raw)
  To: kvm-ppc

On Tue, 2009-07-07 at 16:17 +0200, Alexander Graf wrote:
> This is the of entry / exit code. In order to switch between host and guest
> context, we need to switch register state and call the exit code handler on
> exit.
> 
> This assembly file does exactly that. To finally enter the guest it calls
> into 970_slb.S. On exit it gets jumped at from 970_slb.S too.
> 
> Add header definition for highmem handler
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---

Why "highmem" ? IE, That terminology usually means something completely
different in Linux :-) (Aka, memory beyond the linear mapping which is
a concept that does not exist on ppc64).

I suppose you mean code that runs outside of the RMA ? (AKA Real Memory
Area, which is the memory that can be accessed while in real mode).

I have a few comments, this is in no way an in-depth review, I don't yet
totally see the big picture of your implementation but a few things I
spotted along the way:

>  arch/powerpc/include/asm/kvm_ppc.h |    1 +

 .../...

> +
> +/*****************************************************************************
> + *                                                                           *
> + *     Guest entry / exit code that is in kernel module memory (highmem)     *
> + *                                                                           *
> + ****************************************************************************/
> +
> +/* Registers:
> + *  r3: kvm_run pointer
> + *  r4: vcpu pointer
> + */
> +_GLOBAL(__kvmppc_vcpu_entry)
> +
> +kvm_start_entry:
> +
> +	/* Save host state to the stack */
> +	stdu	r1, -HOST_STACK_SIZE(r1)
> +	std	r3, HOST_STACK_RUN(r1)
> +	std	r4, HOST_STACK_VCPU(r1)
> +
> +	std	r14, HOST_STACK_R14(r1)
> +	std	r15, HOST_STACK_R15(r1)
> +	std	r16, HOST_STACK_R16(r1)
> +	std	r17, HOST_STACK_R17(r1)
> +	std	r18, HOST_STACK_R18(r1)
> +	std	r19, HOST_STACK_R19(r1)
> +	std	r20, HOST_STACK_R20(r1)
> +	std	r21, HOST_STACK_R21(r1)
> +	std	r22, HOST_STACK_R22(r1)
> +	std	r23, HOST_STACK_R23(r1)
> +	std	r24, HOST_STACK_R24(r1)
> +	std	r25, HOST_STACK_R25(r1)
> +	std	r26, HOST_STACK_R26(r1)
> +	std	r27, HOST_STACK_R27(r1)
> +	std	r28, HOST_STACK_R28(r1)
> +	std	r29, HOST_STACK_R29(r1)
> +	std	r30, HOST_STACK_R30(r1)
> +	std	r31, HOST_STACK_R31(r1)
> +	mflr	r14
> +	std	r14, HOST_STACK_LR(r1)

Can we make that look closer to a pt_regs maybe or is that not
worth it ?

> +/* XXX optimize non-volatile loading away */
> +kvm_start_lightweight:
> +
> +	DISABLE_INTERRUPTS
> +
> +	/* Save R1/R2 in the PACA */
> +	std	r1, PACAR1(r13)
> +	std	r2, (PACA_EXMC+EX_SRR0)(r13)
> +	ld	r3, VCPU_HIGHMEM_HANDLER(r4)
> +	std	r3, PACASAVEDMSR(r13)
> +
> +	/* Load non-volatile guest state from the vcpu */
> +	ld	r14, VCPU_GPR(r14)(r4)
> +	ld	r15, VCPU_GPR(r15)(r4)
> +	ld	r16, VCPU_GPR(r16)(r4)
> +	ld	r17, VCPU_GPR(r17)(r4)
> +	ld	r18, VCPU_GPR(r18)(r4)
> +	ld	r19, VCPU_GPR(r19)(r4)
> +	ld	r20, VCPU_GPR(r20)(r4)
> +	ld	r21, VCPU_GPR(r21)(r4)
> +	ld	r22, VCPU_GPR(r22)(r4)
> +	ld	r23, VCPU_GPR(r23)(r4)
> +	ld	r24, VCPU_GPR(r24)(r4)
> +	ld	r25, VCPU_GPR(r25)(r4)
> +	ld	r26, VCPU_GPR(r26)(r4)
> +	ld	r27, VCPU_GPR(r27)(r4)
> +	ld	r28, VCPU_GPR(r28)(r4)
> +	ld	r29, VCPU_GPR(r29)(r4)
> +	ld	r30, VCPU_GPR(r30)(r4)
> +	ld	r31, VCPU_GPR(r31)(r4)
> +
> +	ld	r9, VCPU_PC(r4)			/* r9 = vcpu->arch.pc */
> +	ld	r10, VCPU_SHADOW_MSR(r4)	/* r10 = vcpu->arch.shadow_msr */
> +
> +	ld	r3, VCPU_TRAMPOLINE_ENTER(r4)
> +	mtsrr0	r3
> +
> +	loadimm	r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)
> +	mtsrr1	r3
> +
> +	/* Load guest state in the respective registers */
> +	lwz	r3, VCPU_CR(r4)		/* r3 = vcpu->arch.cr */
> +	stw	r3, (PACA_EXMC + EX_CCR)(r13)
> +
> +	ld	r3, VCPU_CTR(r4)	/* r3 = vcpu->arch.ctr */
> +	mtctr	r3			/* CTR = r3 */
> +
> +	ld	r3, VCPU_LR(r4)		/* r3 = vcpu->arch.lr */
> +	mtlr	r3			/* LR = r3 */
> +
> +	ld	r3, VCPU_XER(r4)	/* r3 = vcpu->arch.xer */
> +	std	r3, (PACA_EXMC + EX_R3)(r13)
> +
> +	/* This sets the Magic value for the trampoline:
> +	 *
> +	 * PPC64: SPRG3 |= 1
> +	 */
> +	setmagc	r3
> +
> +	/* Some guests may need to have dcbz set to 32 byte length.
> +	 *
> +	 * Usually we ensure that by patching the guest's instructions
> +	 * to trap on dcbz and emulate it in the hypervisor.
> +	 *
> +	 * If we can, we should tell the CPU to use 32 byte dcbz though,
> +	 * because that's a lot faster.
> +	 */
> +
> +	ld	r3, VCPU_HFLAGS(r4)
> +	rldicl.	r3, r3, 0, 63		/* CR = ((r3 & 1) = 0) */
> +	beq	no_dcbz32_on
> +
> +	mfspr   r3,SPRN_HID5
> +	ori     r3, r3, 0x80		/* XXX HID5_dcbz32 = 0x80 */
> +	mtspr   SPRN_HID5,r3
> +
> +no_dcbz32_on:

The whole dcbz stuff could probably be a cpufeature block so it
gets nop'ed out when running on other processors than 970 since
they don't all support that magic dcbz trick. Also, I think HID5
is a HV reserved register thus you won't be able to do that trick
when running yourself with MSR:HV=0, for example when running on
a js2x blade.

> +	/* Save guest DAR */
> +	mfdar	r5
> +	std	r5, VCPU_FAULT_DEAR(r12)

The guest is running with MSR:PR set to 0 or 1 ? If 1, it doesn't have
access to DAR or DSISR so I don't quite see the point of
saving/restoring them here, you can just hand out the register straight
off your shadow when taking the protection faults as the guest tries
to access them. If the guest is running with PR:0 then there is no
protection of the host against the guest which sucks :-)

Or do I miss something ?

> +	/* Save guest DSISR */
> +	mfdsisr	r5
> +	std	r5, VCPU_FAULT_DSISR(r12)
> +
> +	/* Restore host msr -> SRR1 */
> +	ld	r7, VCPU_HOST_MSR(r12)
> +	mtsrr1	r7
> +
> +	/* Restore host IP -> SRR0 */
> +	ld	r6, VCPU_HOST_RETIP(r12)
> +	mtsrr0	r6
> +
> +	/* For some interrupts, we need to call the real Linux */
> +	/* handler, so it can do work for us. This has to happen */
> +	/* as if the interrupt arrived from the kernel though, */
> +	/* so let's fake it here where most state is restored. */
> +
> +	/* Call Linux for hardware interrupts/decrementer */
> +	/* r3 = address of interrupt handler (exit reason) */
> +
> +	cmpwi	r3, PPC970_INTERRUPT_EXTERNAL
> +	beq	call_linux_handler
> +	cmpwi	r3, PPC970_INTERRUPT_DECREMENTER
> +	beq	call_linux_handler
> +
> +	/* Back to Paged Mode! (goto kvm_return_point) with interrupts enabled */
> +	RFI

Ok so I need to understand better the whole model... ie how you get
in/out of the guest etc... I would have thought you wanted to call into
kernel interrupts such as DEC or EE as if coming from userspace
actually... 

> +call_linux_handler:
> +
> +	/* If we land here we need to jump back to the handler we */
> +	/* came from. */
> +
> +	/* We have a page that we can access from real mode, so let's */
> +	/* jump back to that and use it as a trampoline to get back into the */
> +	/* interrupt handler! */
> +
> +	/* Enable soft interrupts again, so the handler acts */
> +	li	r5, 1
> +	stb	r5, PACASOFTIRQEN(r13)

But we aren't supposed to enter the timer or EE with softirq enabled...
BTW we probably also need to record some of that stuff with lockdep
but we can look at that later.

> +	/* R3 still contains the exit code, */
> +	/* R6 VCPU_HOST_RETIP and */
> +	/* R7 VCPU_HOST_MSR */
> +
> +	mtlr	r3
> +
> +	ld	r5, VCPU_TRAMPOLINE_LOWMEM(r12)
> +	mtsrr0	r5
> +	loadimm	r5, MSR_KERNEL & ~(MSR_IR | MSR_DR)
> +	mtsrr1	r5
> +
> +	RFI
> +
> +.global kvm_return_point
> +kvm_return_point:
> +
> +	/* Jump back to lightweight entry if we're supposed to */
> +	/* go back into the guest */
> +	mr	r5, r3
> +	ld	r3, HOST_STACK_RUN(r1)
> +	ld	r4, HOST_STACK_VCPU(r1)
> +	bl	KVMPPC_HANDLE_EXIT
> +
> +#if 0 /* XXX get lightweight exits back */
> +	cmpwi	r3, RESUME_GUEST
> +	bne	kvm_exit_heavyweight
> +
> +	/* put VCPU and KVM_RUN back into place and roll again! */
> +	ld	r3, HOST_STACK_RUN(r1)
> +	ld	r4, HOST_STACK_VCPU(r1)
> +	b	kvm_start_lightweight
> +
> +kvm_exit_heavyweight:
> +	/* Restore non-volatile host registers */
> +	ld	r14, HOST_STACK_LR(r1)
> +	mtlr	r14
> +	ld	r14, HOST_STACK_R14(r1)
> +	ld	r15, HOST_STACK_R15(r1)
> +	ld	r16, HOST_STACK_R16(r1)
> +	ld	r17, HOST_STACK_R17(r1)
> +	ld	r18, HOST_STACK_R18(r1)
> +	ld	r19, HOST_STACK_R19(r1)
> +	ld	r20, HOST_STACK_R20(r1)
> +	ld	r21, HOST_STACK_R21(r1)
> +	ld	r22, HOST_STACK_R22(r1)
> +	ld	r23, HOST_STACK_R23(r1)
> +	ld	r24, HOST_STACK_R24(r1)
> +	ld	r25, HOST_STACK_R25(r1)
> +	ld	r26, HOST_STACK_R26(r1)
> +	ld	r27, HOST_STACK_R27(r1)
> +	ld	r28, HOST_STACK_R28(r1)
> +	ld	r29, HOST_STACK_R29(r1)
> +	ld	r30, HOST_STACK_R30(r1)
> +	ld	r31, HOST_STACK_R31(r1)
> +
> +	addi    r1, r1, HOST_STACK_SIZE
> +#else
> +	ld	r4, HOST_STACK_LR(r1)
> +	mtlr	r4
> +
> +	cmpwi	r3, RESUME_GUEST
> +	bne	kvm_exit_heavyweight
> +
> +	ld	r3, HOST_STACK_RUN(r1)
> +	ld	r4, HOST_STACK_VCPU(r1)
> +
> +	addi    r1, r1, HOST_STACK_SIZE
> +
> +	b	kvm_start_entry
> +
> +kvm_exit_heavyweight:
> +
> +	addi    r1, r1, HOST_STACK_SIZE
> +#endif
> +
> +	blr
> -- 
> 1.6.0.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 06/23] Add 970 highmem asm code
  2009-07-07 14:17 [PATCH 06/23] Add 970 highmem asm code Alexander Graf
  2009-07-08  4:30 ` Benjamin Herrenschmidt
@ 2009-07-08  7:14 ` Alexander Graf
  2009-07-08  7:37 ` Benjamin Herrenschmidt
  2 siblings, 0 replies; 4+ messages in thread
From: Alexander Graf @ 2009-07-08  7:14 UTC (permalink / raw)
  To: kvm-ppc


On 08.07.2009, at 06:30, Benjamin Herrenschmidt wrote:

> On Tue, 2009-07-07 at 16:17 +0200, Alexander Graf wrote:
>> This is the of entry / exit code. In order to switch between host  
>> and guest
>> context, we need to switch register state and call the exit code  
>> handler on
>> exit.
>>
>> This assembly file does exactly that. To finally enter the guest it  
>> calls
>> into 970_slb.S. On exit it gets jumped at from 970_slb.S too.
>>
>> Add header definition for highmem handler
>>
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> ---
>
> Why "highmem" ? IE, That terminology usually means something  
> completely
> different in Linux :-) (Aka, memory beyond the linear mapping which is
> a concept that does not exist on ppc64).
>
> I suppose you mean code that runs outside of the RMA ? (AKA Real  
> Memory
> Area, which is the memory that can be accessed while in real mode).

Yes, I'm open for naming convention suggestions :-).

> I have a few comments, this is in no way an in-depth review, I don't  
> yet
> totally see the big picture of your implementation but a few things I
> spotted along the way:
>
>> arch/powerpc/include/asm/kvm_ppc.h |    1 +
>
> .../...

mh?

>
>> +
>> +/ 
>> *****************************************************************************
>> +  
>> *                                                                           *
>> + *     Guest entry / exit code that is in kernel module memory  
>> (highmem)     *
>> +  
>> *                                                                           *
>> +  
>> ****************************************************************************/
>> +
>> +/* Registers:
>> + *  r3: kvm_run pointer
>> + *  r4: vcpu pointer
>> + */
>> +_GLOBAL(__kvmppc_vcpu_entry)
>> +
>> +kvm_start_entry:
>> +
>> +	/* Save host state to the stack */
>> +	stdu	r1, -HOST_STACK_SIZE(r1)
>> +	std	r3, HOST_STACK_RUN(r1)
>> +	std	r4, HOST_STACK_VCPU(r1)
>> +
>> +	std	r14, HOST_STACK_R14(r1)
>> +	std	r15, HOST_STACK_R15(r1)
>> +	std	r16, HOST_STACK_R16(r1)
>> +	std	r17, HOST_STACK_R17(r1)
>> +	std	r18, HOST_STACK_R18(r1)
>> +	std	r19, HOST_STACK_R19(r1)
>> +	std	r20, HOST_STACK_R20(r1)
>> +	std	r21, HOST_STACK_R21(r1)
>> +	std	r22, HOST_STACK_R22(r1)
>> +	std	r23, HOST_STACK_R23(r1)
>> +	std	r24, HOST_STACK_R24(r1)
>> +	std	r25, HOST_STACK_R25(r1)
>> +	std	r26, HOST_STACK_R26(r1)
>> +	std	r27, HOST_STACK_R27(r1)
>> +	std	r28, HOST_STACK_R28(r1)
>> +	std	r29, HOST_STACK_R29(r1)
>> +	std	r30, HOST_STACK_R30(r1)
>> +	std	r31, HOST_STACK_R31(r1)
>> +	mflr	r14
>> +	std	r14, HOST_STACK_LR(r1)
>
> Can we make that look closer to a pt_regs maybe or is that not
> worth it ?

Yeah, that should be definitely possible. While it's not really  
necessary it makes the code smaller, so it's probably worth it ;-).

>
>> +/* XXX optimize non-volatile loading away */
>> +kvm_start_lightweight:
>> +
>> +	DISABLE_INTERRUPTS
>> +
>> +	/* Save R1/R2 in the PACA */
>> +	std	r1, PACAR1(r13)
>> +	std	r2, (PACA_EXMC+EX_SRR0)(r13)
>> +	ld	r3, VCPU_HIGHMEM_HANDLER(r4)
>> +	std	r3, PACASAVEDMSR(r13)
>> +
>> +	/* Load non-volatile guest state from the vcpu */
>> +	ld	r14, VCPU_GPR(r14)(r4)
>> +	ld	r15, VCPU_GPR(r15)(r4)
>> +	ld	r16, VCPU_GPR(r16)(r4)
>> +	ld	r17, VCPU_GPR(r17)(r4)
>> +	ld	r18, VCPU_GPR(r18)(r4)
>> +	ld	r19, VCPU_GPR(r19)(r4)
>> +	ld	r20, VCPU_GPR(r20)(r4)
>> +	ld	r21, VCPU_GPR(r21)(r4)
>> +	ld	r22, VCPU_GPR(r22)(r4)
>> +	ld	r23, VCPU_GPR(r23)(r4)
>> +	ld	r24, VCPU_GPR(r24)(r4)
>> +	ld	r25, VCPU_GPR(r25)(r4)
>> +	ld	r26, VCPU_GPR(r26)(r4)
>> +	ld	r27, VCPU_GPR(r27)(r4)
>> +	ld	r28, VCPU_GPR(r28)(r4)
>> +	ld	r29, VCPU_GPR(r29)(r4)
>> +	ld	r30, VCPU_GPR(r30)(r4)
>> +	ld	r31, VCPU_GPR(r31)(r4)
>> +
>> +	ld	r9, VCPU_PC(r4)			/* r9 = vcpu->arch.pc */
>> +	ld	r10, VCPU_SHADOW_MSR(r4)	/* r10 = vcpu->arch.shadow_msr */
>> +
>> +	ld	r3, VCPU_TRAMPOLINE_ENTER(r4)
>> +	mtsrr0	r3
>> +
>> +	loadimm	r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)
>> +	mtsrr1	r3
>> +
>> +	/* Load guest state in the respective registers */
>> +	lwz	r3, VCPU_CR(r4)		/* r3 = vcpu->arch.cr */
>> +	stw	r3, (PACA_EXMC + EX_CCR)(r13)
>> +
>> +	ld	r3, VCPU_CTR(r4)	/* r3 = vcpu->arch.ctr */
>> +	mtctr	r3			/* CTR = r3 */
>> +
>> +	ld	r3, VCPU_LR(r4)		/* r3 = vcpu->arch.lr */
>> +	mtlr	r3			/* LR = r3 */
>> +
>> +	ld	r3, VCPU_XER(r4)	/* r3 = vcpu->arch.xer */
>> +	std	r3, (PACA_EXMC + EX_R3)(r13)
>> +
>> +	/* This sets the Magic value for the trampoline:
>> +	 *
>> +	 * PPC64: SPRG3 |= 1
>> +	 */
>> +	setmagc	r3
>> +
>> +	/* Some guests may need to have dcbz set to 32 byte length.
>> +	 *
>> +	 * Usually we ensure that by patching the guest's instructions
>> +	 * to trap on dcbz and emulate it in the hypervisor.
>> +	 *
>> +	 * If we can, we should tell the CPU to use 32 byte dcbz though,
>> +	 * because that's a lot faster.
>> +	 */
>> +
>> +	ld	r3, VCPU_HFLAGS(r4)
>> +	rldicl.	r3, r3, 0, 63		/* CR = ((r3 & 1) = 0) */
>> +	beq	no_dcbz32_on
>> +
>> +	mfspr   r3,SPRN_HID5
>> +	ori     r3, r3, 0x80		/* XXX HID5_dcbz32 = 0x80 */
>> +	mtspr   SPRN_HID5,r3
>> +
>> +no_dcbz32_on:
>
> The whole dcbz stuff could probably be a cpufeature block so it
> gets nop'ed out when running on other processors than 970 since
> they don't all support that magic dcbz trick.

Yeah, I never really understood those cpufeature blocks ...

> Also, I think HID5
> is a HV reserved register thus you won't be able to do that trick
> when running yourself with MSR:HV=0, for example when running on
> a js2x blade.

Yes, it is. That's why the HFLAGS bit is only set when HV=1 :-).

>
>> +	/* Save guest DAR */
>> +	mfdar	r5
>> +	std	r5, VCPU_FAULT_DEAR(r12)
>
> The guest is running with MSR:PR set to 0 or 1 ? If 1, it doesn't have
> access to DAR or DSISR so I don't quite see the point of
> saving/restoring them here, you can just hand out the register  
> straight
> off your shadow when taking the protection faults as the guest tries
> to access them. If the guest is running with PR:0 then there is no
> protection of the host against the guest which sucks :-)
>
> Or do I miss something ?

FAULT_* are basically the registers that store where the guest  
faulted. So if the guest triggers a data store interrupt, the  
corresponding dar gets stored to a vcpu field, so we don't clobber it  
later.

Yes, the guest runs with PR=1 :-).

>
>> +	/* Save guest DSISR */
>> +	mfdsisr	r5
>> +	std	r5, VCPU_FAULT_DSISR(r12)
>> +
>> +	/* Restore host msr -> SRR1 */
>> +	ld	r7, VCPU_HOST_MSR(r12)
>> +	mtsrr1	r7
>> +
>> +	/* Restore host IP -> SRR0 */
>> +	ld	r6, VCPU_HOST_RETIP(r12)
>> +	mtsrr0	r6
>> +
>> +	/* For some interrupts, we need to call the real Linux */
>> +	/* handler, so it can do work for us. This has to happen */
>> +	/* as if the interrupt arrived from the kernel though, */
>> +	/* so let's fake it here where most state is restored. */
>> +
>> +	/* Call Linux for hardware interrupts/decrementer */
>> +	/* r3 = address of interrupt handler (exit reason) */
>> +
>> +	cmpwi	r3, PPC970_INTERRUPT_EXTERNAL
>> +	beq	call_linux_handler
>> +	cmpwi	r3, PPC970_INTERRUPT_DECREMENTER
>> +	beq	call_linux_handler
>> +
>> +	/* Back to Paged Mode! (goto kvm_return_point) with interrupts  
>> enabled */
>> +	RFI
>
> Ok so I need to understand better the whole model... ie how you get
> in/out of the guest etc... I would have thought you wanted to call  
> into
> kernel interrupts such as DEC or EE as if coming from userspace
> actually...

I don't think we can easily have Linux running while we're in the  
guest context. What if the DEC issues the scheduler, which schedules  
off and back again? How would it know where to resume the guest? And  
who'd set the magic bit in SPRG3?

When running a PPC64 guest things get even worse, as we have to switch  
the SLB as well, which is actually the slow part of the entry/exit  
code atm.

Maybe we could work around those problems by integrating things a bit  
more, but I doubt it's necessary. Host DEC and EE interrupts shouldn't  
really hurt performance that much.

What we do here is do a full guest exit cycle and go back to the Linux  
handler we came from, so it can handle the interrupt we intercepted.  
That way we're in normal kernel code from the point of view of every  
other part of Linux.

>
>> +call_linux_handler:
>> +
>> +	/* If we land here we need to jump back to the handler we */
>> +	/* came from. */
>> +
>> +	/* We have a page that we can access from real mode, so let's */
>> +	/* jump back to that and use it as a trampoline to get back into  
>> the */
>> +	/* interrupt handler! */
>> +
>> +	/* Enable soft interrupts again, so the handler acts */
>> +	li	r5, 1
>> +	stb	r5, PACASOFTIRQEN(r13)
>
> But we aren't supposed to enter the timer or EE with softirq  
> enabled...
> BTW we probably also need to record some of that stuff with lockdep
> but we can look at that later.

Maybe I'm calling it wrong? Basically, I want Linux to handle  
interrupts :-). And I did a irq_local_disable before, so this is the  
asm equivalent of _enable, no?

Alex

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 06/23] Add 970 highmem asm code
  2009-07-07 14:17 [PATCH 06/23] Add 970 highmem asm code Alexander Graf
  2009-07-08  4:30 ` Benjamin Herrenschmidt
  2009-07-08  7:14 ` Alexander Graf
@ 2009-07-08  7:37 ` Benjamin Herrenschmidt
  2 siblings, 0 replies; 4+ messages in thread
From: Benjamin Herrenschmidt @ 2009-07-08  7:37 UTC (permalink / raw)
  To: kvm-ppc

On Wed, 2009-07-08 at 09:14 +0200, Alexander Graf wrote:

> >> arch/powerpc/include/asm/kvm_ppc.h |    1 +
> >
> > .../...
> 
> mh?

Just a standard way to say I snipped some of the quote :-)

> Yeah, that should be definitely possible. While it's not really  
> necessary it makes the code smaller, so it's probably worth it ;-).

Could also make debugging easier. In fact you should make the whole
thing look like an interrupt frame (aka pt_regs + STACK_FRAME_OVERHEAD)
and stick in a signature similar to the one we put in our interrupt frames
(see the exception common macro) so we properly see them for what they
are in xmon etc...

> >> +/* XXX optimize non-volatile loading away */
> >> +kvm_start_lightweight:
> >> +
> >> +	DISABLE_INTERRUPTS

BTW. If this is coming from C code, I'd rather have a hard_irq_disable()
call in the C code before calling into the asm.

> >> +	/* This sets the Magic value for the trampoline:
> >> +	 *
> >> +	 * PPC64: SPRG3 |= 1
> >> +	 */
> >> +	setmagc	r3
> >> +

From the moment we do that, we must not take an exception until we
actually end up in the guest right ? So the code below must not
take an SLB miss.

However, I don't think it is guaranteed that your VCPU thingy pointed to
by r4 is currently in a bolted SLB entry. On some P5 or later machines,
the SLB is effectively volatile: the underlying pHyp hypervisor can crap
on it, though it will restore bits of it via the shadow SLB data
structure in main memory. However, unless you arrange for the VCPU
structure to be in the first 256M of memory, it won't be covered by that
shadow. You may want to modify the SLB code when using KVM to also
"bolt" the VCPU or delay the flicking of SPRG3 if you can get away with
clobbering a GPR ...

> > The whole dcbz stuff could probably be a cpufeature block so it
> > gets nop'ed out when running on other processors than 970 since
> > they don't all support that magic dcbz trick.
> 
> Yeah, I never really understood those cpufeature blocks ...

Hehehe :-) There's also the MMU features and FW features btw :-) The
base principle is that we stick references to the start and end of the
block into an ELF section along with a mask & value of CPU feature bits
to compare against. At boot time, if it doesn't match we NOP out
everything between start and stop. Recently, Michael Ellerman also
improved on it by allowing to have "alternate", ie two implementations
of the block of code, the first one in by default, the second one in a
separate ELF section, and the second one is copied over the first one
(and padded with NOPs, branches are fixed up too) if the CPU features
don't match, which allows to have "alternate" implementations of perf.
critical asm code (of course, the "default" implementation needs to be
larger or equal in size to the "alternate" one).

> > Also, I think HID5
> > is a HV reserved register thus you won't be able to do that trick
> > when running yourself with MSR:HV=0, for example when running on
> > a js2x blade.
> 
> Yes, it is. That's why the HFLAGS bit is only set when HV=1 :-).

Ok. This is also something that should only be done on a real 970, 970FX
or 970MP processor as others don't have that bit in HID5 afaik.

> FAULT_* are basically the registers that store where the guest  
> faulted. So if the guest triggers a data store interrupt, the  
> corresponding dar gets stored to a vcpu field, so we don't clobber it  
> later.

Ok.

> Yes, the guest runs with PR=1 :-).

Right, that was my understanding too but heh, better being sure :-)

> I don't think we can easily have Linux running while we're in the  
> guest context. What if the DEC issues the scheduler, which schedules  
> off and back again? How would it know where to resume the guest? And  
> who'd set the magic bit in SPRG3?

No, you misunderstood me. But then, I need to better "get" what you are
doing. For example, with MOL, the guest is split in two... the part that
is in the virtual machine, but also the parts that run as a normal linux
process (which do the device emulation etc...). The trick when we take
any exception is we context switch back to make it look like we are
coming from that part, basically from the magic syscall where the
"linux" part of the guest called into the kernel to switch into
emulation.

I have to get more familiar with how KVM does these things though to
provide more useful feedback.

> When running a PPC64 guest things get even worse, as we have to switch  
> the SLB as well, which is actually the slow part of the entry/exit  
> code atm.

I'm not totally sure we really have to, I need to better understand what
you do with the SLB, and that with my own knowledge of what Linux needs,
we can probably simplify things quite a bit. For example, most of the
Linux host side SLB entries can just be ditched.

> Maybe we could work around those problems by integrating things a bit  
> more, but I doubt it's necessary. Host DEC and EE interrupts shouldn't  
> really hurt performance that much.

Right. Beware that MacOS 9, if you ever want to run that, will trigger
shitloads of guest DEC interrupts tho.

> What we do here is do a full guest exit cycle and go back to the Linux  
> handler we came from, so it can handle the interrupt we intercepted.  
> That way we're in normal kernel code from the point of view of every  
> other part of Linux.

But don't we do that for any interrupt ? I don't quite get why DEC and
EE are "special" here...

What about machine checks, for example ? Or system reset ? I understand
that you want synchronous interrupts such as FP, altivec, etc... to be
routed back to the guest but DEC and EE aren't the only ones that need
to be reflected back to Linux are they ?

> Maybe I'm calling it wrong? Basically, I want Linux to handle  
> interrupts :-). And I did a irq_local_disable before, so this is the  
> asm equivalent of _enable, no?

Well, no, if you were to do that you should call raw_local_irq_restore()
since we may need to do some "fixups" for example if an interrupt did
happen while we were soft-disabled.

But then, you should not call into the linux EE or decrementer handler
with interrupts enabled in the first place. You should really just make
it look like you took the interrupt from the underlying userland process
in which the guest runs...

Catch me on IRC, I need to better understand your model, and we can sort
that out.

Cheers,
Ben.



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2009-07-08  7:37 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2009-07-07 14:17 [PATCH 06/23] Add 970 highmem asm code Alexander Graf
2009-07-08  4:30 ` Benjamin Herrenschmidt
2009-07-08  7:14 ` Alexander Graf
2009-07-08  7:37 ` Benjamin Herrenschmidt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.