public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Joerg Roedel <joro@8bytes.org>
To: Alexander Graf <agraf@suse.de>
Cc: kvm@vger.kernel.org, anthony@codemonkey.ws, avi@qumranet.com
Subject: Re: [PATCH 8/9] Add VMEXIT handler and intercepts v2
Date: Wed, 10 Sep 2008 21:12:08 +0200	[thread overview]
Message-ID: <20080910191208.GE27426@8bytes.org> (raw)
In-Reply-To: <1220601084-17763-9-git-send-email-agraf@suse.de>

On Fri, Sep 05, 2008 at 09:51:23AM +0200, Alexander Graf wrote:
> This adds the #VMEXIT intercept, so we return to the level 1 guest
> when something happens in the level 2 guest that should return to
> the level 1 guest.
> 
> v2 implements HIF handling and cleans up exception interception
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  arch/x86/kvm/svm.c |  319 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 319 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c47f039..8318a63 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -74,6 +74,13 @@ module_param(npt, int, S_IRUGO);
>  static void kvm_reput_irq(struct vcpu_svm *svm);
>  static void svm_flush_tlb(struct kvm_vcpu *vcpu);
>  
> +static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
> +static int nested_svm_vmexit(struct vcpu_svm *svm);
> +static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
> +			     void *arg2, void *opaque);
> +static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
> +				      bool has_error_code, u32 error_code);
> +
>  static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
>  {
>  	return container_of(vcpu, struct vcpu_svm, vcpu);
> @@ -223,6 +230,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> +	/* If we are within a nested VM we'd better #VMEXIT and let the
> +	   guest handle the exception */
> +	if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
> +		return;
> +
>  	svm->vmcb->control.event_inj = nr
>  		| SVM_EVTINJ_VALID
>  		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
> @@ -1185,6 +1197,43 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  	return 1;
>  }
>  
> +static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
> +				      bool has_error_code, u32 error_code)
> +{
> +	if (is_nested(svm)) {
> +		svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
> +		svm->vmcb->control.exit_code_hi = 0;
> +		svm->vmcb->control.exit_info_1 = error_code;
> +		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
> +		if (nested_svm_exit_handled(svm, false)) {
> +			nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
> +
> +			nested_svm_vmexit(svm);
> +			return 1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int nested_svm_intr(struct vcpu_svm *svm)
> +{
> +	if (is_nested(svm)) {
> +		if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))

HF_HIF_MASK?
Do you really need to check GIF if the vcpu is in guest mode? The guest
itself can't influence the GIF so it should always be set.

> +			return 0;
> +
> +		svm->vmcb->control.exit_code = SVM_EXIT_INTR;
> +
> +		if (nested_svm_exit_handled(svm, false)) {
> +			nsvm_printk("VMexit -> INTR\n");
> +			nested_svm_vmexit(svm);
> +			return 1;
> +		}

The VMEXIT is only required if the guest VMCB has set V_INTR_MASKING.
Otherwise we can inject the interrupt directly into the L2 guest. This
is no problem for running KVM-in-KVM because KVM always sets
V_INTR_MASKING. But to keep the implementation close to real hardware
behavior this bit should be checked.

> +	}
> +
> +	return 0;
> +}
> +
>  static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
>  {
>  	struct page *page;
> @@ -1251,6 +1300,257 @@ static int nested_svm_do(struct vcpu_svm *svm,
>  	return retval;
>  }
>  
> +static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
> +					void *arg1,
> +					void *arg2,
> +					void *opaque)
> +{
> +	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
> +	bool kvm_overrides = *(bool *)opaque;
> +	u32 exit_code = svm->vmcb->control.exit_code;
> +
> +	if (kvm_overrides) {
> +		switch (exit_code) {
> +		case SVM_EXIT_INTR:
> +		case SVM_EXIT_NMI:
> +			return 0;
> +		/* For now we are always handling NPFs when using them */
> +		case SVM_EXIT_NPF:
> +			if (npt_enabled)
> +				return 0;
> +			break;
> +		/* When we're shadowing, trap PFs */
> +		case SVM_EXIT_EXCP_BASE + PF_VECTOR:
> +			if (!npt_enabled)
> +				return 0;
> +			break;
> +		default:
> +			break;
> +		}
> +	}
> +
> +	switch (exit_code) {
> +	case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
> +		u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
> +		if (nested_vmcb->control.intercept_cr_read & cr_bits)
> +			return 1;
> +		break;
> +	}
> +	case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
> +		u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
> +		if (nested_vmcb->control.intercept_cr_write & cr_bits)
> +			return 1;
> +		break;
> +	}
> +	case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
> +		u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
> +		if (nested_vmcb->control.intercept_dr_read & dr_bits)
> +			return 1;
> +		break;
> +	}
> +	case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
> +		u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
> +		if (nested_vmcb->control.intercept_dr_write & dr_bits)
> +			return 1;
> +		break;
> +	}
> +	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
> +		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
> +		if (nested_vmcb->control.intercept_exceptions & excp_bits)
> +			return 1;
> +		break;
> +	}
> +	default: {
> +		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
> +		nsvm_printk("exit code: 0x%x\n", exit_code);
> +		if (nested_vmcb->control.intercept & exit_bits)
> +			return 1;
> +	}
> +	}
> +
> +	return 0;
> +}
> +
> +#ifdef NESTED_KVM_MERGE_IOPM
> +static int nested_svm_exit_handled_io(struct vcpu_svm *svm,
> +				      void *arg1, void *arg2,
> +				      void *opaque)
> +{
> +	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
> +	u16 param = (u16)(svm->vmcb->control.exit_info_1);
> +	u16 port = (u16)(svm->vmcb->control.exit_info_1 >> 16);
> +	u16 mask = (1 << ((param >> 4) & 7)) - 1;
> +	u8 *iopm = (u8 *)arg2 + (port / 8);
> +	u16 iopmw = iopm[0] | (iopm[1] << 8);
> +
> +	if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
> +		return 0;
> +	if (iopmw & (mask << (port & 7)))
> +		return 1;
> +
> +	nsvm_printk("nKVM: No IO-Intercept on param=0x%hx port=0x%hx "
> +		    "mask=0x%hx iopm=0x%hx\n", param, port, mask, iopmw);
> +
> +	return 0;
> +}
> +#endif
> +
> +static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
> +				       void *arg1, void *arg2,
> +				       void *opaque)
> +{
> +	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
> +	u8 *msrpm = (u8 *)arg2;
> +        u32 t0, t1;
> +	u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
> +	u32 param = svm->vmcb->control.exit_info_1 & 1;
> +
> +	if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
> +		return 0;
> +
> +	switch(msr) {
> +	case 0 ... 0x1fff:
> +		t0 = (msr * 2) % 8;
> +		t1 = msr / 8;
> +		break;
> +	case 0xc0000000 ... 0xc0001fff:
> +		t0 = (8192 + msr - 0xc0000000) * 2;
> +		t1 = (t0 / 8);
> +		t0 %= 8;
> +		break;
> +	case 0xc0010000 ... 0xc0011fff:
> +		t0 = (16384 + msr - 0xc0010000) * 2;
> +		t1 = (t0 / 8);
> +		t0 %= 8;
> +		break;
> +	default:
> +		return 1;
> +		break;
> +	}
> +	if (msrpm[t1] & ((1 << param) << t0))
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
> +{
> +	bool k = kvm_override;
> +
> +	switch (svm->vmcb->control.exit_code) {
> +#ifdef NESTED_KVM_MERGE_IOPM
> +	case SVM_EXIT_IOIO:
> +		return nested_svm_do(svm, svm->nested_vmcb,
> +				     svm->nested_vmcb_iopm, NULL,
> +				     nested_svm_exit_handled_io);
> +		break;
> +#endif
> +	case SVM_EXIT_MSR:
> +		return nested_svm_do(svm, svm->nested_vmcb,
> +				     svm->nested_vmcb_msrpm, NULL,
> +				     nested_svm_exit_handled_msr);
> +	default: break;
> +	}
> +
> +	return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
> +			     nested_svm_exit_handled_real);
> +}
> +
> +static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
> +				  void *arg2, void *opaque)
> +{
> +	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
> +	struct vmcb *hsave = (struct vmcb *)arg2;
> +	u64 nested_save[] = { nested_vmcb->save.cr0,
> +			      nested_vmcb->save.cr3,
> +			      nested_vmcb->save.cr4,
> +			      nested_vmcb->save.efer,
> +			      nested_vmcb->control.intercept_cr_read,
> +			      nested_vmcb->control.intercept_cr_write,
> +			      nested_vmcb->control.intercept_dr_read,
> +			      nested_vmcb->control.intercept_dr_write,
> +			      nested_vmcb->control.intercept_exceptions,
> +			      nested_vmcb->control.intercept,
> +			      nested_vmcb->control.msrpm_base_pa,
> +			      nested_vmcb->control.iopm_base_pa,
> +			      nested_vmcb->control.tsc_offset };
> +
> +	/* Give the current vmcb to the guest */
> +	memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
> +	nested_vmcb->save.cr0 = nested_save[0];
> +	if (!npt_enabled)
> +		nested_vmcb->save.cr3 = nested_save[1];
> +	nested_vmcb->save.cr4 = nested_save[2];
> +	nested_vmcb->save.efer = nested_save[3];
> +	nested_vmcb->control.intercept_cr_read = nested_save[4];
> +	nested_vmcb->control.intercept_cr_write = nested_save[5];
> +	nested_vmcb->control.intercept_dr_read = nested_save[6];
> +	nested_vmcb->control.intercept_dr_write = nested_save[7];
> +	nested_vmcb->control.intercept_exceptions = nested_save[8];
> +	nested_vmcb->control.intercept = nested_save[9];
> +	nested_vmcb->control.msrpm_base_pa = nested_save[10];
> +	nested_vmcb->control.iopm_base_pa = nested_save[11];
> +	nested_vmcb->control.tsc_offset = nested_save[12];
> +
> +	if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
> +	    (nested_vmcb->control.int_vector)) {
> +		nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
> +				nested_vmcb->control.int_vector);
> +	}
> +
> +	/* Restore the original control entries */
> +	svm->vmcb->control = hsave->control;
> +
> +	/* Flush the virtual TLB */
> +	force_new_asid(&svm->vcpu);
> +
> +	/* Kill any pending exceptions */
> +	if (svm->vcpu.arch.exception.pending == true)
> +		nsvm_printk("WARNING: Pending Exception\n");
> +	svm->vcpu.arch.exception.pending = false;
> +
> +	/* Restore selected save entries */
> +	svm->vmcb->save.es = hsave->save.es;
> +	svm->vmcb->save.cs = hsave->save.cs;
> +	svm->vmcb->save.ss = hsave->save.ss;
> +	svm->vmcb->save.ds = hsave->save.ds;
> +	svm->vmcb->save.gdtr = hsave->save.gdtr;
> +	svm->vmcb->save.idtr = hsave->save.idtr;
> +	svm->vmcb->save.rflags = hsave->save.rflags;
> +	svm_set_efer(&svm->vcpu, hsave->save.efer);
> +	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
> +	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
> +	if (npt_enabled) {
> +		svm->vmcb->save.cr3 = hsave->save.cr3;
> +		svm->vcpu.arch.cr3 = hsave->save.cr3;
> +	} else {
> +		kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
> +	}
> +	kvm_mmu_reset_context(&svm->vcpu);
> +	kvm_mmu_load(&svm->vcpu);
> +	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
> +	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
> +	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
> +	svm->vmcb->save.dr7 = 0;
> +	svm->vmcb->save.cpl = 0;
> +	svm->vmcb->control.exit_int_info = 0;
> +
> +	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
> +	/* Exit nested SVM mode */
> +	svm->nested_vmcb = 0;
> +
> +	return 0;
> +}
> +
> +static int nested_svm_vmexit(struct vcpu_svm *svm)
> +{
> +	nsvm_printk("VMexit\n");
> +	if (nested_svm_do(svm, svm->nested_vmcb, svm->nested_hsave,
> +			  NULL, nested_svm_vmexit_real))
> +		return 1;
> +
> +	return 0;
> +}
>  
>  static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
>  				  void *arg2, void *opaque)
> @@ -1831,6 +2131,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>  	KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
>  		    (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
>  
> +	if (is_nested(svm)) {
> +		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
> +			    exit_code, svm->vmcb->control.exit_info_1,
> +			    svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
> +		if (nested_svm_exit_handled(svm, true)) {
> +			nested_svm_vmexit(svm);
> +			nsvm_printk("-> #VMEXIT\n");
> +			return 1;
> +		}
> +	}
> +
>  	if (npt_enabled) {
>  		int mmu_reload = 0;
>  		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
> @@ -1917,6 +2228,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> +	nested_svm_intr(svm);
> +
>  	svm_inject_irq(svm, irq);
>  }
>  
> @@ -1962,6 +2275,9 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
>  	if (!kvm_cpu_has_interrupt(vcpu))
>  		goto out;
>  
> +	if (nested_svm_intr(svm))
> +		goto out;
> +
>  	if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
>  		goto out;
>  
> @@ -2014,6 +2330,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  	struct vmcb_control_area *control = &svm->vmcb->control;
>  
> +	if (nested_svm_intr(svm))
> +		return;
> +
>  	svm->vcpu.arch.interrupt_window_open =
>  		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
>  		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
> -- 
> 1.5.6

  parent reply	other threads:[~2008-09-10 19:12 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-05  7:51 [PATCH 0/9] Add support for nested SVM (kernel) v2 Alexander Graf
2008-09-05  7:51 ` [PATCH 1/9] Add CPUID feature flag for SVM v2 Alexander Graf
2008-09-05  7:51   ` [PATCH 2/9] Clean up VINTR setting v2 Alexander Graf
2008-09-05  7:51     ` [PATCH 3/9] Implement GIF, clgi and stgi v2 Alexander Graf
2008-09-05  7:51       ` [PATCH 4/9] Add helper functions for nested SVM v2 Alexander Graf
2008-09-05  7:51         ` [PATCH 5/9] Implement hsave v2 Alexander Graf
2008-09-05  7:51           ` [PATCH 6/9] Add VMLOAD and VMSAVE handlers v2 Alexander Graf
2008-09-05  7:51             ` [PATCH 7/9] Add VMRUN handler v2 Alexander Graf
2008-09-05  7:51               ` [PATCH 8/9] Add VMEXIT handler and intercepts v2 Alexander Graf
2008-09-05  7:51                 ` [PATCH 9/9] Allow setting the SVME bit v2 Alexander Graf
2008-09-10 19:12                 ` Joerg Roedel [this message]
2008-09-15 16:11                   ` [PATCH 8/9] Add VMEXIT handler and intercepts v2 Alexander Graf
2008-09-10 19:06       ` [PATCH 3/9] Implement GIF, clgi and stgi v2 Joerg Roedel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080910191208.GE27426@8bytes.org \
    --to=joro@8bytes.org \
    --cc=agraf@suse.de \
    --cc=anthony@codemonkey.ws \
    --cc=avi@qumranet.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.