All of lore.kernel.org
 help / color / mirror / Atom feed
From: Joerg Roedel <joro@8bytes.org>
To: Alexander Graf <agraf@suse.de>
Cc: kvm@vger.kernel.org, anthony@codemonkey.ws, avi@qumranet.com
Subject: Re: [PATCH 8/9] Add VMEXIT handler and intercepts v2
Date: Wed, 10 Sep 2008 21:12:08 +0200	[thread overview]
Message-ID: <20080910191208.GE27426@8bytes.org> (raw)
In-Reply-To: <1220601084-17763-9-git-send-email-agraf@suse.de>

On Fri, Sep 05, 2008 at 09:51:23AM +0200, Alexander Graf wrote:
> This adds the #VMEXIT intercept, so we return to the level 1 guest
> when something happens in the level 2 guest that should return to
> the level 1 guest.
> 
> v2 implements HIF handling and cleans up exception interception
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  arch/x86/kvm/svm.c |  319 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 319 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c47f039..8318a63 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -74,6 +74,13 @@ module_param(npt, int, S_IRUGO);
>  static void kvm_reput_irq(struct vcpu_svm *svm);
>  static void svm_flush_tlb(struct kvm_vcpu *vcpu);
>  
> +static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
> +static int nested_svm_vmexit(struct vcpu_svm *svm);
> +static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
> +			     void *arg2, void *opaque);
> +static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
> +				      bool has_error_code, u32 error_code);
> +
>  static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
>  {
>  	return container_of(vcpu, struct vcpu_svm, vcpu);
> @@ -223,6 +230,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> +	/* If we are within a nested VM we'd better #VMEXIT and let the
> +	   guest handle the exception */
> +	if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
> +		return;
> +
>  	svm->vmcb->control.event_inj = nr
>  		| SVM_EVTINJ_VALID
>  		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
> @@ -1185,6 +1197,43 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  	return 1;
>  }
>  
> +static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
> +				      bool has_error_code, u32 error_code)
> +{
> +	if (is_nested(svm)) {
> +		svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
> +		svm->vmcb->control.exit_code_hi = 0;
> +		svm->vmcb->control.exit_info_1 = error_code;
> +		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
> +		if (nested_svm_exit_handled(svm, false)) {
> +			nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
> +
> +			nested_svm_vmexit(svm);
> +			return 1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int nested_svm_intr(struct vcpu_svm *svm)
> +{
> +	if (is_nested(svm)) {
> +		if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))

HF_HIF_MASK?
Do you really need to check GIF if the vcpu is in guest mode? The guest
itself can't influence the GIF, so it should always be set.

> +			return 0;
> +
> +		svm->vmcb->control.exit_code = SVM_EXIT_INTR;
> +
> +		if (nested_svm_exit_handled(svm, false)) {
> +			nsvm_printk("VMexit -> INTR\n");
> +			nested_svm_vmexit(svm);
> +			return 1;
> +		}

The VMEXIT is only required if the guest VMCB has V_INTR_MASKING set.
Otherwise we can inject the interrupt directly into the L2 guest. This
is not a problem for running KVM-in-KVM, because KVM always sets
V_INTR_MASKING, but to keep the implementation close to real hardware
behavior this bit should be checked.

> +	}
> +
> +	return 0;
> +}
> +
>  static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
>  {
>  	struct page *page;
> @@ -1251,6 +1300,257 @@ static int nested_svm_do(struct vcpu_svm *svm,
>  	return retval;
>  }
>  
> +static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
> +					void *arg1,
> +					void *arg2,
> +					void *opaque)
> +{
> +	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
> +	bool kvm_overrides = *(bool *)opaque;
> +	u32 exit_code = svm->vmcb->control.exit_code;
> +
> +	if (kvm_overrides) {
> +		switch (exit_code) {
> +		case SVM_EXIT_INTR:
> +		case SVM_EXIT_NMI:
> +			return 0;
> +		/* For now we are always handling NPFs when using them */
> +		case SVM_EXIT_NPF:
> +			if (npt_enabled)
> +				return 0;
> +			break;
> +		/* When we're shadowing, trap PFs */
> +		case SVM_EXIT_EXCP_BASE + PF_VECTOR:
> +			if (!npt_enabled)
> +				return 0;
> +			break;
> +		default:
> +			break;
> +		}
> +	}
> +
> +	switch (exit_code) {
> +	case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
> +		u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
> +		if (nested_vmcb->control.intercept_cr_read & cr_bits)
> +			return 1;
> +		break;
> +	}
> +	case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
> +		u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
> +		if (nested_vmcb->control.intercept_cr_write & cr_bits)
> +			return 1;
> +		break;
> +	}
> +	case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
> +		u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
> +		if (nested_vmcb->control.intercept_dr_read & dr_bits)
> +			return 1;
> +		break;
> +	}
> +	case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
> +		u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
> +		if (nested_vmcb->control.intercept_dr_write & dr_bits)
> +			return 1;
> +		break;
> +	}
> +	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
> +		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
> +		if (nested_vmcb->control.intercept_exceptions & excp_bits)
> +			return 1;
> +		break;
> +	}
> +	default: {
> +		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
> +		nsvm_printk("exit code: 0x%x\n", exit_code);
> +		if (nested_vmcb->control.intercept & exit_bits)
> +			return 1;
> +	}
> +	}
> +
> +	return 0;
> +}
> +
> +#ifdef NESTED_KVM_MERGE_IOPM
> +static int nested_svm_exit_handled_io(struct vcpu_svm *svm,
> +				      void *arg1, void *arg2,
> +				      void *opaque)
> +{
> +	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
> +	u16 param = (u16)(svm->vmcb->control.exit_info_1);
> +	u16 port = (u16)(svm->vmcb->control.exit_info_1 >> 16);
> +	u16 mask = (1 << ((param >> 4) & 7)) - 1;
> +	u8 *iopm = (u8 *)arg2 + (port / 8);
> +	u16 iopmw = iopm[0] | (iopm[1] << 8);
> +
> +	if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
> +		return 0;
> +	if (iopmw & (mask << (port & 7)))
> +		return 1;
> +
> +	nsvm_printk("nKVM: No IO-Intercept on param=0x%hx port=0x%hx "
> +		    "mask=0x%hx iopm=0x%hx\n", param, port, mask, iopmw);
> +
> +	return 0;
> +}
> +#endif
> +
> +static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
> +				       void *arg1, void *arg2,
> +				       void *opaque)
> +{
> +	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
> +	u8 *msrpm = (u8 *)arg2;
> +        u32 t0, t1;
> +	u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
> +	u32 param = svm->vmcb->control.exit_info_1 & 1;
> +
> +	if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
> +		return 0;
> +
> +	switch(msr) {
> +	case 0 ... 0x1fff:
> +		t0 = (msr * 2) % 8;
> +		t1 = msr / 8;
> +		break;
> +	case 0xc0000000 ... 0xc0001fff:
> +		t0 = (8192 + msr - 0xc0000000) * 2;
> +		t1 = (t0 / 8);
> +		t0 %= 8;
> +		break;
> +	case 0xc0010000 ... 0xc0011fff:
> +		t0 = (16384 + msr - 0xc0010000) * 2;
> +		t1 = (t0 / 8);
> +		t0 %= 8;
> +		break;
> +	default:
> +		return 1;
> +		break;
> +	}
> +	if (msrpm[t1] & ((1 << param) << t0))
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
> +{
> +	bool k = kvm_override;
> +
> +	switch (svm->vmcb->control.exit_code) {
> +#ifdef NESTED_KVM_MERGE_IOPM
> +	case SVM_EXIT_IOIO:
> +		return nested_svm_do(svm, svm->nested_vmcb,
> +				     svm->nested_vmcb_iopm, NULL,
> +				     nested_svm_exit_handled_io);
> +		break;
> +#endif
> +	case SVM_EXIT_MSR:
> +		return nested_svm_do(svm, svm->nested_vmcb,
> +				     svm->nested_vmcb_msrpm, NULL,
> +				     nested_svm_exit_handled_msr);
> +	default: break;
> +	}
> +
> +	return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
> +			     nested_svm_exit_handled_real);
> +}
> +
> +static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
> +				  void *arg2, void *opaque)
> +{
> +	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
> +	struct vmcb *hsave = (struct vmcb *)arg2;
> +	u64 nested_save[] = { nested_vmcb->save.cr0,
> +			      nested_vmcb->save.cr3,
> +			      nested_vmcb->save.cr4,
> +			      nested_vmcb->save.efer,
> +			      nested_vmcb->control.intercept_cr_read,
> +			      nested_vmcb->control.intercept_cr_write,
> +			      nested_vmcb->control.intercept_dr_read,
> +			      nested_vmcb->control.intercept_dr_write,
> +			      nested_vmcb->control.intercept_exceptions,
> +			      nested_vmcb->control.intercept,
> +			      nested_vmcb->control.msrpm_base_pa,
> +			      nested_vmcb->control.iopm_base_pa,
> +			      nested_vmcb->control.tsc_offset };
> +
> +	/* Give the current vmcb to the guest */
> +	memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
> +	nested_vmcb->save.cr0 = nested_save[0];
> +	if (!npt_enabled)
> +		nested_vmcb->save.cr3 = nested_save[1];
> +	nested_vmcb->save.cr4 = nested_save[2];
> +	nested_vmcb->save.efer = nested_save[3];
> +	nested_vmcb->control.intercept_cr_read = nested_save[4];
> +	nested_vmcb->control.intercept_cr_write = nested_save[5];
> +	nested_vmcb->control.intercept_dr_read = nested_save[6];
> +	nested_vmcb->control.intercept_dr_write = nested_save[7];
> +	nested_vmcb->control.intercept_exceptions = nested_save[8];
> +	nested_vmcb->control.intercept = nested_save[9];
> +	nested_vmcb->control.msrpm_base_pa = nested_save[10];
> +	nested_vmcb->control.iopm_base_pa = nested_save[11];
> +	nested_vmcb->control.tsc_offset = nested_save[12];
> +
> +	if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
> +	    (nested_vmcb->control.int_vector)) {
> +		nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
> +				nested_vmcb->control.int_vector);
> +	}
> +
> +	/* Restore the original control entries */
> +	svm->vmcb->control = hsave->control;
> +
> +	/* Flush the virtual TLB */
> +	force_new_asid(&svm->vcpu);
> +
> +	/* Kill any pending exceptions */
> +	if (svm->vcpu.arch.exception.pending == true)
> +		nsvm_printk("WARNING: Pending Exception\n");
> +	svm->vcpu.arch.exception.pending = false;
> +
> +	/* Restore selected save entries */
> +	svm->vmcb->save.es = hsave->save.es;
> +	svm->vmcb->save.cs = hsave->save.cs;
> +	svm->vmcb->save.ss = hsave->save.ss;
> +	svm->vmcb->save.ds = hsave->save.ds;
> +	svm->vmcb->save.gdtr = hsave->save.gdtr;
> +	svm->vmcb->save.idtr = hsave->save.idtr;
> +	svm->vmcb->save.rflags = hsave->save.rflags;
> +	svm_set_efer(&svm->vcpu, hsave->save.efer);
> +	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
> +	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
> +	if (npt_enabled) {
> +		svm->vmcb->save.cr3 = hsave->save.cr3;
> +		svm->vcpu.arch.cr3 = hsave->save.cr3;
> +	} else {
> +		kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
> +	}
> +	kvm_mmu_reset_context(&svm->vcpu);
> +	kvm_mmu_load(&svm->vcpu);
> +	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
> +	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
> +	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
> +	svm->vmcb->save.dr7 = 0;
> +	svm->vmcb->save.cpl = 0;
> +	svm->vmcb->control.exit_int_info = 0;
> +
> +	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
> +	/* Exit nested SVM mode */
> +	svm->nested_vmcb = 0;
> +
> +	return 0;
> +}
> +
> +static int nested_svm_vmexit(struct vcpu_svm *svm)
> +{
> +	nsvm_printk("VMexit\n");
> +	if (nested_svm_do(svm, svm->nested_vmcb, svm->nested_hsave,
> +			  NULL, nested_svm_vmexit_real))
> +		return 1;
> +
> +	return 0;
> +}
>  
>  static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
>  				  void *arg2, void *opaque)
> @@ -1831,6 +2131,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>  	KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
>  		    (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
>  
> +	if (is_nested(svm)) {
> +		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
> +			    exit_code, svm->vmcb->control.exit_info_1,
> +			    svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
> +		if (nested_svm_exit_handled(svm, true)) {
> +			nested_svm_vmexit(svm);
> +			nsvm_printk("-> #VMEXIT\n");
> +			return 1;
> +		}
> +	}
> +
>  	if (npt_enabled) {
>  		int mmu_reload = 0;
>  		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
> @@ -1917,6 +2228,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> +	nested_svm_intr(svm);
> +
>  	svm_inject_irq(svm, irq);
>  }
>  
> @@ -1962,6 +2275,9 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
>  	if (!kvm_cpu_has_interrupt(vcpu))
>  		goto out;
>  
> +	if (nested_svm_intr(svm))
> +		goto out;
> +
>  	if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
>  		goto out;
>  
> @@ -2014,6 +2330,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  	struct vmcb_control_area *control = &svm->vmcb->control;
>  
> +	if (nested_svm_intr(svm))
> +		return;
> +
>  	svm->vcpu.arch.interrupt_window_open =
>  		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
>  		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
> -- 
> 1.5.6

  parent reply	other threads:[~2008-09-10 19:12 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-05  7:51 [PATCH 0/9] Add support for nested SVM (kernel) v2 Alexander Graf
2008-09-05  7:51 ` [PATCH 1/9] Add CPUID feature flag for SVM v2 Alexander Graf
2008-09-05  7:51   ` [PATCH 2/9] Clean up VINTR setting v2 Alexander Graf
2008-09-05  7:51     ` [PATCH 3/9] Implement GIF, clgi and stgi v2 Alexander Graf
2008-09-05  7:51       ` [PATCH 4/9] Add helper functions for nested SVM v2 Alexander Graf
2008-09-05  7:51         ` [PATCH 5/9] Implement hsave v2 Alexander Graf
2008-09-05  7:51           ` [PATCH 6/9] Add VMLOAD and VMSAVE handlers v2 Alexander Graf
2008-09-05  7:51             ` [PATCH 7/9] Add VMRUN handler v2 Alexander Graf
2008-09-05  7:51               ` [PATCH 8/9] Add VMEXIT handler and intercepts v2 Alexander Graf
2008-09-05  7:51                 ` [PATCH 9/9] Allow setting the SVME bit v2 Alexander Graf
2008-09-10 19:12                 ` Joerg Roedel [this message]
2008-09-15 16:11                   ` [PATCH 8/9] Add VMEXIT handler and intercepts v2 Alexander Graf
2008-09-10 19:06       ` [PATCH 3/9] Implement GIF, clgi and stgi v2 Joerg Roedel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080910191208.GE27426@8bytes.org \
    --to=joro@8bytes.org \
    --cc=agraf@suse.de \
    --cc=anthony@codemonkey.ws \
    --cc=avi@qumranet.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.