Re: [PATCH 2/7] Nested VMX patch 2 implements vmclear

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Gleb Natapov <gleb@redhat.com>
To: oritw@il.ibm.com
Cc: avi@redhat.com, kvm@vger.kernel.org, benami@il.ibm.com,
	abelg@il.ibm.com, muli@il.ibm.com, aliguori@us.ibm.com,
	mdday@us.ibm.com
Subject: Re: [PATCH 2/7] Nested VMX patch 2 implements vmclear
Date: Mon, 28 Dec 2009 16:57:50 +0200	[thread overview]
Message-ID: <20091228145750.GA4257@redhat.com> (raw)
In-Reply-To: <1260470309-7166-3-git-send-email-oritw@il.ibm.com>

On Thu, Dec 10, 2009 at 08:38:24PM +0200, oritw@il.ibm.com wrote:
> From: Orit Wasserman <oritw@il.ibm.com>
> 
> ---
>  arch/x86/kvm/vmx.c |  235 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/x86.c |    5 +-
>  arch/x86/kvm/x86.h |    3 +
>  3 files changed, 240 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2726a6c..a7ffd5e 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -93,13 +93,39 @@ struct shared_msr_entry {
>  };
>  
>  struct __attribute__ ((__packed__)) level_state {
> +	/* Has the level1 guest done vmclear? */
> +	bool vmclear;
> +};
> +
> +/*
> + * This structure is mapped to guest memory.
> + * It is packed in order to preseve the binary content
> + * after live migration.
> + * If there are changed in the content or layout the revision_id must be updated.
> + */
> +struct __attribute__ ((__packed__)) nested_vmcs_page {
> +	u32 revision_id;
> +	u32 abort;
> +	struct level_state l2_state;
> +};
> +
> +struct nested_vmcs_list {
> +	struct list_head list;
> +	gpa_t vmcs_addr;
> +	struct vmcs *l2_vmcs;
>  };
>  
>  struct nested_vmx {
>  	/* Has the level1 guest done vmxon? */
>  	bool vmxon;
> +	/* What is the location of the current vmcs l1 keeps for l2 */
> +	gpa_t current_vmptr;
>  	/* Level 1 state for switching to level 2 and back */
>  	struct level_state *l1_state;
> +	/* list of vmcs for each l2 guest created by l1 */
> +	struct list_head l2_vmcs_list;
> +	/* l2 page corresponding to the current vmcs set by l1 */
> +	struct nested_vmcs_page *current_l2_page;
>  };
>  
>  struct vcpu_vmx {
> @@ -156,6 +182,76 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
>  	return container_of(vcpu, struct vcpu_vmx, vcpu);
>  }
>  
> +static struct page *nested_get_page(struct kvm_vcpu *vcpu,
> +				    u64 vmcs_addr)
> +{
> +	struct page *vmcs_page = NULL;
> +
> +	down_read(&current->mm->mmap_sem);
> +	vmcs_page = gfn_to_page(vcpu->kvm, vmcs_addr >> PAGE_SHIFT);
> +	up_read(&current->mm->mmap_sem);
> +
> +	if (is_error_page(vmcs_page)) {
> +		printk(KERN_ERR "%s error allocating page 0x%llx\n",
> +		       __func__, vmcs_addr);
> +		kvm_release_page_clean(vmcs_page);
> +		return NULL;
> +	}
> +
> +	return vmcs_page;
> +
> +}
> +
> +static int nested_map_current(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct page *vmcs_page =
> +		nested_get_page(vcpu, vmx->nested.current_vmptr);
> +	struct nested_vmcs_page *mapped_page;
> +
> +	if (vmcs_page == NULL) {
> +		printk(KERN_INFO "%s: failure in nested_get_page\n", __func__);
> +		return 0;
> +	}
> +
> +	if (vmx->nested.current_l2_page) {
> +		printk(KERN_INFO "%s: shadow vmcs already mapped\n", __func__);
> +		WARN_ON(1);
> +		return 0;
> +	}
> +
> +	mapped_page = kmap_atomic(vmcs_page, KM_USER0);
> +
> +	if (!mapped_page) {
> +		printk(KERN_INFO "%s: error in kmap_atomic\n", __func__);
> +		return 0;
> +	}
> +
> +	vmx->nested.current_l2_page = mapped_page;
> +
> +	return 1;
> +}
> +
> +static void nested_unmap_current(struct kvm_vcpu *vcpu)
> +{
> +	struct page *page;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	if (!vmx->nested.current_l2_page) {
> +		printk(KERN_INFO "Shadow vmcs already unmapped\n");
> +		WARN_ON(1);
> +		return;
> +	}
> +
> +	page = kmap_atomic_to_page(vmx->nested.current_l2_page);
> +
> +	kunmap_atomic(vmx->nested.current_l2_page, KM_USER0);
> +
> +	kvm_release_page_dirty(page);
> +
> +	vmx->nested.current_l2_page = NULL;
> +}
> +
>  static int init_rmode(struct kvm *kvm);
>  static u64 construct_eptp(unsigned long root_hpa);
>  
> @@ -1144,6 +1240,35 @@ static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
>  	return 0;
>  }
>  
> +static int read_guest_vmcs_gpa(struct kvm_vcpu *vcpu, gva_t gva, u64 *gentry)
> +{
> +	int r = 0;
> +	uint size;
> +
> +	*gentry = 0;
> +
> +	if (is_long_mode(vcpu))
> +		size = sizeof(u64);
> +	else
> +		size = sizeof(u32);
> +
> +	r = kvm_read_guest_virt(gva, gentry,
> +				size, vcpu);
> +	if (r) {
> +		printk(KERN_ERR "%s cannot read guest vmcs addr %lx : %d\n",
> +		       __func__, vcpu->arch.regs[VCPU_REGS_RAX], r);
> +		return r;
> +	}
> +
> +	if (!IS_ALIGNED(*gentry, PAGE_SIZE)) {
> +		printk(KERN_DEBUG "%s addr %llx not aligned\n",
> +		       __func__, *gentry);
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * Writes msr value into into the appropriate "register".
>   * Returns 0 on success, non-0 otherwise.
> @@ -1316,6 +1441,7 @@ static int create_l1_state(struct kvm_vcpu *vcpu)
>  	} else
>  		return 0;
>  
> +	INIT_LIST_HEAD(&(vmx->nested.l2_vmcs_list));
>  	return 0;
>  }
>  
> @@ -1488,15 +1614,35 @@ static void free_vmcs(struct vmcs *vmcs)
>  	free_pages((unsigned long)vmcs, vmcs_config.order);
>  }
>  
> +static void nested_free_current_vmcs(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct nested_vmcs_list *list_item, *n;
> +
> +	list_for_each_entry_safe(list_item, n, &vmx->nested.l2_vmcs_list, list)
> +		if (list_item->vmcs_addr == vmx->nested.current_vmptr) {
> +			free_vmcs(list_item->l2_vmcs);
> +			list_del(&(list_item->list));
> +			return;
> +		}
> +}
> +
>  static void free_l1_state(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct nested_vmcs_list *list_item, *n;
>  
>  	if (!vmx->nested.l1_state)
>  		return;
>  
>  	kfree(vmx->nested.l1_state);
>  	vmx->nested.l1_state = NULL;
> +
> +	list_for_each_entry_safe(list_item, n, &vmx->nested.l2_vmcs_list,
> +				 list) {
> +		free_vmcs(list_item->l2_vmcs);
> +		list_del(&(list_item->list));
> +	}
>  }
>  
>  
> @@ -3352,6 +3498,93 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu)
>  	return 1;
>  }
>  
> +static void clear_rflags_cf_zf(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long rflags;
> +	rflags = vmx_get_rflags(vcpu);
> +	rflags &= ~(X86_EFLAGS_CF | X86_EFLAGS_ZF);
> +	vmx_set_rflags(vcpu, rflags);
> +}
> +
> +/*
> + * Decode the memory address (operand) of a vmx instruction according to Table 23-12/23-11
> + * For additional information regarding offset calculation see 3.7.5
> + */
> +static gva_t get_vmx_mem_address(struct kvm_vcpu *vcpu,
> +				 unsigned long exit_qualification,
> +				 u32 vmx_instruction_info)
> +{
> +	int  scaling        = vmx_instruction_info & 3;             /* bits 0:1 scaling */
> +	int  addr_size      = (vmx_instruction_info >> 7) & 7;      /* bits 7:9 address size, 0=16bit, 1=32bit, 2=64bit */
> +	bool is_reg         = vmx_instruction_info & (1u << 10);    /* bit  10  1=register operand, 0= memory */
> +	int  seg_reg        = (vmx_instruction_info >> 15) & 7;     /* bits 15:17 segment register */
> +	int  index_reg      = (vmx_instruction_info >> 18) & 0xf;   /* bits 18:21 index register */
> +	bool index_is_valid = !(vmx_instruction_info & (1u << 22)); /* bit  22 index register validity, 0=valid, 1=invalid */
> +	int  base_reg       = (vmx_instruction_info >> 23) & 0xf;   /* bits 23:26 index register */
> +	bool base_is_valid  = !(vmx_instruction_info & (1u << 27)); /* bit  27 base register validity, 0=valid, 1=invalid */
> +	gva_t addr;
> +
> +	if (is_reg)
> +		return 0;
> +
> +	switch (addr_size) {
> +	case 1:
> +		exit_qualification &= 0xffffffff; /* 32 high bits are undefied according to the spec, page 23-7 */
> +		break;
> +	case 2:
> +		break;
> +	default:
> +		return 0;
> +	}
> +
> +	/* Addr = segment_base + offset */
> +	/* offfset = Base + [Index * Scale] + Displacement, see Figure 3-11 */
> +	addr = vmx_get_segment_base(vcpu, seg_reg);
> +	if (base_is_valid)
> +		addr += kvm_register_read(vcpu, base_reg);
> +	if (index_is_valid)
> +		addr += kvm_register_read(vcpu, index_reg)*scaling;
> +	addr += exit_qualification; /* exit qualification holds the displacement, spec page 23-7 */
> +
> +	return addr;
> +}
> +
> +static int handle_vmclear(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct level_state *l2_state;
> +	gpa_t guest_vmcs_addr;
> +	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> +	gva_t vmcs_gva;
> +
> +	if (!nested_vmx_check_permission(vcpu))
> +		return 1;
> +
> +	vmcs_gva = get_vmx_mem_address(vcpu, exit_qualification,
> +				       vmx_instruction_info);
> +
> +	if (read_guest_vmcs_gpa(vcpu, vmcs_gva, &guest_vmcs_addr))
> +		return 1;
> +
This should check that the VMCS address is 4K-aligned and that the given
address is not equal to the VMXON pointer.

> +	vmx->nested.current_vmptr = guest_vmcs_addr;
vmclear doesn't change the current VMCS pointer.

> +	if (!nested_map_current(vcpu))
> +		return 1;
> +
> +	l2_state = &(to_vmx(vcpu)->nested.current_l2_page->l2_state);
> +	l2_state->vmclear = 1;
> +	nested_free_current_vmcs(vcpu);
> +
> +	vmx->nested.current_vmptr = -1ull;
> +
vmclear resets the current VMCS pointer to -1 only if it was called with
the current VMCS pointer as its argument.

> +	nested_unmap_current(vcpu);
> +
> +	skip_emulated_instruction(vcpu);
> +	clear_rflags_cf_zf(vcpu);
> +
> +	return 1;
> +}
> +
>  static int handle_vmoff(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -3695,7 +3928,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
>  	[EXIT_REASON_HLT]                     = handle_halt,
>  	[EXIT_REASON_INVLPG]		      = handle_invlpg,
>  	[EXIT_REASON_VMCALL]                  = handle_vmcall,
> -	[EXIT_REASON_VMCLEAR]	              = handle_vmx_insn,
> +	[EXIT_REASON_VMCLEAR]	              = handle_vmclear,
>  	[EXIT_REASON_VMLAUNCH]                = handle_vmx_insn,
>  	[EXIT_REASON_VMPTRLD]                 = handle_vmx_insn,
>  	[EXIT_REASON_VMPTRST]                 = handle_vmx_insn,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b698952..e5acf22 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2773,8 +2773,8 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
>  	return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
>  }
>  
> -static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
> -			       struct kvm_vcpu *vcpu)
> +int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
> +			struct kvm_vcpu *vcpu)
>  {
>  	void *data = val;
>  	int r = X86EMUL_CONTINUE;
> @@ -2802,6 +2802,7 @@ static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
>  out:
>  	return r;
>  }
> +EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
>  
>  static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
>  				struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 57204cb..2d7b2dc 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -35,6 +35,9 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
>  struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
>                                               u32 function, u32 index);
>  
> +int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
> +			struct kvm_vcpu *vcpu);
> +
>  extern int nested;
>  
>  #endif
> -- 
> 1.6.0.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.

next prev parent reply	other threads:[~2009-12-28 14:57 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-10 18:38 Nested VMX support v4 oritw
2009-12-10 18:38 ` [PATCH 1/7] Nested VMX patch 1 implements vmon and vmoff oritw
2009-12-10 18:38   ` [PATCH 2/7] Nested VMX patch 2 implements vmclear oritw
2009-12-10 18:38     ` [PATCH 3/7] Nested VMX patch 3 implements vmptrld and vmptrst oritw
2009-12-10 18:38       ` [PATCH 4/7] Nested VMX patch 4 implements vmread and vmwrite oritw
2009-12-10 18:38         ` [PATCH 5/7] Nested VMX patch 5 Simplify fpu handling oritw
2009-12-10 18:38           ` [PATCH 6/7] Nested VMX patch 6 implements vmlaunch and vmresume oritw
2009-12-10 18:38             ` [PATCH 7/7] Nested VMX patch 7 handling of nested guest exits oritw
2009-12-17 13:46               ` Avi Kivity
2009-12-17 10:10             ` [PATCH 6/7] Nested VMX patch 6 implements vmlaunch and vmresume Avi Kivity
2009-12-17  9:10           ` [PATCH 5/7] Nested VMX patch 5 Simplify fpu handling Avi Kivity
2009-12-16 14:44         ` [PATCH 4/7] Nested VMX patch 4 implements vmread and vmwrite Avi Kivity
2009-12-16 14:32       ` [PATCH 3/7] Nested VMX patch 3 implements vmptrld and vmptrst Avi Kivity
2009-12-16 13:59     ` [PATCH 2/7] Nested VMX patch 2 implements vmclear Avi Kivity
2009-12-28 14:57     ` Gleb Natapov [this message]
2009-12-16 13:34   ` [PATCH 1/7] Nested VMX patch 1 implements vmon and vmoff Avi Kivity
2009-12-20 14:20   ` Gleb Natapov
2009-12-20 14:23     ` Avi Kivity
2009-12-20 14:25       ` Gleb Natapov
2009-12-20 17:08     ` Andi Kleen
2009-12-20 19:04       ` Avi Kivity
2009-12-21 15:52         ` Muli Ben-Yehuda
2009-12-21 16:00           ` Avi Kivity
2009-12-17 13:49 ` Nested VMX support v4 Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091228145750.GA4257@redhat.com \
    --to=gleb@redhat.com \
    --cc=abelg@il.ibm.com \
    --cc=aliguori@us.ibm.com \
    --cc=avi@redhat.com \
    --cc=benami@il.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=mdday@us.ibm.com \
    --cc=muli@il.ibm.com \
    --cc=oritw@il.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.