Re: [PATCH 2/7] Nested VMX patch 2 implements vmclear

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Avi Kivity <avi@redhat.com>
To: oritw@il.ibm.com
Cc: kvm@vger.kernel.org, benami@il.ibm.com, abelg@il.ibm.com,
	muli@il.ibm.com, aliguori@us.ibm.com, mdday@us.ibm.com
Subject: Re: [PATCH 2/7] Nested VMX patch 2 implements vmclear
Date: Wed, 16 Dec 2009 15:59:49 +0200	[thread overview]
Message-ID: <4B28E7D5.6080401@redhat.com> (raw)
In-Reply-To: <1260470309-7166-3-git-send-email-oritw@il.ibm.com>

On 12/10/2009 08:38 PM, oritw@il.ibm.com wrote:
> From: Orit Wasserman<oritw@il.ibm.com>
>
> ---
>   arch/x86/kvm/vmx.c |  235 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>   arch/x86/kvm/x86.c |    5 +-
>   arch/x86/kvm/x86.h |    3 +
>   3 files changed, 240 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2726a6c..a7ffd5e 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -93,13 +93,39 @@ struct shared_msr_entry {
>   };
>
>   struct __attribute__ ((__packed__)) level_state {
> +	/* Has the level1 guest done vmclear? */
> +	bool vmclear;
> +};
>    

Suggest calling it launch_state and using an enum.  We can have three 
states: uninitialized, clear, and launched.  Not sure if this is really 
required by the spec.

Do we need vmclear in l1_state?

> +struct __attribute__ ((__packed__)) nested_vmcs_page {
> +	u32 revision_id;
> +	u32 abort;
> +	struct level_state l2_state;
> +};
> +
> +struct nested_vmcs_list {
> +	struct list_head list;
>    

'link'

> +	gpa_t vmcs_addr;
> +	struct vmcs *l2_vmcs;
>   };
>
>   struct nested_vmx {
>   	/* Has the level1 guest done vmxon? */
>   	bool vmxon;
> +	/* What is the location of the current vmcs l1 keeps for l2 */
> +	gpa_t current_vmptr;
>   	/* Level 1 state for switching to level 2 and back */
>   	struct level_state *l1_state;
> +	/* list of vmcs for each l2 guest created by l1 */
> +	struct list_head l2_vmcs_list;
> +	/* l2 page corresponding to the current vmcs set by l1 */
> +	struct nested_vmcs_page *current_l2_page;
>   };
>
>   struct vcpu_vmx {
> @@ -156,6 +182,76 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
>   	return container_of(vcpu, struct vcpu_vmx, vcpu);
>   }
>
> +static struct page *nested_get_page(struct kvm_vcpu *vcpu,
> +				    u64 vmcs_addr)
> +{
> +	struct page *vmcs_page = NULL;
> +
> +	down_read(&current->mm->mmap_sem);
> +	vmcs_page = gfn_to_page(vcpu->kvm, vmcs_addr>>  PAGE_SHIFT);
> +	up_read(&current->mm->mmap_sem);
>    

gfn_to_page() doesn't need mmap_sem (and may deadlock if you take it).

> +
> +	if (is_error_page(vmcs_page)) {
> +		printk(KERN_ERR "%s error allocating page 0x%llx\n",
> +		       __func__, vmcs_addr);
> +		kvm_release_page_clean(vmcs_page);
> +		return NULL;
> +	}
> +
> +	return vmcs_page;
> +
> +}
> +
> +static int nested_map_current(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct page *vmcs_page =
> +		nested_get_page(vcpu, vmx->nested.current_vmptr);
> +	struct nested_vmcs_page *mapped_page;
> +
> +	if (vmcs_page == NULL) {
> +		printk(KERN_INFO "%s: failure in nested_get_page\n", __func__);
> +		return 0;
> +	}
> +
> +	if (vmx->nested.current_l2_page) {
> +		printk(KERN_INFO "%s: shadow vmcs already mapped\n", __func__);
> +		WARN_ON(1);
> +		return 0;
> +	}
> +
> +	mapped_page = kmap_atomic(vmcs_page, KM_USER0);
> +
> +	if (!mapped_page) {
> +		printk(KERN_INFO "%s: error in kmap_atomic\n", __func__);
> +		return 0;
> +	}
>    

kmap_atomic() can't fail.

> +
> +	vmx->nested.current_l2_page = mapped_page;
> +
> +	return 1;
> +}
> +
> +static void nested_unmap_current(struct kvm_vcpu *vcpu)
> +{
> +	struct page *page;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	if (!vmx->nested.current_l2_page) {
> +		printk(KERN_INFO "Shadow vmcs already unmapped\n");
> +		WARN_ON(1);
>    

Use BUG_ON(), since this can't happen unless there's a bug.

> +		return;
> +	}
> +
> +	page = kmap_atomic_to_page(vmx->nested.current_l2_page);
> +
> +	kunmap_atomic(vmx->nested.current_l2_page, KM_USER0);
> +
> +	kvm_release_page_dirty(page);
> +
> +	vmx->nested.current_l2_page = NULL;
> +}
> +
>   static int init_rmode(struct kvm *kvm);
>   static u64 construct_eptp(unsigned long root_hpa);
>
> @@ -1144,6 +1240,35 @@ static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
>   	return 0;
>   }
>
> +static int read_guest_vmcs_gpa(struct kvm_vcpu *vcpu, gva_t gva, u64 *gentry)
> +{
> +	int r = 0;
> +	uint size;
> +
> +	*gentry = 0;
> +
> +	if (is_long_mode(vcpu))
> +		size = sizeof(u64);
> +	else
> +		size = sizeof(u32);
>    

I think the gpa is always 64 bit, regardless of the current mode.

> +
> +	r = kvm_read_guest_virt(gva, gentry,
> +				size, vcpu);
> +	if (r) {
> +		printk(KERN_ERR "%s cannot read guest vmcs addr %lx : %d\n",
> +		       __func__, vcpu->arch.regs[VCPU_REGS_RAX], r);
>    

RAX may not be relevant.  Just return, and the user can disassemble the 
instructions and see for themselves.

> +		return r;
> +	}
> +
> +	if (!IS_ALIGNED(*gentry, PAGE_SIZE)) {
> +		printk(KERN_DEBUG "%s addr %llx not aligned\n",
> +		       __func__, *gentry);
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
>
> +/*
> + * Decode the memory address (operand) of a vmx instruction according to Table 23-12/23-11
> + * For additional information regarding offset calculation see 3.7.5
> + */
> +static gva_t get_vmx_mem_address(struct kvm_vcpu *vcpu,
> +				 unsigned long exit_qualification,
> +				 u32 vmx_instruction_info)
> +{
> +	int  scaling        = vmx_instruction_info&  3;             /* bits 0:1 scaling */
> +	int  addr_size      = (vmx_instruction_info>>  7)&  7;      /* bits 7:9 address size, 0=16bit, 1=32bit, 2=64bit */
> +	bool is_reg         = vmx_instruction_info&  (1u<<  10);    /* bit  10  1=register operand, 0= memory */
> +	int  seg_reg        = (vmx_instruction_info>>  15)&  7;     /* bits 15:17 segment register */
> +	int  index_reg      = (vmx_instruction_info>>  18)&  0xf;   /* bits 18:21 index register */
> +	bool index_is_valid = !(vmx_instruction_info&  (1u<<  22)); /* bit  22 index register validity, 0=valid, 1=invalid */
> +	int  base_reg       = (vmx_instruction_info>>  23)&  0xf;   /* bits 23:26 index register */
> +	bool base_is_valid  = !(vmx_instruction_info&  (1u<<  27)); /* bit  27 base register validity, 0=valid, 1=invalid */
> +	gva_t addr;
> +
> +	if (is_reg)
> +		return 0;
>    

Should #UD.

> +
> +	switch (addr_size) {
> +	case 1:
> +		exit_qualification&= 0xffffffff; /* 32 high bits are undefied according to the spec, page 23-7 */
> +		break;
> +	case 2:
> +		break;
> +	default:
> +		return 0;
> +	}
> +
> +	/* Addr = segment_base + offset */
> +	/* offfset = Base + [Index * Scale] + Displacement, see Figure 3-11 */
> +	addr = vmx_get_segment_base(vcpu, seg_reg);
> +	if (base_is_valid)
> +		addr += kvm_register_read(vcpu, base_reg);
> +	if (index_is_valid)
> +		addr += kvm_register_read(vcpu, index_reg)*scaling;
>    

Shouldn't this be a shift?

Wish we had something like that for emulate.c.

> +	addr += exit_qualification; /* exit qualification holds the displacement, spec page 23-7 */
> +
> +	return addr;
> +}
> +
> +static int handle_vmclear(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct level_state *l2_state;
> +	gpa_t guest_vmcs_addr;
> +	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> +	gva_t vmcs_gva;
> +
> +	if (!nested_vmx_check_permission(vcpu))
> +		return 1;
> +
> +	vmcs_gva = get_vmx_mem_address(vcpu, exit_qualification,
> +				       vmx_instruction_info);
>    

I think you can let get_vmx_mem_address() do the vmread()s, simpler.

> +
> +	if (read_guest_vmcs_gpa(vcpu, vmcs_gva,&guest_vmcs_addr))
> +		return 1;
> +
> +	vmx->nested.current_vmptr = guest_vmcs_addr;
> +	if (!nested_map_current(vcpu))
> +		return 1;
> +
> +	l2_state =&(to_vmx(vcpu)->nested.current_l2_page->l2_state);
> +	l2_state->vmclear = 1;
> +	nested_free_current_vmcs(vcpu);
>    

Why free?  Isn't the purpose of the list to keep those active?

> +
> +	vmx->nested.current_vmptr = -1ull;
> +
> +	nested_unmap_current(vcpu);
> +
> +	skip_emulated_instruction(vcpu);
> +	clear_rflags_cf_zf(vcpu);
> +
> +	return 1;
> +}
> +
>    

As usual, if you can split some of the infrastructure into separate 
patches, it would help review.

-- 
error compiling committee.c: too many arguments to function

next prev parent reply	other threads:[~2009-12-16 13:59 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-10 18:38 Nested VMX support v4 oritw
2009-12-10 18:38 ` [PATCH 1/7] Nested VMX patch 1 implements vmon and vmoff oritw
2009-12-10 18:38   ` [PATCH 2/7] Nested VMX patch 2 implements vmclear oritw
2009-12-10 18:38     ` [PATCH 3/7] Nested VMX patch 3 implements vmptrld and vmptrst oritw
2009-12-10 18:38       ` [PATCH 4/7] Nested VMX patch 4 implements vmread and vmwrite oritw
2009-12-10 18:38         ` [PATCH 5/7] Nested VMX patch 5 Simplify fpu handling oritw
2009-12-10 18:38           ` [PATCH 6/7] Nested VMX patch 6 implements vmlaunch and vmresume oritw
2009-12-10 18:38             ` [PATCH 7/7] Nested VMX patch 7 handling of nested guest exits oritw
2009-12-17 13:46               ` Avi Kivity
2009-12-17 10:10             ` [PATCH 6/7] Nested VMX patch 6 implements vmlaunch and vmresume Avi Kivity
2009-12-17  9:10           ` [PATCH 5/7] Nested VMX patch 5 Simplify fpu handling Avi Kivity
2009-12-16 14:44         ` [PATCH 4/7] Nested VMX patch 4 implements vmread and vmwrite Avi Kivity
2009-12-16 14:32       ` [PATCH 3/7] Nested VMX patch 3 implements vmptrld and vmptrst Avi Kivity
2009-12-16 13:59     ` Avi Kivity [this message]
2009-12-28 14:57     ` [PATCH 2/7] Nested VMX patch 2 implements vmclear Gleb Natapov
2009-12-16 13:34   ` [PATCH 1/7] Nested VMX patch 1 implements vmon and vmoff Avi Kivity
2009-12-20 14:20   ` Gleb Natapov
2009-12-20 14:23     ` Avi Kivity
2009-12-20 14:25       ` Gleb Natapov
2009-12-20 17:08     ` Andi Kleen
2009-12-20 19:04       ` Avi Kivity
2009-12-21 15:52         ` Muli Ben-Yehuda
2009-12-21 16:00           ` Avi Kivity
2009-12-17 13:49 ` Nested VMX support v4 Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B28E7D5.6080401@redhat.com \
    --to=avi@redhat.com \
    --cc=abelg@il.ibm.com \
    --cc=aliguori@us.ibm.com \
    --cc=benami@il.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=mdday@us.ibm.com \
    --cc=muli@il.ibm.com \
    --cc=oritw@il.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.