Re: [PATCH v4 5/5] KVM: x86: improve reexecute_instruction

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Marcelo Tosatti <mtosatti@redhat.com>
To: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Gleb Natapov <gleb@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>, KVM <kvm@vger.kernel.org>
Subject: Re: [PATCH v4 5/5] KVM: x86: improve reexecute_instruction
Date: Mon, 7 Jan 2013 18:46:20 -0200	[thread overview]
Message-ID: <20130107204620.GA26134@amt.cnet> (raw)
In-Reply-To: <50E7E165.6000107@linux.vnet.ibm.com>

On Sat, Jan 05, 2013 at 04:16:37PM +0800, Xiao Guangrong wrote:
> On 01/05/2013 06:44 AM, Marcelo Tosatti wrote:
> 
> >> index b0a3678..44c6992 100644
> >> --- a/arch/x86/kvm/x86.c
> >> +++ b/arch/x86/kvm/x86.c
> >> @@ -4756,15 +4756,8 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
> >>  static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
> >>  {
> >>  	gpa_t gpa = cr2;
> >> +	gfn_t gfn;
> >>  	pfn_t pfn;
> >> -	unsigned int indirect_shadow_pages;
> >> -
> >> -	spin_lock(&vcpu->kvm->mmu_lock);
> >> -	indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
> >> -	spin_unlock(&vcpu->kvm->mmu_lock);
> >> -
> >> -	if (!indirect_shadow_pages)
> >> -		return false;
> > 
> > This renders the previous patch obsolete, pretty much (please fold).
> 
> Will try.
> 
> > 
> >>  	if (!vcpu->arch.mmu.direct_map) {
> >>  		/*
> >> @@ -4781,13 +4774,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
> >>  			return true;
> >>  	}
> >>
> >> -	/*
> >> -	 * if emulation was due to access to shadowed page table
> >> -	 * and it failed try to unshadow page and re-enter the
> >> -	 * guest to let CPU execute the instruction.
> >> -	 */
> >> -	if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)))
> >> -		return true;
> >> +	gfn = gpa_to_gfn(gpa);
> >>
> >>  	/*
> >>  	 * Do not retry the unhandleable instruction if it faults on the
> >> @@ -4795,13 +4782,38 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
> >>  	 * retry instruction -> write #PF -> emulation fail -> retry
> >>  	 * instruction -> ...
> >>  	 */
> >> -	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
> >> -	if (!is_error_noslot_pfn(pfn)) {
> >> -		kvm_release_pfn_clean(pfn);
> >> +	pfn = gfn_to_pfn(vcpu->kvm, gfn);
> >> +
> >> +	/*
> >> +	 * If the instruction failed on the error pfn, it can not be fixed,
> >> +	 * report the error to userspace.
> >> +	 */
> >> +	if (is_error_noslot_pfn(pfn))
> >> +		return false;
> >> +
> >> +	kvm_release_pfn_clean(pfn);
> >> +
> >> +	/* The instructions are well-emulated on direct mmu. */
> >> +	if (vcpu->arch.mmu.direct_map) {
> > 
> > !direct_map?
> 
> No. This logic is, if it is direct mmu, we just unprotect the page shadowed by
> nested mmu, then let guest retry the instruction, no need to detect unhandlable
> instruction.
> 
> > 
> >> +		unsigned int indirect_shadow_pages;
> >> +
> >> +		spin_lock(&vcpu->kvm->mmu_lock);
> >> +		indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
> >> +		spin_unlock(&vcpu->kvm->mmu_lock);
> >> +
> >> +		if (indirect_shadow_pages)
> >> +			kvm_mmu_unprotect_page(vcpu->kvm, gfn);
> >> +
> >>  		return true;
> >>  	}
> >>
> >> -	return false;
> >> +	kvm_mmu_unprotect_page(vcpu->kvm, gfn);
> >> +
> >> +	/* If the target gfn is used as page table, the fault can
> >> +	 * not be avoided by unprotecting shadow page and it will
> >> +	 * be reported to userspace.
> >> +	 */
> >> +	return !vcpu->arch.target_gfn_is_pt;
> >>  }
> > 
> > The idea was
> > 
> > How about recording the gfn number for shadow pages that have been
> > shadowed in the current pagefault run? (which is cheap, compared to
> > shadowing these pages).
> > 
> > If failed instruction emulation is write to one of these gfns, then
> > fail.
> 
> If i understood correctly, i do not think it is simpler than the way in this
> patch.
> 
> There is the change to apply the idea:
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c431b33..2163de8 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -502,6 +502,8 @@ struct kvm_vcpu_arch {
>  		u64 msr_val;
>  		struct gfn_to_hva_cache data;
>  	} pv_eoi;
> +
> +	gfn_t pt_gfns[4];
>  };
> 
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 0453fa0..ac4210f 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -523,6 +523,18 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
>  	return false;
>  }
> 
> +static void FNAME(cache_pt_gfns)(struct kvm_vcpu *vcpu, struct guest_walker *walker)
> +{
> +	int level;
> +
> +	/* Reset all gfns to -1, then we can detect the levels which is not used in guest. */
> +	for (level = 0; level < 4; level++)
> +		vcpu->arch.pt_gfns[level] = (gfn_t)(-1);
> +
> +	for (level = walker->level; level <= walker->max_level; level++)
> +		vcpu->arch.pt_gfns[level - 1] = walker->table_gfn[level - 1];
> +}
> +
>  /*
>   * Page fault handler.  There are several causes for a page fault:
>   *   - there is no shadow pte for the guest pte
> @@ -576,6 +588,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
>  		return 0;
>  	}
> 
> +	 FNAME(cache_pt_gfns)(vcpu, &walker);
> +
>  	if (walker.level >= PT_DIRECTORY_LEVEL)
>  		force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
>  		   || FNAME(is_self_change_mapping)(vcpu, &walker, user_fault);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b0a3678..b86ee24 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4753,18 +4753,25 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
>  	return r;
>  }
> 
> +static bool is_gfn_used_as_pt(struct kvm_vcpu *vcpu, gfn_t gfn)
> +{
> +	int level;
> +
> +	for (level = 0; level < 4; level++) {
> +		if (vcpu->arch.pt_gfns[level] == (gfn_t)-1)
> +			continue;
> +		if (gfn == vcpu->arch.pt_gfns[level])
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
>  static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
>  {
>  	gpa_t gpa = cr2;
> +	gfn_t gfn;
>  	pfn_t pfn;
> -	unsigned int indirect_shadow_pages;
> -
> -	spin_lock(&vcpu->kvm->mmu_lock);
> -	indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
> -	spin_unlock(&vcpu->kvm->mmu_lock);
> -
> -	if (!indirect_shadow_pages)
> -		return false;
> 
>  	if (!vcpu->arch.mmu.direct_map) {
>  		/*
> @@ -4781,13 +4788,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
>  			return true;
>  	}
> 
> -	/*
> -	 * if emulation was due to access to shadowed page table
> -	 * and it failed try to unshadow page and re-enter the
> -	 * guest to let CPU execute the instruction.
> -	 */
> -	if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)))
> -		return true;
> +	gfn = gpa_to_gfn(gpa);
> 
>  	/*
>  	 * Do not retry the unhandleable instruction if it faults on the
> @@ -4795,13 +4796,38 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
>  	 * retry instruction -> write #PF -> emulation fail -> retry
>  	 * instruction -> ...
>  	 */
> -	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
> -	if (!is_error_noslot_pfn(pfn)) {
> -		kvm_release_pfn_clean(pfn);
> +	pfn = gfn_to_pfn(vcpu->kvm, gfn);
> +
> +	/*
> +	 * If the instruction failed on the error pfn, it can not be fixed,
> +	 * report the error to userspace.
> +	 */
> +	if (is_error_noslot_pfn(pfn))
> +		return false;
> +
> +	kvm_release_pfn_clean(pfn);
> +
> +	/* The instructions are well-emulated on direct mmu. */
> +	if (vcpu->arch.mmu.direct_map) {
> +		unsigned int indirect_shadow_pages;
> +
> +		spin_lock(&vcpu->kvm->mmu_lock);
> +		indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
> +		spin_unlock(&vcpu->kvm->mmu_lock);
> +
> +		if (indirect_shadow_pages)
> +			kvm_mmu_unprotect_page(vcpu->kvm, gfn);
> +
>  		return true;
>  	}
> 
> -	return false;
> +	kvm_mmu_unprotect_page(vcpu->kvm, gfn);
> +
> +	/* If the target gfn is used as page table, the fault can
> +	 * not be avoided by unprotecting shadow page and it will
> +	 * be reported to userspace.
> +	 */
> +	return !is_gfn_used_as_pt(vcpu, gfn);
>  }
> 
>  static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
> 
> 
> You can see we need to record more things in the vcpu struct (bool vs. gfn_t [4])
> and my patch can fold is_gfn_used_as_pt into a existed function FNAME(is_self_change_mapping).
> 
> Hmm?

Yes, its not needed. But its not clear where target_gfn_is_pt is reset.
Also please use a more descriptive name, such as
"bool write_fault_to_shadow_pgtable".

Please use coding style which is easier for humans to parse, overall.

     prev parent reply	other threads:[~2013-01-07 20:46 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-04 13:53 [PATCH v4 0/5] KVM: x86: improve reexecute_instruction Xiao Guangrong
2013-01-04 13:54 ` [PATCH v4 1/5] KVM: MMU: fix Dirty bit missed if CR0.WP = 0 Xiao Guangrong
2013-01-04 13:54 ` [PATCH v4 2/5] KVM: MMU: fix infinite fault access retry Xiao Guangrong
2013-01-04 13:55 ` [PATCH v4 3/5] KVM: x86: clean up reexecute_instruction Xiao Guangrong
2013-01-04 22:21   ` Marcelo Tosatti
2013-01-05  7:20     ` Xiao Guangrong
2013-01-04 13:56 ` [PATCH v4 4/5] KVM: x86: let reexecute_instruction work for tdp Xiao Guangrong
2013-01-04 13:56 ` [PATCH v4 5/5] KVM: x86: improve reexecute_instruction Xiao Guangrong
2013-01-04 22:44   ` Marcelo Tosatti
2013-01-05  8:16     ` Xiao Guangrong
2013-01-07 20:46       ` Marcelo Tosatti [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130107204620.GA26134@amt.cnet \
    --to=mtosatti@redhat.com \
    --cc=gleb@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=xiaoguangrong@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.