All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	Paolo Bonzini <pbonzini@redhat.com>,
	Lai Jiangshan <laijs@linux.alibaba.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>
Subject: Re: [RFC PATCH] kvm/x86: Keep root hpa in prev_roots as much as possible
Date: Thu, 29 Jul 2021 18:05:56 +0000	[thread overview]
Message-ID: <YQLuBDZ2MlNlIoH4@google.com> (raw)
In-Reply-To: <20210525213920.3340-1-jiangshanlai@gmail.com>

On Wed, May 26, 2021, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
> 
> Pagetable roots in prev_roots[] are likely to be reused soon and
> there is no much overhead to keep it with a new need_sync field
> introduced.
> 
> With the help of the new need_sync field, pagetable roots are
> kept as much as possible, and they will be re-synced before reused
> instead of being dropped.
> 
> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
> ---
> 
> This patch is just for RFC.
>   Is the idea Ok?

Yes, the idea is definitely a good one.

>   If the idea is Ok, we need to reused one bit from pgd or hpa
>     as need_sync to save memory.  Which one is better?

Ha, we can do this without increasing the memory footprint and without co-opting
a bit from pgd or hpa.  Because of compiler alignment/padding, the u8s and bools
between mmu_role and prev_roots already occupy 8 bytes, even though the actual
size is 4 bytes.  In total, we need room for 4 roots (3 previous + current), i.e.
4 bytes.  If a separate array is used, no additional memory is consumed and no
masking is needed when reading/writing e.g. pgd.

The cost is an extra swap() when updating the prev_roots LRU, but that's peanuts
and would likely be offset by masking anyways.

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 99f37781a6fc..13bb3c3a60b4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -424,10 +424,12 @@ struct kvm_mmu {
        hpa_t root_hpa;
        gpa_t root_pgd;
        union kvm_mmu_role mmu_role;
+       bool root_unsync;
        u8 root_level;
        u8 shadow_root_level;
        u8 ept_ad;
        bool direct_map;
+       bool unsync_roots[KVM_MMU_NUM_PREV_ROOTS];
        struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];

        /*


>  arch/x86/include/asm/kvm_host.h |  3 ++-
>  arch/x86/kvm/mmu/mmu.c          |  6 ++++++
>  arch/x86/kvm/vmx/nested.c       | 12 ++++--------
>  arch/x86/kvm/x86.c              |  9 +++++----
>  4 files changed, 17 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 55efbacfc244..19a337cf7aa6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -354,10 +354,11 @@ struct rsvd_bits_validate {
>  struct kvm_mmu_root_info {
>  	gpa_t pgd;
>  	hpa_t hpa;
> +	bool need_sync;

Hmm, use "unsync" instead of "need_sync", purely to match the existing terminology
in KVM's MMU for this sort of behavior.

>  };
>  
>  #define KVM_MMU_ROOT_INFO_INVALID \
> -	((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })
> +	((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE, .need_sync = true})
>  
>  #define KVM_MMU_NUM_PREV_ROOTS 3
>  
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 5e60b00e8e50..147827135549 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3878,6 +3878,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_pgd,
>  
>  	root.pgd = mmu->root_pgd;
>  	root.hpa = mmu->root_hpa;
> +	root.need_sync = false;
>  
>  	if (is_root_usable(&root, new_pgd, new_role))
>  		return true;
> @@ -3892,6 +3893,11 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_pgd,
>  	mmu->root_hpa = root.hpa;
>  	mmu->root_pgd = root.pgd;
>  
> +	if (i < KVM_MMU_NUM_PREV_ROOTS && root.need_sync) {

Probably makes sense to write this as:

	if (i >= KVM_MMU_NUM_PREV_ROOTS)
		return false;

	if (root.need_sync) {
		kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
	}
	return true;

The "i < KVM_MMU_NUM_PREV_ROOTS == success" logic is just confusing enough that
it'd be nice to write it only once.

And that would also play nicely with deferring a sync for the "current" root
(see below), e.g.

	...
	unsync = mmu->root_unsync;

	if (is_root_usable(&root, new_pgd, new_role))
		goto found_root;

	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
		swap(root, mmu->prev_roots[i]);
		swap(unsync, mmu->unsync_roots[i]);

		if (is_root_usable(&root, new_pgd, new_role))
			break;
	}

        if (i >= KVM_MMU_NUM_PREV_ROOTS)
                return false;

found_root:
        if (unsync) {
                kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
                kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
        }
        return true;

> +		kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
> +		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
> +	}
> +
>  	return i < KVM_MMU_NUM_PREV_ROOTS;
>  }
>  
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 6058a65a6ede..ab7069ac6dc5 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -5312,7 +5312,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	u32 vmx_instruction_info, types;
> -	unsigned long type, roots_to_free;
> +	unsigned long type;
>  	struct kvm_mmu *mmu;
>  	gva_t gva;
>  	struct x86_exception e;
> @@ -5361,29 +5361,25 @@ static int handle_invept(struct kvm_vcpu *vcpu)
>  			return nested_vmx_fail(vcpu,
>  				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
>  
> -		roots_to_free = 0;
>  		if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
>  					    operand.eptp))
> -			roots_to_free |= KVM_MMU_ROOT_CURRENT;
> +			kvm_mmu_free_roots(vcpu, mmu, KVM_MMU_ROOT_CURRENT);

For a non-RFC series, I think this should do two things:

  1. Separate INVEPT from INVPCID, i.e. do only INVPCID first.
  2. Enhance INVEPT to SYNC+FLUSH the current root instead of freeing it

As alluded to above, this can be done by deferring the sync+flush (which can't
be done right away because INVEPT runs in L1 context, whereas KVM needs to sync+flush
L2 EPT context).

>  		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
>  			if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
>  						    mmu->prev_roots[i].pgd,
>  						    operand.eptp))
> -				roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
> +				mmu->prev_roots[i].need_sync = true;
>  		}
>  		break;
>  	case VMX_EPT_EXTENT_GLOBAL:
> -		roots_to_free = KVM_MMU_ROOTS_ALL;
> +		kvm_mmu_free_roots(vcpu, mmu, KVM_MMU_ROOTS_ALL);
>  		break;
>  	default:
>  		BUG();
>  		break;
>  	}
>  
> -	if (roots_to_free)
> -		kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
> -
>  	return nested_vmx_succeed(vcpu);
>  }

  reply	other threads:[~2021-07-29 18:06 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-25 21:39 [RFC PATCH] kvm/x86: Keep root hpa in prev_roots as much as possible Lai Jiangshan
2021-07-29 18:05 ` Sean Christopherson [this message]
2021-08-03  1:19   ` Lai Jiangshan
2021-08-03 15:57     ` Sean Christopherson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YQLuBDZ2MlNlIoH4@google.com \
    --to=seanjc@google.com \
    --cc=jiangshanlai@gmail.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=laijs@linux.alibaba.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.