All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: kvm@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Maxim Levitsky <mlevitsk@redhat.com>,
	Sean Christopherson <seanjc@google.com>
Subject: Re: [PATCH 8/9] KVM: X86: Optimize pte_list_desc with per-array counter
Date: Thu, 24 Jun 2021 18:53:49 -0400	[thread overview]
Message-ID: <YNUM/W9uXWficCiN@t490s> (raw)
In-Reply-To: <20210624181520.11012-1-peterx@redhat.com>

On Thu, Jun 24, 2021 at 02:15:20PM -0400, Peter Xu wrote:
> Add a counter field into pte_list_desc, so as to simplify the add/remove/loop
> logic.  E.g., we don't need to loop over the array any more for most reasons.
> 
> This will make more sense after we've switched the array size to be larger;
> otherwise the counter would be a waste.
> 
> Initially I wanted to store a tail pointer at the head of the array list so we
> don't need to traverse the list at least for pushing new ones (if without the
> counter we traverse both the list and the array).  However, that would need
> slightly more changes without much benefit; e.g., after we grow the number of
> entries per array, traversing the list is not so expensive.
> 
> So let's be simple but still try to get as much benefit as we can with just
> these extra few lines of changes (not to mention the code looks easier too
> without looping over arrays).
> 
> I used the same test case to fork 500 children and recycle them ("./rmap_fork
> 500" [1]); this patch further speeds up the total fork time by about 14%, for
> a total speedup of about 38% over the vanilla kernel:
> 
>         Vanilla:      367.20 (+-4.58%)
>         3->15 slots:  302.00 (+-5.30%)
>         Add counter:  265.20 (+-9.88%)
> 
> [1] https://github.com/xzpeter/clibs/commit/825436f825453de2ea5aaee4bdb1c92281efe5b3
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  arch/x86/kvm/mmu/mmu.c | 26 ++++++++++++++++----------
>  1 file changed, 16 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 8888ae291cb9..b21e52dfc27b 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -136,10 +136,15 @@ module_param(dbg, bool, 0644);
>  #include <trace/events/kvm.h>
>  
>  /* make pte_list_desc fit well in cache lines */
> -#define PTE_LIST_EXT 15
> +#define PTE_LIST_EXT 14
>  
>  struct pte_list_desc {
>  	u64 *sptes[PTE_LIST_EXT];
> +	/*
> +	 * Stores number of entries stored in the pte_list_desc.  No need to be
> +	 * u64 but just for easier alignment.  When PTE_LIST_EXT, means full.
> +	 */
> +	u64 spte_count;
>  	struct pte_list_desc *more;
>  };
>  
> @@ -830,7 +835,7 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
>  			struct kvm_rmap_head *rmap_head)
>  {
>  	struct pte_list_desc *desc;
> -	int i, count = 0;
> +	int count = 0;
>  
>  	if (!rmap_head->val) {
>  		rmap_printk("%p %llx 0->1\n", spte, *spte);
> @@ -840,24 +845,24 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
>  		desc = mmu_alloc_pte_list_desc(vcpu);
>  		desc->sptes[0] = (u64 *)rmap_head->val;
>  		desc->sptes[1] = spte;
> +		desc->spte_count = 2;
>  		rmap_head->val = (unsigned long)desc | 1;
>  		++count;
>  	} else {
>  		rmap_printk("%p %llx many->many\n", spte, *spte);
>  		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> -		while (desc->sptes[PTE_LIST_EXT-1]) {
> +		while (desc->spte_count == PTE_LIST_EXT) {
>  			count += PTE_LIST_EXT;
> -
>  			if (!desc->more) {
>  				desc->more = mmu_alloc_pte_list_desc(vcpu);
>  				desc = desc->more;
> +				desc->spte_count = 0;
>  				break;
>  			}
>  			desc = desc->more;
>  		}
> -		for (i = 0; desc->sptes[i]; ++i)
> -			++count;
> -		desc->sptes[i] = spte;
> +		count += desc->spte_count;
> +		desc->sptes[desc->spte_count++] = spte;
>  	}
>  	return count;
>  }
> @@ -873,8 +878,10 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
>  		;
>  	desc->sptes[i] = desc->sptes[j];
>  	desc->sptes[j] = NULL;
> +	desc->spte_count--;
>  	if (j != 0)
>  		return;
> +	WARN_ON_ONCE(desc->spte_count);
>  	if (!prev_desc && !desc->more)
>  		rmap_head->val = 0;
>  	else
> @@ -930,7 +937,7 @@ static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep)
>  unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
>  {
>  	struct pte_list_desc *desc;
> -	unsigned int i, count = 0;
> +	unsigned int count = 0;
>  
>  	if (!rmap_head->val)
>  		return 0;
> @@ -940,8 +947,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
>  	desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
>  
>  	while (desc) {
> -		for (i = 0; (i < PTE_LIST_EXT) && desc->sptes[i]; i++)
> -			count++;
> +		count += desc->spte_count;
>  		desc = desc->more;
>  	}

I think I still missed another loop in pte_list_desc_remove_entry() that we can
drop.  With some other cleanups, I plan to squash the below into this patch too.

---8<---
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 719fb6fd0aa0..2d8c56eb36f8 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -872,16 +872,13 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
                           struct pte_list_desc *desc, int i,
                           struct pte_list_desc *prev_desc)
 {
-       int j;
+       int j = desc->spte_count - 1;
 
-       for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j)
-               ;
        desc->sptes[i] = desc->sptes[j];
        desc->sptes[j] = NULL;
        desc->spte_count--;
-       if (j != 0)
+       if (desc->spte_count)
                return;
-       WARN_ON_ONCE(desc->spte_count);
        if (!prev_desc && !desc->more)
                rmap_head->val = 0;
        else
@@ -913,7 +910,7 @@ static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
                desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
                prev_desc = NULL;
                while (desc) {
-                       for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
+                       for (i = 0; i < desc->spte_count; ++i) {
                                if (desc->sptes[i] == spte) {
                                        pte_list_desc_remove_entry(rmap_head,
                                                        desc, i, prev_desc);
---8<---

-- 
Peter Xu


  reply	other threads:[~2021-06-24 22:53 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-24 18:13 [PATCH 0/9] KVM: X86: Some light optimizations on rmap logic Peter Xu
2021-06-24 18:13 ` [PATCH 1/9] KVM: X86: Add per-vm stat for max rmap list size Peter Xu
2021-06-24 18:13 ` [PATCH 2/9] KVM: Introduce kvm_get_kvm_safe() Peter Xu
2021-06-24 18:13 ` [PATCH 3/9] KVM: Allow to have arch-specific per-vm debugfs files Peter Xu
2021-06-24 18:13 ` [PATCH 4/9] KVM: X86: Introduce pte_list_count() helper Peter Xu
2021-06-24 18:13 ` [PATCH 5/9] KVM: X86: Introduce kvm_mmu_slot_lpages() helpers Peter Xu
2021-06-24 18:13 ` [PATCH 6/9] KVM: X86: Introduce mmu_rmaps_stat per-vm debugfs file Peter Xu
2021-06-24 18:22   ` Peter Xu
2021-06-24 18:13 ` [PATCH 7/9] KVM: X86: MMU: Tune PTE_LIST_EXT to be bigger Peter Xu
2021-06-24 18:15 ` [PATCH 8/9] KVM: X86: Optimize pte_list_desc with per-array counter Peter Xu
2021-06-24 22:53   ` Peter Xu [this message]
2021-06-24 18:15 ` [PATCH 9/9] KVM: X86: Optimize zapping rmap Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YNUM/W9uXWficCiN@t490s \
    --to=peterx@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mlevitsk@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=vkuznets@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.