All of lore.kernel.org
 help / color / mirror / Atom feed
From: Igor Mammedov <imammedo@redhat.com>
To: Wanpeng Li <kernellwp@gmail.com>
Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <sean.j.christopherson@intel.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>
Subject: Re: [PATCH v3] KVM: LAPIC: Recalculate apic map in batch
Date: Fri, 19 Jun 2020 14:36:26 +0200	[thread overview]
Message-ID: <20200619143626.1b326566@redhat.com> (raw)
In-Reply-To: <1582684862-10880-1-git-send-email-wanpengli@tencent.com>

On Wed, 26 Feb 2020 10:41:02 +0800
Wanpeng Li <kernellwp@gmail.com> wrote:

> From: Wanpeng Li <wanpengli@tencent.com>
> 
> In the vCPU reset and set APIC_BASE MSR path, the apic map will be recalculated 
> several times, each time it will consume 10+ us observed by ftrace in my 
> non-overcommit environment since the expensive memory allocate/mutex/rcu etc 
> operations. This patch optimizes it by recaluating apic map in batch, I hope 
> this can benefit the serverless scenario which can frequently create/destroy 
> VMs. 
> 
> Before patch:
> 
> kvm_lapic_reset  ~27us
> 
> After patch:
> 
> kvm_lapic_reset  ~14us 
> 
> Observed by ftrace, improve ~48%.
> 
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
this broke CPU hotplug,

qemu-kvm -m 2G -smp 4,maxcpus=8  -monitor stdio
(qemu) device_add qemu64-x86_64-cpu,socket-id=4,core-id=0,thread-id=0

in guest fails with:

 smpboot: do_boot_cpu failed(-1) to wakeup CPU#4

which makes me suspect that  INIT/SIPI wasn't delivered

Is it a know issue?

> ---
> v2 -> v3:
>  * move apic_map_dirty to kvm_arch
>  * add the suggestions from Paolo
> 
> v1 -> v2:
>  * add apic_map_dirty to kvm_lapic
>  * error condition in kvm_apic_set_state, do recalcuate  unconditionally
> 
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/lapic.c            | 46 ++++++++++++++++++++++++++++++++---------
>  arch/x86/kvm/lapic.h            |  1 +
>  arch/x86/kvm/x86.c              |  1 +
>  4 files changed, 39 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 40a0c0f..4380ed1 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -920,6 +920,7 @@ struct kvm_arch {
>  	atomic_t vapics_in_nmi_mode;
>  	struct mutex apic_map_lock;
>  	struct kvm_apic_map *apic_map;
> +	bool apic_map_dirty;
>  
>  	bool apic_access_page_done;
>  	unsigned long apicv_inhibit_reasons;
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index afcd30d..de832aa 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -164,14 +164,28 @@ static void kvm_apic_map_free(struct rcu_head *rcu)
>  	kvfree(map);
>  }
>  
> -static void recalculate_apic_map(struct kvm *kvm)
> +void kvm_recalculate_apic_map(struct kvm *kvm)
>  {
>  	struct kvm_apic_map *new, *old = NULL;
>  	struct kvm_vcpu *vcpu;
>  	int i;
>  	u32 max_id = 255; /* enough space for any xAPIC ID */
>  
> +	if (!kvm->arch.apic_map_dirty) {
> +		/*
> +		 * Read kvm->arch.apic_map_dirty before
> +		 * kvm->arch.apic_map
> +		 */
> +		smp_rmb();
> +		return;
> +	}
> +
>  	mutex_lock(&kvm->arch.apic_map_lock);
> +	if (!kvm->arch.apic_map_dirty) {
> +		/* Someone else has updated the map. */
> +		mutex_unlock(&kvm->arch.apic_map_lock);
> +		return;
> +	}
>  
>  	kvm_for_each_vcpu(i, vcpu, kvm)
>  		if (kvm_apic_present(vcpu))
> @@ -236,6 +250,12 @@ static void recalculate_apic_map(struct kvm *kvm)
>  	old = rcu_dereference_protected(kvm->arch.apic_map,
>  			lockdep_is_held(&kvm->arch.apic_map_lock));
>  	rcu_assign_pointer(kvm->arch.apic_map, new);
> +	/*
> +	 * Write kvm->arch.apic_map before
> +	 * clearing apic->apic_map_dirty
> +	 */
> +	smp_wmb();
> +	kvm->arch.apic_map_dirty = false;
>  	mutex_unlock(&kvm->arch.apic_map_lock);
>  
>  	if (old)
> @@ -257,20 +277,20 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
>  		else
>  			static_key_slow_inc(&apic_sw_disabled.key);
>  
> -		recalculate_apic_map(apic->vcpu->kvm);
> +		apic->vcpu->kvm->arch.apic_map_dirty = true;
>  	}
>  }
>  
>  static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
>  {
>  	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
> -	recalculate_apic_map(apic->vcpu->kvm);
> +	apic->vcpu->kvm->arch.apic_map_dirty = true;
>  }
>  
>  static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
>  {
>  	kvm_lapic_set_reg(apic, APIC_LDR, id);
> -	recalculate_apic_map(apic->vcpu->kvm);
> +	apic->vcpu->kvm->arch.apic_map_dirty = true;
>  }
>  
>  static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
> @@ -286,7 +306,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
>  
>  	kvm_lapic_set_reg(apic, APIC_ID, id);
>  	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
> -	recalculate_apic_map(apic->vcpu->kvm);
> +	apic->vcpu->kvm->arch.apic_map_dirty = true;
>  }
>  
>  static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
> @@ -1912,7 +1932,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
>  	case APIC_DFR:
>  		if (!apic_x2apic_mode(apic)) {
>  			kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
> -			recalculate_apic_map(apic->vcpu->kvm);
> +			apic->vcpu->kvm->arch.apic_map_dirty = true;
>  		} else
>  			ret = 1;
>  		break;
> @@ -2018,6 +2038,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
>  		break;
>  	}
>  
> +	kvm_recalculate_apic_map(apic->vcpu->kvm);
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
> @@ -2166,7 +2188,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
>  			static_key_slow_dec_deferred(&apic_hw_disabled);
>  		} else {
>  			static_key_slow_inc(&apic_hw_disabled.key);
> -			recalculate_apic_map(vcpu->kvm);
> +			vcpu->kvm->arch.apic_map_dirty = true;
>  		}
>  	}
>  
> @@ -2207,6 +2229,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
>  	if (!apic)
>  		return;
>  
> +	vcpu->kvm->arch.apic_map_dirty = false;
>  	/* Stop the timer in case it's a reset to an active apic */
>  	hrtimer_cancel(&apic->lapic_timer.timer);
>  
> @@ -2258,6 +2281,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
>  
>  	vcpu->arch.apic_arb_prio = 0;
>  	vcpu->arch.apic_attention = 0;
> +
> +	kvm_recalculate_apic_map(vcpu->kvm);
>  }
>  
>  /*
> @@ -2479,17 +2504,18 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  	int r;
>  
> -
>  	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
>  	/* set SPIV separately to get count of SW disabled APICs right */
>  	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
>  
>  	r = kvm_apic_state_fixup(vcpu, s, true);
> -	if (r)
> +	if (r) {
> +		kvm_recalculate_apic_map(vcpu->kvm);
>  		return r;
> +	}
>  	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
>  
> -	recalculate_apic_map(vcpu->kvm);
> +	kvm_recalculate_apic_map(vcpu->kvm);
>  	kvm_apic_set_version(vcpu);
>  
>  	apic_update_ppr(apic);
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index ec6fbfe..7581bc2 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -78,6 +78,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
>  void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
>  void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
>  u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
> +void kvm_recalculate_apic_map(struct kvm *kvm);
>  void kvm_apic_set_version(struct kvm_vcpu *vcpu);
>  int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val);
>  int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 79bc995..d3802a2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -350,6 +350,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	}
>  
>  	kvm_lapic_set_base(vcpu, msr_info->data);
> +	kvm_recalculate_apic_map(vcpu->kvm);
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(kvm_set_apic_base);


  parent reply	other threads:[~2020-06-19 12:36 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-26  2:41 [PATCH v3] KVM: LAPIC: Recalculate apic map in batch Wanpeng Li
2020-03-02 16:32 ` Paolo Bonzini
2020-06-19 12:36 ` Igor Mammedov [this message]
2020-06-19 14:10   ` Paolo Bonzini
2020-06-19 15:38     ` Igor Mammedov
2020-06-21 22:26     ` Igor Mammedov
2020-06-22 14:37       ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200619143626.1b326566@redhat.com \
    --to=imammedo@redhat.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=kernellwp@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=sean.j.christopherson@intel.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.