All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xiaoyao Li <xiaoyao.li@intel.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Wanpeng Li <wanpengli@tencent.com>,
	x86@kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 1/2] x86/kvm/async_pf: Use separate percpu variable to track the enabling of asyncpf
Date: Wed, 25 Oct 2023 14:22:18 +0000	[thread overview]
Message-ID: <ZTkkmgs_oCnDCGvd@google.com> (raw)
In-Reply-To: <87a5s73w53.fsf@redhat.com>

On Wed, Oct 25, 2023, Vitaly Kuznetsov wrote:
> Xiaoyao Li <xiaoyao.li@intel.com> writes:
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index b8ab9ee5896c..388a3fdd3cad 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
> > @@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg)
> >  
> >  early_param("no-steal-acc", parse_no_stealacc);
> >  
> > +static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled);
> 
> Would it make a difference is we replace this with a cpumask? I realize
> that we need to access it on all CPUs from hotpaths but this mask will
> rarely change so maybe there's no real perfomance hit?

FWIW, I personally prefer per-CPU booleans from a readability perspective.  I
doubt there is a meaningful performance difference for a bitmap vs. individual
booleans, the check is already gated by a static key, i.e. kernels that are NOT
running as KVM guests don't care.

Actually, if there's performance gains to be had, optimizing kvm_read_and_reset_apf_flags()
to read the "enabled" flag if and only if it's necessary is a more likely candidate.
Assuming the host isn't being malicious/stupid, then apf_reason.flags will be '0'
if PV async #PFs are disabled.  The only question is whether or not apf_reason.flags
is predictable enough for the CPU.

Aha!  In practice, the CPU already needs to resolve a branch based on apf_reason.flags,
it's just "hidden" up in __kvm_handle_async_pf().

If we really want to micro-optimize, provide an __always_inline inner helper so
that __kvm_handle_async_pf() doesn't need to make a CALL just to read the flags.
Then in the common case where a #PF isn't due to the host swapping out a page,
the paravirt happy path doesn't need a taken branch and never reads the enabled
variable.  E.g. the below generates:

   0xffffffff81939ed0 <+0>:	41 54              	push   %r12
   0xffffffff81939ed2 <+2>:	31 c0              	xor    %eax,%eax
   0xffffffff81939ed4 <+4>:	55                 	push   %rbp
   0xffffffff81939ed5 <+5>:	53                 	push   %rbx
   0xffffffff81939ed6 <+6>:	48 83 ec 08        	sub    $0x8,%rsp
   0xffffffff81939eda <+10>:	65 8b 2d df 81 6f 7e	mov    %gs:0x7e6f81df(%rip),%ebp        # 0x320c0 <apf_reason>
   0xffffffff81939ee1 <+17>:	85 ed              	test   %ebp,%ebp
   0xffffffff81939ee3 <+19>:	75 09              	jne    0xffffffff81939eee <__kvm_handle_async_pf+30>
   0xffffffff81939ee5 <+21>:	48 83 c4 08        	add    $0x8,%rsp
   0xffffffff81939ee9 <+25>:	5b                 	pop    %rbx
   0xffffffff81939eea <+26>:	5d                 	pop    %rbp
   0xffffffff81939eeb <+27>:	41 5c              	pop    %r12
   0xffffffff81939eed <+29>:	c3                 	ret


diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b8ab9ee5896c..b24133dc0731 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -240,22 +240,29 @@ void kvm_async_pf_task_wake(u32 token)
 }
 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
 
-noinstr u32 kvm_read_and_reset_apf_flags(void)
+static __always_inline u32 __kvm_read_and_reset_apf_flags(void)
 {
-       u32 flags = 0;
+       u32 flags = __this_cpu_read(apf_reason.flags);
 
-       if (__this_cpu_read(apf_reason.enabled)) {
-               flags = __this_cpu_read(apf_reason.flags);
-               __this_cpu_write(apf_reason.flags, 0);
+       if (unlikely(flags)) {
+               if (likely(__this_cpu_read(apf_reason.enabled)))
+                       __this_cpu_write(apf_reason.flags, 0);
+               else
+                       flags = 0;
        }
 
        return flags;
 }
+
+u32 kvm_read_and_reset_apf_flags(void)
+{
+       return __kvm_read_and_reset_apf_flags();
+}
 EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
 
 noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
-       u32 flags = kvm_read_and_reset_apf_flags();
+       u32 flags = __kvm_read_and_reset_apf_flags();
        irqentry_state_t state;
 
        if (!flags)

> >  static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
> >  DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
> >  static int has_steal_clock = 0;
> > @@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void)
> >  {
> >  	u32 flags = 0;
> >  
> > -	if (__this_cpu_read(apf_reason.enabled)) {
> > +	if (__this_cpu_read(async_pf_enabled)) {
> >  		flags = __this_cpu_read(apf_reason.flags);
> >  		__this_cpu_write(apf_reason.flags, 0);
> >  	}
> > @@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
> >  
> >  	inc_irq_stat(irq_hv_callback_count);
> >  
> > -	if (__this_cpu_read(apf_reason.enabled)) {
> > +	if (__this_cpu_read(async_pf_enabled)) {
> >  		token = __this_cpu_read(apf_reason.token);
> >  		kvm_async_pf_task_wake(token);
> >  		__this_cpu_write(apf_reason.token, 0);
> > @@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void)
> >  		wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
> >  
> >  		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
> > -		__this_cpu_write(apf_reason.enabled, 1);
> > +		__this_cpu_write(async_pf_enabled, 1);
> 
> As 'async_pf_enabled' is bool, it would probably be more natural to
> write
> 
> 	__this_cpu_write(async_pf_enabled, true);

+1000

  reply	other threads:[~2023-10-25 14:22 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-25  5:59 [PATCH v2 0/2] x86/asyncpf: Fixes the size of asyncpf PV data and related docs Xiaoyao Li
2023-10-25  5:59 ` [PATCH v2 1/2] x86/kvm/async_pf: Use separate percpu variable to track the enabling of asyncpf Xiaoyao Li
2023-10-25  9:10   ` Vitaly Kuznetsov
2023-10-25 14:22     ` Sean Christopherson [this message]
2023-10-30  5:47       ` Xiaoyao Li
2023-10-30 23:17         ` Sean Christopherson
2023-10-30  5:17     ` Xiaoyao Li
2023-10-25  5:59 ` [PATCH v2 2/2] KVM: x86: Improve documentation of MSR_KVM_ASYNC_PF_EN Xiaoyao Li
2024-02-06 21:36 ` [PATCH v2 0/2] x86/asyncpf: Fixes the size of asyncpf PV data and related docs Sean Christopherson
2024-02-07  6:26   ` Xiaoyao Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZTkkmgs_oCnDCGvd@google.com \
    --to=seanjc@google.com \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=x86@kernel.org \
    --cc=xiaoyao.li@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.