linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
To: David Matlack <dmatlack@google.com>
Cc: linuxppc-dev@lists.ozlabs.org, kvm-ppc@vger.kernel.org,
	"kvm list" <kvm@vger.kernel.org>,
	mpe@ellerman.id.au, paulus@samba.org, benh@kernel.crashing.org,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	agraf@suse.com, "Radim Krčmář" <rkrcmar@redhat.com>,
	"Christian Borntraeger" <borntraeger@de.ibm.com>
Subject: Re: [PATCH V4 3/5] kvm/ppc/book3s_hv: Implement halt polling in the kvm_hv kernel module
Date: Thu, 21 Jul 2016 19:24:59 +1000	[thread overview]
Message-ID: <579094EB.5030207@gmail.com> (raw)
In-Reply-To: <CALzav=dKcw+iRZ-O7HrDd3K1kagOFvGF3ALiJKnjdThvhbhu=g@mail.gmail.com>



On 20/07/16 04:58, David Matlack wrote:
> On Tue, Jul 19, 2016 at 1:12 AM, Suraj Jitindar Singh
> <sjitindarsingh@gmail.com> wrote:
>> This patch introduces new halt polling functionality into the kvm_hv kernel
>> module. When a vcore is idle it will poll for some period of time before
>> scheduling itself out.
>>
>> When all of the runnable vcpus on a vcore have ceded (and thus the vcore is
>> idle) we schedule ourselves out to allow something else to run. In the
>> event that we need to wake up very quickly (for example an interrupt
>> arrives), we are required to wait until we get scheduled again.
>>
>> Implement halt polling so that when a vcore is idle, and before scheduling
>> ourselves, we poll for vcpus in the runnable_threads list which have
>> pending exceptions or which leave the ceded state. If we poll successfully
>> then we can get back into the guest very quickly without ever scheduling
>> ourselves, otherwise we schedule ourselves out as before.
>>
>> Testing of this patch with a TCP round robin test between two guests with
>> virtio network interfaces has found a decrease in round trip time of ~15us
>> on average. A performance gain is only seen when going out of and
>> back into the guest often and quickly, otherwise there is no net benefit
>> from the polling. The polling interval is adjusted such that when we are
>> often scheduled out for long periods of time it is reduced, and when we
>> often poll successfully it is increased. The rate at which the polling
>> interval increases or decreases, and the maximum polling interval, can
>> be set through module parameters.
>>
>> Based on the implementation in the generic kvm module by Wanpeng Li and
>> Paolo Bonzini, and on direction from Paul Mackerras.
>>
>> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
>> ---
>>  arch/powerpc/include/asm/kvm_book3s.h |   1 +
>>  arch/powerpc/include/asm/kvm_host.h   |   1 +
>>  arch/powerpc/kvm/book3s_hv.c          | 116 ++++++++++++++++++++++++++++++----
>>  arch/powerpc/kvm/trace_hv.h           |  22 +++++++
>>  4 files changed, 126 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
>> index 151f817..c261f52 100644
>> --- a/arch/powerpc/include/asm/kvm_book3s.h
>> +++ b/arch/powerpc/include/asm/kvm_book3s.h
>> @@ -102,6 +102,7 @@ struct kvmppc_vcore {
>>         ulong pcr;
>>         ulong dpdes;            /* doorbell state (POWER8) */
>>         ulong conferring_threads;
>> +       unsigned int halt_poll_ns;
>>  };
>>
>>  struct kvmppc_vcpu_book3s {
>> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
>> index 02d06e9..610f393 100644
>> --- a/arch/powerpc/include/asm/kvm_host.h
>> +++ b/arch/powerpc/include/asm/kvm_host.h
>> @@ -294,6 +294,7 @@ struct kvm_arch {
>>  #define VCORE_SLEEPING 3
>>  #define VCORE_RUNNING  4
>>  #define VCORE_EXITING  5
>> +#define VCORE_POLLING  6
>>
>>  /*
>>   * Struct used to manage memory for a virtual processor area
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 3bcf9e6..a9de1d4 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -94,6 +94,23 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
>>  MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
>>  #endif
>>
>> +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
>> +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
>> +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
>> +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
>> +
>> +/* Factor by which the vcore halt poll interval is grown, default is to double
>> + */
>> +static unsigned int halt_poll_ns_grow = 2;
>> +module_param(halt_poll_ns_grow, int, S_IRUGO);
>> +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
>> +
>> +/* Factor by which the vcore halt poll interval is shrunk, default is to reset
>> + */
>> +static unsigned int halt_poll_ns_shrink;
>> +module_param(halt_poll_ns_shrink, int, S_IRUGO);
>> +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
>> +
>>  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
>>  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
>>
>> @@ -2620,32 +2637,82 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
>>         finish_wait(&vcpu->arch.cpu_run, &wait);
>>  }
>>
>> +static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
>> +{
>> +       /* 10us base */
>> +       if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
>> +               vc->halt_poll_ns = 10000;
>> +       else
>> +               vc->halt_poll_ns *= halt_poll_ns_grow;
>> +
>> +       if (vc->halt_poll_ns > halt_poll_max_ns)
>> +               vc->halt_poll_ns = halt_poll_max_ns;
>> +}
>> +
>> +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
>> +{
>> +       if (halt_poll_ns_shrink == 0)
>> +               vc->halt_poll_ns = 0;
>> +       else
>> +               vc->halt_poll_ns /= halt_poll_ns_shrink;
>> +}
>> +
>> +/* Check to see if any of the runnable vcpus on the vcore have pending
>> + * exceptions or are no longer ceded
>> + */
>> +static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
>> +{
>> +       struct kvm_vcpu *vcpu;
>> +       int i;
>> +
>> +       for_each_runnable_thread(i, vcpu, vc) {
>> +               if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
>> +                       return 1;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>>  /*
>>   * All the vcpus in this vcore are idle, so wait for a decrementer
>>   * or external interrupt to one of the vcpus.  vc->lock is held.
>>   */
>>  static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>>  {
>> -       struct kvm_vcpu *vcpu;
>> -       int do_sleep = 1, i;
>> +       int do_sleep = 1;
>> +       ktime_t cur, start;
>> +       u64 block_ns;
>>         DECLARE_SWAITQUEUE(wait);
>>
>> -       prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
>> +       /* Poll for pending exceptions and ceded state */
>> +       cur = start = ktime_get();
>> +       if (vc->halt_poll_ns) {
>> +               ktime_t stop = ktime_add_ns(start, vc->halt_poll_ns);
>>
>> -       /*
>> -        * Check one last time for pending exceptions and ceded state after
>> -        * we put ourselves on the wait queue
>> -        */
>> -       for_each_runnable_thread(i, vcpu, vc) {
>> -               if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
>> -                       do_sleep = 0;
>> -                       break;
>> -               }
>> +               vc->vcore_state = VCORE_POLLING;
>> +               spin_unlock(&vc->lock);
>> +
>> +               do {
>> +                       if (kvmppc_vcore_check_block(vc)) {
>> +                               do_sleep = 0;
>> +                               break;
>> +                       }
>> +                       cur = ktime_get();
>> +               } while (ktime_before(cur, stop));
> The generic halt-polling implementation only polls if there are no
> other threads waiting to run (single_task_running() == 1). Should/Can
> we do the same here?

Thanks for catching that.

Yes I think we should.


>
>> +
>> +               spin_lock(&vc->lock);
>> +               vc->vcore_state = VCORE_INACTIVE;
>> +
>> +               if (!do_sleep)
>> +                       goto out;
>>         }
>>
>> -       if (!do_sleep) {
>> +       prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
>> +
>> +       if (kvmppc_vcore_check_block(vc)) {
>>                 finish_swait(&vc->wq, &wait);
>> -               return;
>> +               do_sleep = 0;
>> +               goto out;
>>         }
>>
>>         vc->vcore_state = VCORE_SLEEPING;
>> @@ -2656,6 +2723,27 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>>         spin_lock(&vc->lock);
>>         vc->vcore_state = VCORE_INACTIVE;
>>         trace_kvmppc_vcore_blocked(vc, 1);
>> +
>> +       cur = ktime_get();
>> +
>> +out:
>> +       block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
>> +
>> +       /* Adjust poll time */
>> +       if (halt_poll_max_ns) {
>> +               if (block_ns <= vc->halt_poll_ns)
>> +                       ;
>> +               /* We slept and blocked for longer than the max halt time */
>> +               else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
>> +                       shrink_halt_poll_ns(vc);
>> +               /* We slept and our poll time is too small */
>> +               else if (vc->halt_poll_ns < halt_poll_max_ns &&
>> +                               block_ns < halt_poll_max_ns)
>> +                       grow_halt_poll_ns(vc);
>> +       } else
>> +               vc->halt_poll_ns = 0;
>> +
>> +       trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
>>  }
>>
>>  static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>> diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
>> index 33d9daf..fb21990 100644
>> --- a/arch/powerpc/kvm/trace_hv.h
>> +++ b/arch/powerpc/kvm/trace_hv.h
>> @@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
>>                    __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
>>  );
>>
>> +TRACE_EVENT(kvmppc_vcore_wakeup,
>> +       TP_PROTO(int do_sleep, __u64 ns),
>> +
>> +       TP_ARGS(do_sleep, ns),
>> +
>> +       TP_STRUCT__entry(
>> +               __field(__u64,  ns)
>> +               __field(int,    waited)
>> +               __field(pid_t,  tgid)
>> +       ),
>> +
>> +       TP_fast_assign(
>> +               __entry->ns     = ns;
>> +               __entry->waited = do_sleep;
>> +               __entry->tgid   = current->tgid;
>> +       ),
>> +
>> +       TP_printk("%s time %lld ns, tgid=%d",
>> +               __entry->waited ? "wait" : "poll",
>> +               __entry->ns, __entry->tgid)
>> +);
>> +
>>  TRACE_EVENT(kvmppc_run_vcpu_enter,
>>         TP_PROTO(struct kvm_vcpu *vcpu),
>>
>> --
>> 2.5.5
>>

  reply	other threads:[~2016-07-21  9:25 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-19  8:12 [PATCH V4 1/5] kvm/ppc/book3s: Move struct kvmppc_vcore from kvm_host.h to kvm_book3s.h Suraj Jitindar Singh
2016-07-19  8:12 ` [PATCH V4 2/5] kvm/ppc/book3s_hv: Change vcore element runnable_threads from linked-list to array Suraj Jitindar Singh
2016-07-19  8:12 ` [PATCH V4 3/5] kvm/ppc/book3s_hv: Implement halt polling in the kvm_hv kernel module Suraj Jitindar Singh
2016-07-19  8:38   ` Christian Borntraeger
2016-07-19 18:58   ` David Matlack
2016-07-21  9:24     ` Suraj Jitindar Singh [this message]
2016-07-19  8:12 ` [PATCH V4 4/5] kvm/stats: Add provisioning for ulong vm stats and u64 vcpu stats Suraj Jitindar Singh
2016-07-19  8:52   ` Christian Borntraeger
2016-07-19 19:17   ` David Matlack
2016-07-19  8:12 ` [PATCH V4 5/5] powerpc/kvm/stats: Implement existing and add new halt polling " Suraj Jitindar Singh
2016-07-19  8:41   ` Christian Borntraeger
2016-07-19 19:20   ` David Matlack

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=579094EB.5030207@gmail.com \
    --to=sjitindarsingh@gmail.com \
    --cc=agraf@suse.com \
    --cc=benh@kernel.crashing.org \
    --cc=borntraeger@de.ibm.com \
    --cc=dmatlack@google.com \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mpe@ellerman.id.au \
    --cc=paulus@samba.org \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).