Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Alexander Graf <agraf@csgraf.de>
To: Peter Collingbourne <pcc@google.com>
Cc: Peter Maydell <peter.maydell@linaro.org>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	qemu-devel <qemu-devel@nongnu.org>,
	Cameron Esfahani <dirty@apple.com>,
	Roman Bolshakov <r.bolshakov@yadro.com>,
	qemu-arm@nongnu.org, Claudio Fontana <cfontana@suse.de>,
	Frank Yang <lfy@google.com>, Paolo Bonzini <pbonzini@redhat.com>
Subject: Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling
Date: Wed, 2 Dec 2020 02:39:19 +0100	[thread overview]
Message-ID: <b41352d9-26db-a232-957a-9c63fcc2db18@csgraf.de> (raw)
In-Reply-To: <CAMn1gO4pNFb338-X7JAghCBqJKarrzbQfmpB6v0fLr843fM12A@mail.gmail.com>


On 02.12.20 02:32, Peter Collingbourne wrote:
> On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf <agraf@csgraf.de> wrote:
>>
>> On 01.12.20 22:00, Peter Collingbourne wrote:
>>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
>>> up on IPI.
>>>
>>> Signed-off-by: Peter Collingbourne <pcc@google.com>
>>> ---
>>> v2:
>>> - simplify locking further
>>> - wait indefinitely on disabled or masked timers
>>>
>>>    accel/hvf/hvf-cpus.c     |   5 +-
>>>    include/sysemu/hvf_int.h |   3 +-
>>>    target/arm/hvf/hvf.c     | 116 ++++++++++++++-------------------------
>>>    3 files changed, 43 insertions(+), 81 deletions(-)
>>>
>>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
>>> index 4360f64671..b2c8fb57f6 100644
>>> --- a/accel/hvf/hvf-cpus.c
>>> +++ b/accel/hvf/hvf-cpus.c
>>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
>>>        sigact.sa_handler = dummy_signal;
>>>        sigaction(SIG_IPI, &sigact, NULL);
>>>
>>> -    pthread_sigmask(SIG_BLOCK, NULL, &set);
>>> -    sigdelset(&set, SIG_IPI);
>>> -    pthread_sigmask(SIG_SETMASK, &set, NULL);
>>> +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
>>> +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
>>>
>>>    #ifdef __aarch64__
>>>        r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
>>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
>>> index c56baa3ae8..13adf6ea77 100644
>>> --- a/include/sysemu/hvf_int.h
>>> +++ b/include/sysemu/hvf_int.h
>>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
>>>    struct hvf_vcpu_state {
>>>        uint64_t fd;
>>>        void *exit;
>>> -    struct timespec ts;
>>> -    bool sleeping;
>>> +    sigset_t unblock_ipi_mask;
>>>    };
>>>
>>>    void assert_hvf_ok(hv_return_t ret);
>>> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
>>> index 8fe10966d2..3321d48aa2 100644
>>> --- a/target/arm/hvf/hvf.c
>>> +++ b/target/arm/hvf/hvf.c
>>> @@ -2,6 +2,7 @@
>>>     * QEMU Hypervisor.framework support for Apple Silicon
>>>
>>>     * Copyright 2020 Alexander Graf <agraf@csgraf.de>
>>> + * Copyright 2020 Google LLC
>>>     *
>>>     * This work is licensed under the terms of the GNU GPL, version 2 or later.
>>>     * See the COPYING file in the top-level directory.
>>> @@ -18,6 +19,7 @@
>>>    #include "sysemu/hw_accel.h"
>>>
>>>    #include <Hypervisor/Hypervisor.h>
>>> +#include <mach/mach_time.h>
>>>
>>>    #include "exec/address-spaces.h"
>>>    #include "hw/irq.h"
>>> @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
>>>
>>>    void hvf_kick_vcpu_thread(CPUState *cpu)
>>>    {
>>> -    if (cpu->hvf->sleeping) {
>>> -        /*
>>> -         * When sleeping, make sure we always send signals. Also, clear the
>>> -         * timespec, so that an IPI that arrives between setting hvf->sleeping
>>> -         * and the nanosleep syscall still aborts the sleep.
>>> -         */
>>> -        cpu->thread_kicked = false;
>>> -        cpu->hvf->ts = (struct timespec){ };
>>> -        cpus_kick_thread(cpu);
>>> -    } else {
>>> -        hv_vcpus_exit(&cpu->hvf->fd, 1);
>>> -    }
>>> +    cpus_kick_thread(cpu);
>>> +    hv_vcpus_exit(&cpu->hvf->fd, 1);
>>>    }
>>>
>>>    static int hvf_inject_interrupts(CPUState *cpu)
>>> @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
>>>        return 0;
>>>    }
>>>
>>> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
>>> +{
>>> +    /*
>>> +     * Use pselect to sleep so that other threads can IPI us while we're
>>> +     * sleeping.
>>> +     */
>>> +    qatomic_mb_set(&cpu->thread_kicked, false);
>>> +    qemu_mutex_unlock_iothread();
>>> +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
>>> +    qemu_mutex_lock_iothread();
>>> +}
>>> +
>>>    int hvf_vcpu_exec(CPUState *cpu)
>>>    {
>>>        ARMCPU *arm_cpu = ARM_CPU(cpu);
>>> @@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>        hv_return_t r;
>>>        int ret = 0;
>>>
>>> -    qemu_mutex_unlock_iothread();
>>> -
>>>        do {
>>>            bool advance_pc = false;
>>>
>>> -        qemu_mutex_lock_iothread();
>>>            current_cpu = cpu;
>>>            qemu_wait_io_event_common(cpu);
>>> -        qemu_mutex_unlock_iothread();
>>>
>>>            flush_cpu_state(cpu);
>>>
>>> @@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>            }
>>>
>>>            if (cpu->halted) {
>>> -            qemu_mutex_lock_iothread();
>>>                return EXCP_HLT;
>>>            }
>>>
>>> +        qemu_mutex_unlock_iothread();
>>>            assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
>>>
>>>            /* handle VMEXIT */
>>> @@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>            uint64_t syndrome = hvf_exit->exception.syndrome;
>>>            uint32_t ec = syn_get_ec(syndrome);
>>>
>>> +        qemu_mutex_lock_iothread();
>>>            switch (exit_reason) {
>>>            case HV_EXIT_REASON_EXCEPTION:
>>>                /* This is the main one, handle below. */
>>>                break;
>>>            case HV_EXIT_REASON_VTIMER_ACTIVATED:
>>> -            qemu_mutex_lock_iothread();
>>>                current_cpu = cpu;
>>>                qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
>>> -            qemu_mutex_unlock_iothread();
>>>                continue;
>>>            case HV_EXIT_REASON_CANCELED:
>>>                /* we got kicked, no exit to process */
>>> @@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>                uint32_t srt = (syndrome >> 16) & 0x1f;
>>>                uint64_t val = 0;
>>>
>>> -            qemu_mutex_lock_iothread();
>>>                current_cpu = cpu;
>>>
>>>                DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
>>> @@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>                    hvf_set_reg(cpu, srt, val);
>>>                }
>>>
>>> -            qemu_mutex_unlock_iothread();
>>> -
>>>                advance_pc = true;
>>>                break;
>>>            }
>>> @@ -493,68 +489,40 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>            case EC_WFX_TRAP:
>>>                if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
>>>                    (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
>>> -                uint64_t cval, ctl, val, diff, now;
>>> +                advance_pc = true;
>>>
>>> -                /* Set up a local timer for vtimer if necessary ... */
>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
>>> -                assert_hvf_ok(r);
>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
>>> +                uint64_t ctl;
>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
>>> +                                        &ctl);
>>>                    assert_hvf_ok(r);
>>>
>>> -                asm volatile("mrs %0, cntvct_el0" : "=r"(val));
>>> -                diff = cval - val;
>>> -
>>> -                now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
>>> -                      gt_cntfrq_period_ns(arm_cpu);
>>> -
>>> -                /* Timer disabled or masked, just wait for long */
>>>                    if (!(ctl & 1) || (ctl & 2)) {
>>> -                    diff = (120 * NANOSECONDS_PER_SECOND) /
>>> -                           gt_cntfrq_period_ns(arm_cpu);
>>> +                    /* Timer disabled or masked, just wait for an IPI. */
>>> +                    hvf_wait_for_ipi(cpu, NULL);
>>> +                    break;
>>>                    }
>>>
>>> -                if (diff < INT64_MAX) {
>>> -                    uint64_t ns = diff * gt_cntfrq_period_ns(arm_cpu);
>>> -                    struct timespec *ts = &cpu->hvf->ts;
>>> -
>>> -                    *ts = (struct timespec){
>>> -                        .tv_sec = ns / NANOSECONDS_PER_SECOND,
>>> -                        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
>>> -                    };
>>> -
>>> -                    /*
>>> -                     * Waking up easily takes 1ms, don't go to sleep for smaller
>>> -                     * time periods than 2ms.
>>> -                     */
>>> -                    if (!ts->tv_sec && (ts->tv_nsec < (SCALE_MS * 2))) {
>>> -                        advance_pc = true;
>>> -                        break;
>>> -                    }
>>> -
>>> -                    /* Set cpu->hvf->sleeping so that we get a SIG_IPI signal. */
>>> -                    cpu->hvf->sleeping = true;
>>> -                    smp_mb();
>>> -
>>> -                    /* Bail out if we received an IRQ meanwhile */
>>> -                    if (cpu->thread_kicked || (cpu->interrupt_request &
>>> -                        (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
>>> -                        cpu->hvf->sleeping = false;
>>> -                        break;
>>> -                    }
>>> -
>>> -                    /* nanosleep returns on signal, so we wake up on kick. */
>>> -                    nanosleep(ts, NULL);
>>> -
>>> -                    /* Out of sleep - either naturally or because of a kick */
>>> -                    cpu->hvf->sleeping = false;
>>> +                uint64_t cval;
>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
>>> +                                        &cval);
>>> +                assert_hvf_ok(r);
>>> +
>>> +                int64_t ticks_to_sleep = cval - mach_absolute_time();
>>
>> I think you touched based on it in a previous thread, but would you mind
>> to explain again why mach_absolute_time() is the right thing to check
>> cval against? If I read the headers correctly, the cnvt_off register
>> should be 0, so cntvct should be the reference time, no?
> In my experiments I've found that CNTPCT_EL0 and CNTVCT_EL0 are the
> same when read on the host (i.e. host CNTVOFF_EL2 = 0). When we look
> at the guest we see that CNTPCT_EL0 corresponds to
> mach_absolute_time() on the host and not host CNTPCT_EL0 (if you look
> at XNU kernel sources you will see that mach_absolute_time() reads
> CNTPCT_EL0 and adds a constant corresponding to the amount of time
> that the machine spends asleep) so I think that what's going on at the
> hypervisor level is that guest CNTPOFF_EL2 is being set to the same
> constant to make it correspond to mach_absolute_time().


Yes, I can absolutely see how it's different from CNTPCT, but it should 
be identical to CNTVCT_EL0 inside QEMU, no?


Alex

next prev parent reply	other threads:[~2020-12-02  1:41 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-01 21:00 [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling Peter Collingbourne via
2020-12-01 21:00 ` [PATCH v2 2/2] arm/hvf: Stop setting current_cpu Peter Collingbourne via
2020-12-01 22:11   ` Alexander Graf
2020-12-02  0:13     ` Peter Collingbourne
2020-12-01 23:24 ` [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling Alexander Graf
2020-12-02  1:32   ` Peter Collingbourne
2020-12-02  1:39     ` Alexander Graf [this message]
2020-12-02  1:52       ` Peter Collingbourne
2020-12-02  1:54         ` Alexander Graf
2020-12-02  1:57           ` Peter Collingbourne

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b41352d9-26db-a232-957a-9c63fcc2db18@csgraf.de \
    --to=agraf@csgraf.de \
    --cc=cfontana@suse.de \
    --cc=dirty@apple.com \
    --cc=ehabkost@redhat.com \
    --cc=lfy@google.com \
    --cc=pbonzini@redhat.com \
    --cc=pcc@google.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=r.bolshakov@yadro.com \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).