qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: "Lluís Vilanova" <vilanova@ac.upc.edu>
Cc: qemu-devel@nongnu.org, Eric Blake <eblake@redhat.com>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Peter Crosthwaite <crosthwaite.peter@gmail.com>,
	Richard Henderson <rth@twiddle.net>
Subject: Re: [Qemu-devel] [PATCH v6 3/7] trace: [tcg] Delay changes to dynamic state when translating
Date: Mon, 9 Jan 2017 17:01:56 +0000	[thread overview]
Message-ID: <20170109170156.GL30228@stefanha-x1.localdomain> (raw)
In-Reply-To: <148295047061.19871.11792107348459066542.stgit@fimbulvetr.bsc.es>

[-- Attachment #1: Type: text/plain, Size: 7601 bytes --]

On Wed, Dec 28, 2016 at 07:41:10PM +0100, Lluís Vilanova wrote:
> This keeps consistency across all decisions taken during translation
> when the dynamic state of a vCPU is changed in the middle of translating
> some guest code.
> 
> Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
> ---
>  cpu-exec.c             |   26 ++++++++++++++++++++++++++
>  include/qom/cpu.h      |    7 +++++++
>  qom/cpu.c              |    4 ++++
>  trace/control-target.c |   11 +++++++++--
>  4 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 4188fed3c6..1b7366efb0 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -33,6 +33,7 @@
>  #include "hw/i386/apic.h"
>  #endif
>  #include "sysemu/replay.h"
> +#include "trace/control.h"
>  
>  /* -icount align implementation. */
>  
> @@ -451,9 +452,21 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
>  #ifndef CONFIG_USER_ONLY
>      } else if (replay_has_exception()
>                 && cpu->icount_decr.u16.low + cpu->icount_extra == 0) {
> +        /* delay changes to this vCPU's dstate during translation */
> +        atomic_set(&cpu->trace_dstate_delayed_req, false);
> +        atomic_set(&cpu->trace_dstate_must_delay, true);
> +
>          /* try to cause an exception pending in the log */
>          cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0), true);
>          *ret = -1;
> +
> +        /* apply and disable delayed dstate changes */
> +        atomic_set(&cpu->trace_dstate_must_delay, false);
> +        if (unlikely(atomic_read(&cpu->trace_dstate_delayed_req))) {
> +            bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
> +                        trace_get_vcpu_event_count());
> +        }
> +
>          return true;
>  #endif
>      }
> @@ -634,8 +647,21 @@ int cpu_exec(CPUState *cpu)
>  
>              for(;;) {
>                  cpu_handle_interrupt(cpu, &last_tb);
> +
> +                /* delay changes to this vCPU's dstate during translation */
> +                atomic_set(&cpu->trace_dstate_delayed_req, false);
> +                atomic_set(&cpu->trace_dstate_must_delay, true);
> +
>                  tb = tb_find(cpu, last_tb, tb_exit);
>                  cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
> +
> +                /* apply and disable delayed dstate changes */
> +                atomic_set(&cpu->trace_dstate_must_delay, false);
> +                if (unlikely(atomic_read(&cpu->trace_dstate_delayed_req))) {
> +                    bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
> +                                trace_get_vcpu_event_count());
> +                }
> +
>                  /* Try to align the host and virtual clocks
>                     if the guest is in advance */
>                  align_clocks(&sc, cpu);
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 3f79a8e955..58255d06fa 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -295,6 +295,10 @@ struct qemu_work_item;
>   * @kvm_fd: vCPU file descriptor for KVM.
>   * @work_mutex: Lock to prevent multiple access to queued_work_*.
>   * @queued_work_first: First asynchronous work pending.
> + * @trace_dstate_must_delay: Whether a change to trace_dstate must be delayed.
> + * @trace_dstate_delayed_req: Whether a change to trace_dstate was delayed.
> + * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes
> + *                        to @trace_dstate).
>   * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
>   *
>   * State of one CPU core or thread.
> @@ -370,6 +374,9 @@ struct CPUState {
>       * Dynamically allocated based on bitmap requried to hold up to
>       * trace_get_vcpu_event_count() entries.
>       */
> +    bool trace_dstate_must_delay;
> +    bool trace_dstate_delayed_req;
> +    unsigned long *trace_dstate_delayed;
>      unsigned long *trace_dstate;
>  
>      /* TODO Move common fields from CPUArchState here. */
> diff --git a/qom/cpu.c b/qom/cpu.c
> index 03d9190f8c..d56496d28d 100644
> --- a/qom/cpu.c
> +++ b/qom/cpu.c
> @@ -367,6 +367,9 @@ static void cpu_common_initfn(Object *obj)
>      QTAILQ_INIT(&cpu->breakpoints);
>      QTAILQ_INIT(&cpu->watchpoints);
>  
> +    cpu->trace_dstate_must_delay = false;
> +    cpu->trace_dstate_delayed_req = false;
> +    cpu->trace_dstate_delayed = bitmap_new(trace_get_vcpu_event_count());
>      cpu->trace_dstate = bitmap_new(trace_get_vcpu_event_count());
>  
>      cpu_exec_initfn(cpu);
> @@ -375,6 +378,7 @@ static void cpu_common_initfn(Object *obj)
>  static void cpu_common_finalize(Object *obj)
>  {
>      CPUState *cpu = CPU(obj);
> +    g_free(cpu->trace_dstate_delayed);
>      g_free(cpu->trace_dstate);
>  }
>  
> diff --git a/trace/control-target.c b/trace/control-target.c
> index 7ebf6e0bcb..aba8db55de 100644
> --- a/trace/control-target.c
> +++ b/trace/control-target.c
> @@ -69,13 +69,20 @@ void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
>      if (state_pre != state) {
>          if (state) {
>              trace_events_enabled_count++;
> -            set_bit(vcpu_id, vcpu->trace_dstate);
> +            set_bit(vcpu_id, vcpu->trace_dstate_delayed);
> +            if (!atomic_read(&vcpu->trace_dstate_must_delay)) {
> +                set_bit(vcpu_id, vcpu->trace_dstate);
> +            }
>              (*ev->dstate)++;
>          } else {
>              trace_events_enabled_count--;
> -            clear_bit(vcpu_id, vcpu->trace_dstate);
> +            clear_bit(vcpu_id, vcpu->trace_dstate_delayed);
> +            if (!atomic_read(&vcpu->trace_dstate_must_delay)) {
> +                clear_bit(vcpu_id, vcpu->trace_dstate);
> +            }
>              (*ev->dstate)--;
>          }
> +        atomic_set(&vcpu->trace_dstate_delayed_req, true);
>      }
>  }

This lock-free scheme looks broken to me.  Consider the following case
with threads A and B:

A: atomic_set(&cpu->trace_dstate_delayed_req, false);
A: atomic_set(&cpu->trace_dstate_must_delay, true);
B: if (!atomic_read(&vcpu->trace_dstate_must_delay)) { /* false */
A: atomic_set(&cpu->trace_dstate_must_delay, false);
A: if (unlikely(atomic_read(&cpu->trace_dstate_delayed_req))) { /* false */
B: atomic_set(&vcpu->trace_dstate_delayed_req, true);

Oops, we missed the delayed update.

Now, when A runs the next iteration, it resets the flag and we forget there was a delayed req:

A: atomic_set(&cpu->trace_dstate_delayed_req, false);

As a result even the next iteration may not copy the delayed bitmap.

Perhaps you should use RCU.

Or use a simpler scheme:

struct CPUState {
    ...
    uint32_t dstate_update_count;
};

In trace_event_set_vcpu_state_dynamic():

    if (state) {
        trace_events_enabled_count++;
        set_bit(vcpu_id, vcpu->trace_dstate_delayed);
        atomic_inc(&vcpu->dstate_update_count);
        (*ev->dstate)++;
    } ...

In cpu_exec() and friends:

    last_dstate_update_count = atomic_read(&cpu->dstate_update_count);

    tb = tb_find(cpu, last_tb, tb_exit);
    cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);

    /* apply and disable delayed dstate changes */
    if (unlikely(atomic_read(&cpu->dstate_update_count) != last_dstate_update_count)) {
        bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
        trace_get_vcpu_event_count());
    }

(You'll need to adjust the details but the update counter approach
should be workable.)

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

  reply	other threads:[~2017-01-10 14:43 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-28 18:40 [Qemu-devel] [PATCH v6 0/7] trace: [tcg] Optimize per-vCPU tracing states with separate TB caches Lluís Vilanova
2016-12-28 18:41 ` [Qemu-devel] [PATCH v6 1/7] exec: [tcg] Refactor flush of per-CPU virtual TB cache Lluís Vilanova
2017-01-10 20:07   ` Richard Henderson
2016-12-28 18:41 ` [Qemu-devel] [PATCH v6 2/7] trace: Make trace_get_vcpu_event_count() inlinable Lluís Vilanova
2017-01-10 20:08   ` Richard Henderson
2017-01-12 18:14     ` Lluís Vilanova
2016-12-28 18:41 ` [Qemu-devel] [PATCH v6 3/7] trace: [tcg] Delay changes to dynamic state when translating Lluís Vilanova
2017-01-09 17:01   ` Stefan Hajnoczi [this message]
2017-01-10 16:31     ` Paolo Bonzini
2017-01-11 16:16       ` Stefan Hajnoczi
2017-01-12 19:37         ` Lluís Vilanova
2017-01-12 21:25           ` Paolo Bonzini
2017-01-13 20:08             ` Lluís Vilanova
2016-12-28 18:41 ` [Qemu-devel] [PATCH v6 4/7] exec: [tcg] Use different TBs according to the vCPU's dynamic tracing state Lluís Vilanova
2017-01-10 20:10   ` Richard Henderson
2016-12-28 18:41 ` [Qemu-devel] [PATCH v6 5/7] trace: [tcg] Do not generate TCG code to trace dinamically-disabled events Lluís Vilanova
2016-12-28 18:41 ` [Qemu-devel] [PATCH v6 6/7] trace: [tcg, trivial] Re-align generated code Lluís Vilanova
2017-01-12 11:19   ` Michael Tokarev
2017-01-12 18:46     ` Lluís Vilanova
2016-12-28 18:41 ` [Qemu-devel] [PATCH v6 7/7] trace: [trivial] Statically enable all guest events Lluís Vilanova
2017-01-09 17:04 ` [Qemu-devel] [PATCH v6 0/7] trace: [tcg] Optimize per-vCPU tracing states with separate TB caches Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170109170156.GL30228@stefanha-x1.localdomain \
    --to=stefanha@redhat.com \
    --cc=crosthwaite.peter@gmail.com \
    --cc=eblake@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=vilanova@ac.upc.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).