qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part.
@ 2015-07-10 16:08 fred.konrad
  2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 1/3] cpus: protect queued_work_* with work_mutex fred.konrad
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: fred.konrad @ 2015-07-10 16:08 UTC (permalink / raw)
  To: qemu-devel, mttcg
  Cc: mark.burton, a.rigo, guillaume.delbergue, pbonzini, alex.bennee,
	fred.konrad

From: KONRAD Frederic <fred.konrad@greensocs.com>

This is the async_safe_work introduction bit of the Multithread TCG work.
Rebased on current upstream (6169b60285fe1ff730d840a49527e721bfb30899).

It can be cloned here:
http://git.greensocs.com/fkonrad/mttcg.git branch async_work_v2

The first patch introduces a mutex to protect the existing queued_work_*
CPUState members against multiple (concurrent) access.

The second patch introduces a tcg_executing_flag which will be 1 when we are
inside cpu_exec(). This is required as safe work needs to be sure that all vCPUs
are outside cpu_exec().

The last patch introduces async_safe_work. It allows adding some work which will
be done asynchronously but only when all vCPUs are outside cpu_exec(). The tcg
thread will wait until no vCPUs have any pending safe work before reentering
cpu-exec().

Changes V1 -> V2:
  * Release the lock while running the callback for both async and safe work.

KONRAD Frederic (3):
  cpus: protect queued_work_* with work_mutex.
  cpus: add a tcg_executing flag.
  cpus: introduce async_run_safe_work_on_cpu.

 cpu-exec.c        |   7 +++
 cpus.c            | 164 +++++++++++++++++++++++++++++++++++++++++-------------
 include/qom/cpu.h |  28 ++++++++++
 qom/cpu.c         |   2 +
 4 files changed, 161 insertions(+), 40 deletions(-)

-- 
1.9.0

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Qemu-devel] [RFC PATCH V2 1/3] cpus: protect queued_work_* with work_mutex.
  2015-07-10 16:08 [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part fred.konrad
@ 2015-07-10 16:08 ` fred.konrad
  2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag fred.konrad
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 14+ messages in thread
From: fred.konrad @ 2015-07-10 16:08 UTC (permalink / raw)
  To: qemu-devel, mttcg
  Cc: mark.burton, a.rigo, guillaume.delbergue, pbonzini, alex.bennee,
	fred.konrad

From: KONRAD Frederic <fred.konrad@greensocs.com>

This protects queued_work_* used by async_run_on_cpu, run_on_cpu and
flush_queued_work with a new lock (work_mutex) to prevent multiple (concurrent)
access.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>

Changes V1 -> V2:
  * Unlock the mutex while running the callback.
---
 cpus.c            | 11 +++++++++++
 include/qom/cpu.h |  3 +++
 qom/cpu.c         |  1 +
 3 files changed, 15 insertions(+)

diff --git a/cpus.c b/cpus.c
index b00a423..eabd4b1 100644
--- a/cpus.c
+++ b/cpus.c
@@ -845,6 +845,8 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
     wi.func = func;
     wi.data = data;
     wi.free = false;
+
+    qemu_mutex_lock(&cpu->work_mutex);
     if (cpu->queued_work_first == NULL) {
         cpu->queued_work_first = &wi;
     } else {
@@ -853,6 +855,7 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
     cpu->queued_work_last = &wi;
     wi.next = NULL;
     wi.done = false;
+    qemu_mutex_unlock(&cpu->work_mutex);
 
     qemu_cpu_kick(cpu);
     while (!wi.done) {
@@ -876,6 +879,8 @@ void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
     wi->func = func;
     wi->data = data;
     wi->free = true;
+
+    qemu_mutex_lock(&cpu->work_mutex);
     if (cpu->queued_work_first == NULL) {
         cpu->queued_work_first = wi;
     } else {
@@ -884,6 +889,7 @@ void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
     cpu->queued_work_last = wi;
     wi->next = NULL;
     wi->done = false;
+    qemu_mutex_unlock(&cpu->work_mutex);
 
     qemu_cpu_kick(cpu);
 }
@@ -896,15 +902,20 @@ static void flush_queued_work(CPUState *cpu)
         return;
     }
 
+    qemu_mutex_lock(&cpu->work_mutex);
     while ((wi = cpu->queued_work_first)) {
         cpu->queued_work_first = wi->next;
+        qemu_mutex_unlock(&cpu->work_mutex);
         wi->func(wi->data);
+        qemu_mutex_lock(&cpu->work_mutex);
         wi->done = true;
         if (wi->free) {
             g_free(wi);
         }
     }
     cpu->queued_work_last = NULL;
+    qemu_mutex_unlock(&cpu->work_mutex);
+
     qemu_cond_broadcast(&qemu_work_cond);
 }
 
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 20aabc9..efa9624 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -242,6 +242,8 @@ struct kvm_run;
  * @mem_io_pc: Host Program Counter at which the memory was accessed.
  * @mem_io_vaddr: Target virtual address at which the memory was accessed.
  * @kvm_fd: vCPU file descriptor for KVM.
+ * @work_mutex: Lock to prevent multiple access to queued_work_*.
+ * @queued_work_first: First asynchronous work pending.
  *
  * State of one CPU core or thread.
  */
@@ -262,6 +264,7 @@ struct CPUState {
     uint32_t host_tid;
     bool running;
     struct QemuCond *halt_cond;
+    QemuMutex work_mutex;
     struct qemu_work_item *queued_work_first, *queued_work_last;
     bool thread_kicked;
     bool created;
diff --git a/qom/cpu.c b/qom/cpu.c
index eb9cfec..4e12598 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -316,6 +316,7 @@ static void cpu_common_initfn(Object *obj)
     cpu->gdb_num_regs = cpu->gdb_num_g_regs = cc->gdb_num_core_regs;
     QTAILQ_INIT(&cpu->breakpoints);
     QTAILQ_INIT(&cpu->watchpoints);
+    qemu_mutex_init(&cpu->work_mutex);
 }
 
 static void cpu_common_finalize(Object *obj)
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag.
  2015-07-10 16:08 [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part fred.konrad
  2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 1/3] cpus: protect queued_work_* with work_mutex fred.konrad
@ 2015-07-10 16:08 ` fred.konrad
  2015-07-13 15:56   ` Alex Bennée
  2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 3/3] cpus: introduce async_run_safe_work_on_cpu fred.konrad
  2015-07-13 11:53 ` [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part Paolo Bonzini
  3 siblings, 1 reply; 14+ messages in thread
From: fred.konrad @ 2015-07-10 16:08 UTC (permalink / raw)
  To: qemu-devel, mttcg
  Cc: mark.burton, a.rigo, guillaume.delbergue, pbonzini, alex.bennee,
	fred.konrad

From: KONRAD Frederic <fred.konrad@greensocs.com>

This flag indicates if the VCPU is currently executing TCG code.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>

Changes V1 -> V2:
  * do both tcg_executing = 0 or 1 in cpu_exec().
---
 cpu-exec.c        | 2 ++
 include/qom/cpu.h | 3 +++
 qom/cpu.c         | 1 +
 3 files changed, 6 insertions(+)

diff --git a/cpu-exec.c b/cpu-exec.c
index 75694f3..2fdf89d 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -371,6 +371,7 @@ int cpu_exec(CPUState *cpu)
         cpu->halted = 0;
     }
 
+    cpu->tcg_executing = 1;
     current_cpu = cpu;
 
     /* As long as current_cpu is null, up to the assignment just above,
@@ -583,5 +584,6 @@ int cpu_exec(CPUState *cpu)
 
     /* fail safe : never use current_cpu outside cpu_exec() */
     current_cpu = NULL;
+    cpu->tcg_executing = 0;
     return ret;
 }
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index efa9624..a2de536 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -226,6 +226,7 @@ struct kvm_run;
  * @stopped: Indicates the CPU has been artificially stopped.
  * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
  *           CPU and return to its top level loop.
+ * @tcg_executing: This TCG thread is in cpu_exec().
  * @singlestep_enabled: Flags for single-stepping.
  * @icount_extra: Instructions until next timer event.
  * @icount_decr: Number of cycles left, with interrupt flag in high bit.
@@ -322,6 +323,8 @@ struct CPUState {
        (absolute value) offset as small as possible.  This reduces code
        size, especially for hosts without large memory offsets.  */
     volatile sig_atomic_t tcg_exit_req;
+
+    volatile int tcg_executing;
 };
 
 QTAILQ_HEAD(CPUTailQ, CPUState);
diff --git a/qom/cpu.c b/qom/cpu.c
index 4e12598..62663e5 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -249,6 +249,7 @@ static void cpu_common_reset(CPUState *cpu)
     cpu->icount_decr.u32 = 0;
     cpu->can_do_io = 0;
     cpu->exception_index = -1;
+    cpu->tcg_executing = 0;
     memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
 }
 
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Qemu-devel] [RFC PATCH V2 3/3] cpus: introduce async_run_safe_work_on_cpu.
  2015-07-10 16:08 [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part fred.konrad
  2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 1/3] cpus: protect queued_work_* with work_mutex fred.konrad
  2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag fred.konrad
@ 2015-07-10 16:08 ` fred.konrad
  2015-07-13 16:20   ` Alex Bennée
  2015-07-13 11:53 ` [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part Paolo Bonzini
  3 siblings, 1 reply; 14+ messages in thread
From: fred.konrad @ 2015-07-10 16:08 UTC (permalink / raw)
  To: qemu-devel, mttcg
  Cc: mark.burton, a.rigo, guillaume.delbergue, pbonzini, alex.bennee,
	fred.konrad

From: KONRAD Frederic <fred.konrad@greensocs.com>

We already had async_run_on_cpu but we need all VCPUs outside their execution
loop to execute some tb_flush/invalidate task:

async_run_safe_work_on_cpu schedules work on a VCPU, but the work starts only
when no VCPUs are executing code.
When safe work is pending, cpu_has_work returns true, so cpu_exec returns and
the VCPUs can't enter the execution loop. cpu_thread_is_idle returns false, so
at the moment where all VCPUs are stop || stopped the safe work queue can be
flushed.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>

Changes V2 -> V3:
  * Unlock the mutex while executing the callback.
Changes V1 -> V2:
  * Move qemu_cpu_kick_thread to avoid prototype declaration.
  * Use the work_mutex lock to protect the queued_safe_work_* structures.
---
 cpu-exec.c        |   5 ++
 cpus.c            | 153 ++++++++++++++++++++++++++++++++++++++++--------------
 include/qom/cpu.h |  24 ++++++++-
 3 files changed, 141 insertions(+), 41 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 2fdf89d..7581f76 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -363,6 +363,11 @@ int cpu_exec(CPUState *cpu)
     /* This must be volatile so it is not trashed by longjmp() */
     volatile bool have_tb_lock = false;
 
+    if (async_safe_work_pending()) {
+        cpu->exit_request = 1;
+        return 0;
+    }
+
     if (cpu->halted) {
         if (!cpu_has_work(cpu)) {
             return EXCP_HALTED;
diff --git a/cpus.c b/cpus.c
index eabd4b1..4321380 100644
--- a/cpus.c
+++ b/cpus.c
@@ -76,7 +76,7 @@ bool cpu_is_stopped(CPUState *cpu)
 
 static bool cpu_thread_is_idle(CPUState *cpu)
 {
-    if (cpu->stop || cpu->queued_work_first) {
+    if (cpu->stop || cpu->queued_work_first || cpu->queued_safe_work_first) {
         return false;
     }
     if (cpu_is_stopped(cpu)) {
@@ -833,6 +833,45 @@ void qemu_init_cpu_loop(void)
     qemu_thread_get_self(&io_thread);
 }
 
+static void qemu_cpu_kick_thread(CPUState *cpu)
+{
+#ifndef _WIN32
+    int err;
+
+    err = pthread_kill(cpu->thread->thread, SIG_IPI);
+    if (err) {
+        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
+        exit(1);
+    }
+#else /* _WIN32 */
+    if (!qemu_cpu_is_self(cpu)) {
+        CONTEXT tcgContext;
+
+        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
+            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
+                    GetLastError());
+            exit(1);
+        }
+
+        /* On multi-core systems, we are not sure that the thread is actually
+         * suspended until we can get the context.
+         */
+        tcgContext.ContextFlags = CONTEXT_CONTROL;
+        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
+            continue;
+        }
+
+        cpu_signal(0);
+
+        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
+            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
+                    GetLastError());
+            exit(1);
+        }
+    }
+#endif
+}
+
 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
 {
     struct qemu_work_item wi;
@@ -894,6 +933,76 @@ void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
     qemu_cpu_kick(cpu);
 }
 
+void async_run_safe_work_on_cpu(CPUState *cpu, void (*func)(void *data),
+                                void *data)
+{
+    struct qemu_work_item *wi;
+
+    wi = g_malloc0(sizeof(struct qemu_work_item));
+    wi->func = func;
+    wi->data = data;
+    wi->free = true;
+
+    qemu_mutex_lock(&cpu->work_mutex);
+    if (cpu->queued_safe_work_first == NULL) {
+        cpu->queued_safe_work_first = wi;
+    } else {
+        cpu->queued_safe_work_last->next = wi;
+    }
+    cpu->queued_safe_work_last = wi;
+    wi->next = NULL;
+    wi->done = false;
+    qemu_mutex_unlock(&cpu->work_mutex);
+
+    CPU_FOREACH(cpu) {
+        qemu_cpu_kick_thread(cpu);
+    }
+}
+
+static void flush_queued_safe_work(CPUState *cpu)
+{
+    struct qemu_work_item *wi;
+    CPUState *other_cpu;
+
+    if (cpu->queued_safe_work_first == NULL) {
+        return;
+    }
+
+    CPU_FOREACH(other_cpu) {
+        if (other_cpu->tcg_executing != 0) {
+            return;
+        }
+    }
+
+    qemu_mutex_lock(&cpu->work_mutex);
+    while ((wi = cpu->queued_safe_work_first)) {
+        cpu->queued_safe_work_first = wi->next;
+        qemu_mutex_unlock(&cpu->work_mutex);
+        wi->func(wi->data);
+        qemu_mutex_lock(&cpu->work_mutex);
+        wi->done = true;
+        if (wi->free) {
+            g_free(wi);
+        }
+    }
+    cpu->queued_safe_work_last = NULL;
+    qemu_mutex_unlock(&cpu->work_mutex);
+    qemu_cond_broadcast(&qemu_work_cond);
+}
+
+/* FIXME: add a counter to avoid this CPU_FOREACH() */
+bool async_safe_work_pending(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->queued_safe_work_first) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static void flush_queued_work(CPUState *cpu)
 {
     struct qemu_work_item *wi;
@@ -926,6 +1035,9 @@ static void qemu_wait_io_event_common(CPUState *cpu)
         cpu->stopped = true;
         qemu_cond_signal(&qemu_pause_cond);
     }
+    qemu_mutex_unlock_iothread();
+    flush_queued_safe_work(cpu);
+    qemu_mutex_lock_iothread();
     flush_queued_work(cpu);
     cpu->thread_kicked = false;
 }
@@ -1085,45 +1197,6 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
     return NULL;
 }
 
-static void qemu_cpu_kick_thread(CPUState *cpu)
-{
-#ifndef _WIN32
-    int err;
-
-    err = pthread_kill(cpu->thread->thread, SIG_IPI);
-    if (err) {
-        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
-        exit(1);
-    }
-#else /* _WIN32 */
-    if (!qemu_cpu_is_self(cpu)) {
-        CONTEXT tcgContext;
-
-        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
-            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
-                    GetLastError());
-            exit(1);
-        }
-
-        /* On multi-core systems, we are not sure that the thread is actually
-         * suspended until we can get the context.
-         */
-        tcgContext.ContextFlags = CONTEXT_CONTROL;
-        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
-            continue;
-        }
-
-        cpu_signal(0);
-
-        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
-            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
-                    GetLastError());
-            exit(1);
-        }
-    }
-#endif
-}
-
 void qemu_cpu_kick(CPUState *cpu)
 {
     qemu_cond_broadcast(cpu->halt_cond);
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index a2de536..692d0d3 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -243,8 +243,9 @@ struct kvm_run;
  * @mem_io_pc: Host Program Counter at which the memory was accessed.
  * @mem_io_vaddr: Target virtual address at which the memory was accessed.
  * @kvm_fd: vCPU file descriptor for KVM.
- * @work_mutex: Lock to prevent multiple access to queued_work_*.
+ * @work_mutex: Lock to prevent multiple access to queued_* qemu_work_item.
  * @queued_work_first: First asynchronous work pending.
+ * @queued_safe_work_first: First item of safe work pending.
  *
  * State of one CPU core or thread.
  */
@@ -267,6 +268,7 @@ struct CPUState {
     struct QemuCond *halt_cond;
     QemuMutex work_mutex;
     struct qemu_work_item *queued_work_first, *queued_work_last;
+    struct qemu_work_item *queued_safe_work_first, *queued_safe_work_last;
     bool thread_kicked;
     bool created;
     bool stop;
@@ -546,6 +548,26 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
 
 /**
+ * async_run_safe_work_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu asynchronously
+ * when all the VCPUs are outside their loop.
+ */
+void async_run_safe_work_on_cpu(CPUState *cpu, void (*func)(void *data),
+                                void *data);
+
+/**
+ * async_safe_work_pending:
+ *
+ * Check whether any safe work is pending on any VCPUs.
+ * Returns: @true if a safe work is pending, @false otherwise.
+ */
+bool async_safe_work_pending(void);
+
+/**
  * qemu_get_cpu:
  * @index: The CPUState@cpu_index value of the CPU to obtain.
  *
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part.
  2015-07-10 16:08 [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part fred.konrad
                   ` (2 preceding siblings ...)
  2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 3/3] cpus: introduce async_run_safe_work_on_cpu fred.konrad
@ 2015-07-13 11:53 ` Paolo Bonzini
  2015-07-13 15:50   ` Alex Bennée
  2015-07-15  7:44   ` Frederic Konrad
  3 siblings, 2 replies; 14+ messages in thread
From: Paolo Bonzini @ 2015-07-13 11:53 UTC (permalink / raw)
  To: fred.konrad, qemu-devel, mttcg
  Cc: mark.burton, alex.bennee, a.rigo, guillaume.delbergue



On 10/07/2015 18:08, fred.konrad@greensocs.com wrote:
> From: KONRAD Frederic <fred.konrad@greensocs.com>
> 
> This is the async_safe_work introduction bit of the Multithread TCG work.
> Rebased on current upstream (6169b60285fe1ff730d840a49527e721bfb30899).
> 
> It can be cloned here:
> http://git.greensocs.com/fkonrad/mttcg.git branch async_work_v2
> 
> The first patch introduces a mutex to protect the existing queued_work_*
> CPUState members against multiple (concurent) access.
> 
> The second patch introduces a tcg_executing_flag which will be 1 when we are
> inside cpu_exec(). This is required as safe work need to be sure that's all vCPU
> are outside cpu_exec().
> 
> The last patch introduces async_safe_work. It allows to add some work which will
> be done asynchronously but only when all vCPUs are outside cpu_exec(). The tcg
> thread will wait that no vCPUs have any pending safe work before reentering
> cpu-exec().
> 
> Changes V1 -> V2:
>   * Release the lock while running the callback for both async and safe work.

What tree do these patches apply to?

Paolo

> KONRAD Frederic (3):
>   cpus: protect queued_work_* with work_mutex.
>   cpus: add a tcg_executing flag.
>   cpus: introduce async_run_safe_work_on_cpu.
> 
>  cpu-exec.c        |   7 +++
>  cpus.c            | 164 +++++++++++++++++++++++++++++++++++++++++-------------
>  include/qom/cpu.h |  28 ++++++++++
>  qom/cpu.c         |   2 +
>  4 files changed, 161 insertions(+), 40 deletions(-)
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part.
  2015-07-13 11:53 ` [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part Paolo Bonzini
@ 2015-07-13 15:50   ` Alex Bennée
  2015-07-15  7:44   ` Frederic Konrad
  1 sibling, 0 replies; 14+ messages in thread
From: Alex Bennée @ 2015-07-13 15:50 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: mttcg, mark.burton, a.rigo, qemu-devel, guillaume.delbergue,
	fred.konrad


Paolo Bonzini <pbonzini@redhat.com> writes:

> On 10/07/2015 18:08, fred.konrad@greensocs.com wrote:
>> From: KONRAD Frederic <fred.konrad@greensocs.com>
>> 
>> This is the async_safe_work introduction bit of the Multithread TCG work.
>> Rebased on current upstream (6169b60285fe1ff730d840a49527e721bfb30899).
>> 
>> It can be cloned here:
>> http://git.greensocs.com/fkonrad/mttcg.git branch async_work_v2
>> 
>> The first patch introduces a mutex to protect the existing queued_work_*
>> CPUState members against multiple (concurent) access.
>> 
>> The second patch introduces a tcg_executing_flag which will be 1 when we are
>> inside cpu_exec(). This is required as safe work need to be sure that's all vCPU
>> are outside cpu_exec().
>> 
>> The last patch introduces async_safe_work. It allows to add some work which will
>> be done asynchronously but only when all vCPUs are outside cpu_exec(). The tcg
>> thread will wait that no vCPUs have any pending safe work before reentering
>> cpu-exec().
>> 
>> Changes V1 -> V2:
>>   * Release the lock while running the callback for both async and safe work.
>
> What tree do these patches apply to?

See Fred's tree:

http://git.greensocs.com/fkonrad/mttcg

>
> Paolo
>
>> KONRAD Frederic (3):
>>   cpus: protect queued_work_* with work_mutex.
>>   cpus: add a tcg_executing flag.
>>   cpus: introduce async_run_safe_work_on_cpu.
>> 
>>  cpu-exec.c        |   7 +++
>>  cpus.c            | 164 +++++++++++++++++++++++++++++++++++++++++-------------
>>  include/qom/cpu.h |  28 ++++++++++
>>  qom/cpu.c         |   2 +
>>  4 files changed, 161 insertions(+), 40 deletions(-)
>> 

-- 
Alex Bennée

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag.
  2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag fred.konrad
@ 2015-07-13 15:56   ` Alex Bennée
  2015-07-13 16:10     ` Peter Maydell
  2015-07-15  8:40     ` Frederic Konrad
  0 siblings, 2 replies; 14+ messages in thread
From: Alex Bennée @ 2015-07-13 15:56 UTC (permalink / raw)
  To: fred.konrad
  Cc: mttcg, mark.burton, qemu-devel, a.rigo, guillaume.delbergue,
	pbonzini


fred.konrad@greensocs.com writes:

> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> This flag indicates if the VCPU is currently executing TCG code.
>
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
>
> Changes V1 -> V2:
>   * do both tcg_executing = 0 or 1 in cpu_exec().
> ---
>  cpu-exec.c        | 2 ++
>  include/qom/cpu.h | 3 +++
>  qom/cpu.c         | 1 +
>  3 files changed, 6 insertions(+)
>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 75694f3..2fdf89d 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -371,6 +371,7 @@ int cpu_exec(CPUState *cpu)
>          cpu->halted = 0;
>      }
>  
> +    cpu->tcg_executing = 1;
>      current_cpu = cpu;
>  
>      /* As long as current_cpu is null, up to the assignment just above,
> @@ -583,5 +584,6 @@ int cpu_exec(CPUState *cpu)
>  
>      /* fail safe : never use current_cpu outside cpu_exec() */
>      current_cpu = NULL;
> +    cpu->tcg_executing = 0;
>      return ret;
>  }
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index efa9624..a2de536 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -226,6 +226,7 @@ struct kvm_run;
>   * @stopped: Indicates the CPU has been artificially stopped.
>   * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
>   *           CPU and return to its top level loop.
> + * @tcg_executing: This TCG thread is in cpu_exec().
>   * @singlestep_enabled: Flags for single-stepping.
>   * @icount_extra: Instructions until next timer event.
>   * @icount_decr: Number of cycles left, with interrupt flag in high bit.
> @@ -322,6 +323,8 @@ struct CPUState {
>         (absolute value) offset as small as possible.  This reduces code
>         size, especially for hosts without large memory offsets.  */
>      volatile sig_atomic_t tcg_exit_req;
> +
> +    volatile int tcg_executing;

My concern is on weakly ordered backends is volatile enough for this
flag or do we need some sort of memory barrier when we update it? Does
it just introduce an inefficiency that other threads may spin a few
times waiting to find out the vCPU has halted?

If other threads are waiting for it to halt is there a mechanism that
ensures we'll never start-up again until everything is done?


>  };
>  
>  QTAILQ_HEAD(CPUTailQ, CPUState);
> diff --git a/qom/cpu.c b/qom/cpu.c
> index 4e12598..62663e5 100644
> --- a/qom/cpu.c
> +++ b/qom/cpu.c
> @@ -249,6 +249,7 @@ static void cpu_common_reset(CPUState *cpu)
>      cpu->icount_decr.u32 = 0;
>      cpu->can_do_io = 0;
>      cpu->exception_index = -1;
> +    cpu->tcg_executing = 0;
>      memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
>  }

-- 
Alex Bennée

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag.
  2015-07-13 15:56   ` Alex Bennée
@ 2015-07-13 16:10     ` Peter Maydell
  2015-07-13 16:15       ` Paolo Bonzini
  2015-07-15  8:40     ` Frederic Konrad
  1 sibling, 1 reply; 14+ messages in thread
From: Peter Maydell @ 2015-07-13 16:10 UTC (permalink / raw)
  To: Alex Bennée
  Cc: mttcg, Mark Burton, Alvise Rigo, QEMU Developers,
	Guillaume Delbergue, Paolo Bonzini, KONRAD Frédéric

On 13 July 2015 at 16:56, Alex Bennée <alex.bennee@linaro.org> wrote:
>> From: KONRAD Frederic <fred.konrad@greensocs.com>
>> @@ -322,6 +323,8 @@ struct CPUState {
>>         (absolute value) offset as small as possible.  This reduces code
>>         size, especially for hosts without large memory offsets.  */
>>      volatile sig_atomic_t tcg_exit_req;
>> +
>> +    volatile int tcg_executing;
>
> My concern is on weakly ordered backends is volatile enough for this
> flag or do we need some sort of memory barrier when we update it? Does
> it just introduce an inefficiency that other threads may spin a few
> times waiting to find out the vCPU has halted?

Plus "volatile int" is a bit of an "is this really right?" red flag...
Currently in QEMU we use 'volatile' for:
 (1) "volatile sig_atomic_t", which is dealing with variables
     accessed from signal handlers
 (2) marking local variables which mustn't be trashed by longjmp()
 (3) some things in tests/ code which I'm ignoring because they're
     only test code
 (4) a few other things which are suspicious at best:
  hw/intc/apic_common.c:    volatile int a_i_d = apic_irq_delivered;
  hw/xen/xen_pt_msi.c:        const volatile uint32_t *vec_ctrl;
  trace/simple.c:static volatile gint trace_idx;
  trace/simple.c:static volatile gint dropped_events;

and I would be very dubious about adding more direct uses of volatile.
You almost certainly want something from atomic.h instead.

thanks
-- PMM

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag.
  2015-07-13 16:10     ` Peter Maydell
@ 2015-07-13 16:15       ` Paolo Bonzini
  2015-07-13 16:36         ` Alex Bennée
  0 siblings, 1 reply; 14+ messages in thread
From: Paolo Bonzini @ 2015-07-13 16:15 UTC (permalink / raw)
  To: Peter Maydell, Alex Bennée
  Cc: mttcg, Mark Burton, Alvise Rigo, QEMU Developers,
	Guillaume Delbergue, KONRAD Frédéric



On 13/07/2015 18:10, Peter Maydell wrote:
>  (4) a few other things which are suspicious at best:
>   hw/intc/apic_common.c:    volatile int a_i_d = apic_irq_delivered;

This one has a comment above:

    /* Copy this into a local variable to encourage gcc to emit a plain
     * register for a sys/sdt.h marker.  For details on this workaround, see:
     * https://sourceware.org/bugzilla/show_bug.cgi?id=13296
     */

>   hw/xen/xen_pt_msi.c:        const volatile uint32_t *vec_ctrl;

Seems to be MMIO (yes, really), so okay.

Paolo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 3/3] cpus: introduce async_run_safe_work_on_cpu.
  2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 3/3] cpus: introduce async_run_safe_work_on_cpu fred.konrad
@ 2015-07-13 16:20   ` Alex Bennée
  2015-07-13 22:56     ` Paolo Bonzini
  0 siblings, 1 reply; 14+ messages in thread
From: Alex Bennée @ 2015-07-13 16:20 UTC (permalink / raw)
  To: fred.konrad
  Cc: mttcg, mark.burton, qemu-devel, a.rigo, guillaume.delbergue,
	pbonzini


fred.konrad@greensocs.com writes:

> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> We already had async_run_on_cpu but we need all VCPUs outside their execution
> loop to execute some tb_flush/invalidate task:
>
> async_run_on_cpu_safe schedule a work on a VCPU but the work start when no more
> VCPUs are executing code.
> When a safe work is pending cpu_has_work returns true, so cpu_exec returns and
> the VCPUs can't enters execution loop. cpu_thread_is_idle returns false so at
> the moment where all VCPUs are stop || stopped the safe work queue can be
> flushed.
>
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
>
> Changes V2 -> V3:
>   * Unlock the mutex while executing the callback.
> Changes V1 -> V2:
>   * Move qemu_cpu_kick_thread to avoid prototype declaration.
>   * Use the work_mutex lock to protect the queued_safe_work_* structures.
> ---
>  cpu-exec.c        |   5 ++
>  cpus.c            | 153 ++++++++++++++++++++++++++++++++++++++++--------------
>  include/qom/cpu.h |  24 ++++++++-
>  3 files changed, 141 insertions(+), 41 deletions(-)
>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 2fdf89d..7581f76 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -363,6 +363,11 @@ int cpu_exec(CPUState *cpu)
>      /* This must be volatile so it is not trashed by longjmp() */
>      volatile bool have_tb_lock = false;
>  
> +    if (async_safe_work_pending()) {
> +        cpu->exit_request = 1;
> +        return 0;
> +    }
> +
>      if (cpu->halted) {
>          if (!cpu_has_work(cpu)) {
>              return EXCP_HALTED;
> diff --git a/cpus.c b/cpus.c
> index eabd4b1..4321380 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -76,7 +76,7 @@ bool cpu_is_stopped(CPUState *cpu)
>  
>  static bool cpu_thread_is_idle(CPUState *cpu)
>  {
> -    if (cpu->stop || cpu->queued_work_first) {
> +    if (cpu->stop || cpu->queued_work_first || cpu->queued_safe_work_first) {
>          return false;
>      }
>      if (cpu_is_stopped(cpu)) {
> @@ -833,6 +833,45 @@ void qemu_init_cpu_loop(void)
>      qemu_thread_get_self(&io_thread);
>  }
>  
> +static void qemu_cpu_kick_thread(CPUState *cpu)
> +{
> +#ifndef _WIN32
> +    int err;
> +
> +    err = pthread_kill(cpu->thread->thread, SIG_IPI);
> +    if (err) {
> +        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
> +        exit(1);
> +    }
> +#else /* _WIN32 */
> +    if (!qemu_cpu_is_self(cpu)) {
> +        CONTEXT tcgContext;
> +
> +        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
> +            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> +                    GetLastError());
> +            exit(1);
> +        }
> +
> +        /* On multi-core systems, we are not sure that the thread is actually
> +         * suspended until we can get the context.
> +         */
> +        tcgContext.ContextFlags = CONTEXT_CONTROL;
> +        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
> +            continue;
> +        }
> +
> +        cpu_signal(0);
> +
> +        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
> +            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> +                    GetLastError());
> +            exit(1);
> +        }
> +    }
> +#endif

I'm going to go out on a limb and guess these sorts of implementation
specifics should be in the posix/win32 utility files, that is unless
Glib abstracts enough of it for us.

> +}
> +
>  void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
>  {
>      struct qemu_work_item wi;
> @@ -894,6 +933,76 @@ void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
>      qemu_cpu_kick(cpu);
>  }
>  
> +void async_run_safe_work_on_cpu(CPUState *cpu, void (*func)(void *data),
> +                                void *data)
> +{
> +    struct qemu_work_item *wi;
> +
> +    wi = g_malloc0(sizeof(struct qemu_work_item));
> +    wi->func = func;
> +    wi->data = data;

Is there anything that prevents the user calling the function with the
same payload for multiple CPUs?

> +    wi->free = true;
> +
> +    qemu_mutex_lock(&cpu->work_mutex);
> +    if (cpu->queued_safe_work_first == NULL) {
> +        cpu->queued_safe_work_first = wi;
> +    } else {
> +        cpu->queued_safe_work_last->next = wi;
> +    }
> +    cpu->queued_safe_work_last = wi;

I'm surprised we haven't added some helpers to the qemu_work_queue API
for all this identical boilerplate but whatever...


> +    wi->next = NULL;
> +    wi->done = false;
> +    qemu_mutex_unlock(&cpu->work_mutex);
> +
> +    CPU_FOREACH(cpu) {
> +        qemu_cpu_kick_thread(cpu);
> +    }
> +}
> +
> +static void flush_queued_safe_work(CPUState *cpu)
> +{
> +    struct qemu_work_item *wi;
> +    CPUState *other_cpu;
> +
> +    if (cpu->queued_safe_work_first == NULL) {
> +        return;
> +    }
> +
> +    CPU_FOREACH(other_cpu) {
> +        if (other_cpu->tcg_executing != 0) {
> +            return;
> +        }
> +    }
> +
> +    qemu_mutex_lock(&cpu->work_mutex);
> +    while ((wi = cpu->queued_safe_work_first)) {
> +        cpu->queued_safe_work_first = wi->next;
> +        qemu_mutex_unlock(&cpu->work_mutex);
> +        wi->func(wi->data);
> +        qemu_mutex_lock(&cpu->work_mutex);
> +        wi->done = true;
> +        if (wi->free) {
> +            g_free(wi);
> +        }
> +    }
> +    cpu->queued_safe_work_last = NULL;
> +    qemu_mutex_unlock(&cpu->work_mutex);
> +    qemu_cond_broadcast(&qemu_work_cond);
> +}
> +
> +/* FIXME: add a counter to avoid this CPU_FOREACH() */
> +bool async_safe_work_pending(void)
> +{
> +    CPUState *cpu;
> +
> +    CPU_FOREACH(cpu) {
> +        if (cpu->queued_safe_work_first) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
>  static void flush_queued_work(CPUState *cpu)
>  {
>      struct qemu_work_item *wi;
> @@ -926,6 +1035,9 @@ static void qemu_wait_io_event_common(CPUState *cpu)
>          cpu->stopped = true;
>          qemu_cond_signal(&qemu_pause_cond);
>      }
> +    qemu_mutex_unlock_iothread();
> +    flush_queued_safe_work(cpu);
> +    qemu_mutex_lock_iothread();
>      flush_queued_work(cpu);
>      cpu->thread_kicked = false;
>  }
> @@ -1085,45 +1197,6 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
>      return NULL;
>  }
>  
> -static void qemu_cpu_kick_thread(CPUState *cpu)
> -{
> -#ifndef _WIN32
> -    int err;
> -
> -    err = pthread_kill(cpu->thread->thread, SIG_IPI);
> -    if (err) {
> -        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
> -        exit(1);
> -    }
> -#else /* _WIN32 */
> -    if (!qemu_cpu_is_self(cpu)) {
> -        CONTEXT tcgContext;
> -
> -        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> -                    GetLastError());
> -            exit(1);
> -        }
> -
> -        /* On multi-core systems, we are not sure that the thread is actually
> -         * suspended until we can get the context.
> -         */
> -        tcgContext.ContextFlags = CONTEXT_CONTROL;
> -        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
> -            continue;
> -        }
> -
> -        cpu_signal(0);
> -
> -        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
> -            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> -                    GetLastError());
> -            exit(1);
> -        }
> -    }
> -#endif
> -}
> -

Ahh here it is, could you not just have put a function declaration at
the top to avoid churning the code too much?

>  void qemu_cpu_kick(CPUState *cpu)
>  {
>      qemu_cond_broadcast(cpu->halt_cond);
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index a2de536..692d0d3 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -243,8 +243,9 @@ struct kvm_run;
>   * @mem_io_pc: Host Program Counter at which the memory was accessed.
>   * @mem_io_vaddr: Target virtual address at which the memory was accessed.
>   * @kvm_fd: vCPU file descriptor for KVM.
> - * @work_mutex: Lock to prevent multiple access to queued_work_*.
> + * @work_mutex: Lock to prevent multiple access to queued_* qemu_work_item.
>   * @queued_work_first: First asynchronous work pending.
> + * @queued_safe_work_first: First item of safe work pending.
>   *
>   * State of one CPU core or thread.
>   */
> @@ -267,6 +268,7 @@ struct CPUState {
>      struct QemuCond *halt_cond;
>      QemuMutex work_mutex;
>      struct qemu_work_item *queued_work_first, *queued_work_last;
> +    struct qemu_work_item *queued_safe_work_first, *queued_safe_work_last;
>      bool thread_kicked;
>      bool created;
>      bool stop;
> @@ -546,6 +548,26 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
>  void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
>  
>  /**
> + * async_run_safe_work_on_cpu:
> + * @cpu: The vCPU to run on.
> + * @func: The function to be executed.
> + * @data: Data to pass to the function.
> + *
> + * Schedules the function @func for execution on the vCPU @cpu asynchronously
> + * when all the VCPUs are outside their loop.
> + */
> +void async_run_safe_work_on_cpu(CPUState *cpu, void (*func)(void *data),
> +                                void *data);

We should probably document the fact that the user should allocate a
data structure for each CPU they send async work to.

> +
> +/**
> + * async_safe_work_pending:
> + *
> + * Check whether any safe work is pending on any VCPUs.
> + * Returns: @true if a safe work is pending, @false otherwise.
> + */
> +bool async_safe_work_pending(void);
> +
> +/**
>   * qemu_get_cpu:
>   * @index: The CPUState@cpu_index value of the CPU to obtain.
>   *

Otherwise looks pretty reasonable to me.

-- 
Alex Bennée

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag.
  2015-07-13 16:15       ` Paolo Bonzini
@ 2015-07-13 16:36         ` Alex Bennée
  0 siblings, 0 replies; 14+ messages in thread
From: Alex Bennée @ 2015-07-13 16:36 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: mttcg, Peter Maydell, Mark Burton, Alvise Rigo, QEMU Developers,
	Guillaume Delbergue, KONRAD Frédéric


Paolo Bonzini <pbonzini@redhat.com> writes:

> On 13/07/2015 18:10, Peter Maydell wrote:
>>  (4) a few other things which are suspicious at best:
>>   hw/intc/apic_common.c:    volatile int a_i_d = apic_irq_delivered;
>
> This one has a comment above:
>
>     /* Copy this into a local variable to encourage gcc to emit a plain
>      * register for a sys/sdt.h marker.  For details on this workaround, see:
>      * https://sourceware.org/bugzilla/show_bug.cgi?id=13296
>      */
>
>>   hw/xen/xen_pt_msi.c:        const volatile uint32_t *vec_ctrl;
>
> Seems to be MMIO (yes, really), so okay.

For some reason I don't find the use of the word "encourage" w.r.t
compiler behaviour particularly encouraging ;-)

-- 
Alex Bennée

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 3/3] cpus: introduce async_run_safe_work_on_cpu.
  2015-07-13 16:20   ` Alex Bennée
@ 2015-07-13 22:56     ` Paolo Bonzini
  0 siblings, 0 replies; 14+ messages in thread
From: Paolo Bonzini @ 2015-07-13 22:56 UTC (permalink / raw)
  To: Alex Bennée, fred.konrad
  Cc: mttcg, guillaume.delbergue, mark.burton, qemu-devel, a.rigo



On 13/07/2015 18:20, Alex Bennée wrote:
>> +static void qemu_cpu_kick_thread(CPUState *cpu)
>> +{
>> +#ifndef _WIN32
>> +    int err;
>> +
>> +    err = pthread_kill(cpu->thread->thread, SIG_IPI);
>> +    if (err) {
>> +        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
>> +        exit(1);
>> +    }
>> +#else /* _WIN32 */
>> +    if (!qemu_cpu_is_self(cpu)) {
>> +        CONTEXT tcgContext;
>> +
>> +        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
>> +            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>> +                    GetLastError());
>> +            exit(1);
>> +        }
>> +
>> +        /* On multi-core systems, we are not sure that the thread is actually
>> +         * suspended until we can get the context.
>> +         */
>> +        tcgContext.ContextFlags = CONTEXT_CONTROL;
>> +        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
>> +            continue;
>> +        }
>> +
>> +        cpu_signal(0);
>> +
>> +        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
>> +            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>> +                    GetLastError());
>> +            exit(1);
>> +        }
>> +    }
>> +#endif
> 
> I'm going to go out on a limb and guess these sort of implementation
> specifics should be in the posix/win32 utility files, that is unless
> Glib abstracts enough of it for us.

As you found later, this part of the patch is just moving code around.
However, getting ultimately rid of this is basically the reason why I'm
interested in MTTCG. :)

>> +}
>> +
>>  void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
>>  {
>>      struct qemu_work_item wi;
>> @@ -894,6 +933,76 @@ void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
>>      qemu_cpu_kick(cpu);
>>  }
>>  
>> +void async_run_safe_work_on_cpu(CPUState *cpu, void (*func)(void *data),
>> +                                void *data)
>> +{
>> +    struct qemu_work_item *wi;
>> +
>> +    wi = g_malloc0(sizeof(struct qemu_work_item));
>> +    wi->func = func;
>> +    wi->data = data;
> 
> Is there anything that prevents the user calling the function with the
> same payload for multiple CPUs?

Why?  The data could be read only, or could be known to outlive the CPUs.

>> +    wi->free = true;
>> +
>> +    qemu_mutex_lock(&cpu->work_mutex);
>> +    if (cpu->queued_safe_work_first == NULL) {
>> +        cpu->queued_safe_work_first = wi;
>> +    } else {
>> +        cpu->queued_safe_work_last->next = wi;
>> +    }
>> +    cpu->queued_safe_work_last = wi;
> 
> I'm surprised we haven't added some helpers to the qemu_work_queue API
> for all this identical boilerplate but whatever...

Yes, it should be using QSIMPLEQ.  Can be done later.

It is indeed reasonable, but it should be used with great care.

Paolo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part.
  2015-07-13 11:53 ` [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part Paolo Bonzini
  2015-07-13 15:50   ` Alex Bennée
@ 2015-07-15  7:44   ` Frederic Konrad
  1 sibling, 0 replies; 14+ messages in thread
From: Frederic Konrad @ 2015-07-15  7:44 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel, mttcg
  Cc: alex.bennee, mark.burton, a.rigo, guillaume.delbergue

On 13/07/2015 13:53, Paolo Bonzini wrote:
>
> On 10/07/2015 18:08, fred.konrad@greensocs.com wrote:
>> From: KONRAD Frederic <fred.konrad@greensocs.com>
>>
>> This is the async_safe_work introduction bit of the Multithread TCG work.
>> Rebased on current upstream (6169b60285fe1ff730d840a49527e721bfb30899).
>>
>> It can be cloned here:
>> http://git.greensocs.com/fkonrad/mttcg.git branch async_work_v2
>>
>> The first patch introduces a mutex to protect the existing queued_work_*
>> CPUState members against multiple (concurent) access.
>>
>> The second patch introduces a tcg_executing_flag which will be 1 when we are
>> inside cpu_exec(). This is required as safe work need to be sure that's all vCPU
>> are outside cpu_exec().
>>
>> The last patch introduces async_safe_work. It allows to add some work which will
>> be done asynchronously but only when all vCPUs are outside cpu_exec(). The tcg
>> thread will wait that no vCPUs have any pending safe work before reentering
>> cpu-exec().
>>
>> Changes V1 -> V2:
>>    * Release the lock while running the callback for both async and safe work.
> What tree do these patches apply to?
>
> Paolo

Hi,
it applies on:

(6169b60285fe1ff730d840a49527e721bfb30899)

I need to rebase mttcg on it. But I cleaned up/sent this part for Alvise.

Fred

>
>> KONRAD Frederic (3):
>>    cpus: protect queued_work_* with work_mutex.
>>    cpus: add a tcg_executing flag.
>>    cpus: introduce async_run_safe_work_on_cpu.
>>
>>   cpu-exec.c        |   7 +++
>>   cpus.c            | 164 +++++++++++++++++++++++++++++++++++++++++-------------
>>   include/qom/cpu.h |  28 ++++++++++
>>   qom/cpu.c         |   2 +
>>   4 files changed, 161 insertions(+), 40 deletions(-)
>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag.
  2015-07-13 15:56   ` Alex Bennée
  2015-07-13 16:10     ` Peter Maydell
@ 2015-07-15  8:40     ` Frederic Konrad
  1 sibling, 0 replies; 14+ messages in thread
From: Frederic Konrad @ 2015-07-15  8:40 UTC (permalink / raw)
  To: Alex Bennée
  Cc: mttcg, mark.burton, qemu-devel, a.rigo, guillaume.delbergue,
	pbonzini

On 13/07/2015 17:56, Alex Bennée wrote:
> fred.konrad@greensocs.com writes:
>
>> From: KONRAD Frederic <fred.konrad@greensocs.com>
>>
>> This flag indicates if the VCPU is currently executing TCG code.
>>
>> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
>>
>> Changes V1 -> V2:
>>    * do both tcg_executing = 0 or 1 in cpu_exec().
>> ---
>>   cpu-exec.c        | 2 ++
>>   include/qom/cpu.h | 3 +++
>>   qom/cpu.c         | 1 +
>>   3 files changed, 6 insertions(+)
>>
>> diff --git a/cpu-exec.c b/cpu-exec.c
>> index 75694f3..2fdf89d 100644
>> --- a/cpu-exec.c
>> +++ b/cpu-exec.c
>> @@ -371,6 +371,7 @@ int cpu_exec(CPUState *cpu)
>>           cpu->halted = 0;
>>       }
>>   
>> +    cpu->tcg_executing = 1;
>>       current_cpu = cpu;
>>   
>>       /* As long as current_cpu is null, up to the assignment just above,
>> @@ -583,5 +584,6 @@ int cpu_exec(CPUState *cpu)
>>   
>>       /* fail safe : never use current_cpu outside cpu_exec() */
>>       current_cpu = NULL;
>> +    cpu->tcg_executing = 0;
>>       return ret;
>>   }
>> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
>> index efa9624..a2de536 100644
>> --- a/include/qom/cpu.h
>> +++ b/include/qom/cpu.h
>> @@ -226,6 +226,7 @@ struct kvm_run;
>>    * @stopped: Indicates the CPU has been artificially stopped.
>>    * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
>>    *           CPU and return to its top level loop.
>> + * @tcg_executing: This TCG thread is in cpu_exec().
>>    * @singlestep_enabled: Flags for single-stepping.
>>    * @icount_extra: Instructions until next timer event.
>>    * @icount_decr: Number of cycles left, with interrupt flag in high bit.
>> @@ -322,6 +323,8 @@ struct CPUState {
>>          (absolute value) offset as small as possible.  This reduces code
>>          size, especially for hosts without large memory offsets.  */
>>       volatile sig_atomic_t tcg_exit_req;
>> +
>> +    volatile int tcg_executing;
> My concern is on weakly ordered backends is volatile enough for this
> flag or do we need some sort of memory barrier when we update it? Does
> it just introduce an inefficiency that other threads may spin a few
> times waiting to find out the vCPU has halted?

I think it will just spin (see in flush_queued_safe_work in patch 3).
>
> If other threads are waiting for it to halt is there a mechanism that
> ensures we'll never start-up again until everything is done?
This flag is not supposed to do that, it's in the third patch as well.

It will check async_safe_work_pending before starting the execution.
We might have a race here, if the flush is triggered between
async_safe_work_pending and the tcg_executing flag set in cpu-exec.

     if (async_safe_work_pending()) {
         cpu->exit_request = 1;
         return 0;
     }

     if (cpu->halted) {
         if (!cpu_has_work(cpu)) {
             return EXCP_HALTED;
         }

         cpu->halted = 0;
     }

     cpu->tcg_executing = 1;

I need to check and fix that.

Fred

>
>
>>   };
>>   
>>   QTAILQ_HEAD(CPUTailQ, CPUState);
>> diff --git a/qom/cpu.c b/qom/cpu.c
>> index 4e12598..62663e5 100644
>> --- a/qom/cpu.c
>> +++ b/qom/cpu.c
>> @@ -249,6 +249,7 @@ static void cpu_common_reset(CPUState *cpu)
>>       cpu->icount_decr.u32 = 0;
>>       cpu->can_do_io = 0;
>>       cpu->exception_index = -1;
>> +    cpu->tcg_executing = 0;
>>       memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
>>   }

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-07-15  8:40 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-07-10 16:08 [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part fred.konrad
2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 1/3] cpus: protect queued_work_* with work_mutex fred.konrad
2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 2/3] cpus: add a tcg_executing flag fred.konrad
2015-07-13 15:56   ` Alex Bennée
2015-07-13 16:10     ` Peter Maydell
2015-07-13 16:15       ` Paolo Bonzini
2015-07-13 16:36         ` Alex Bennée
2015-07-15  8:40     ` Frederic Konrad
2015-07-10 16:08 ` [Qemu-devel] [RFC PATCH V2 3/3] cpus: introduce async_run_safe_work_on_cpu fred.konrad
2015-07-13 16:20   ` Alex Bennée
2015-07-13 22:56     ` Paolo Bonzini
2015-07-13 11:53 ` [Qemu-devel] [RFC PATCH V2 0/3] Multithread TCG async_safe_work part Paolo Bonzini
2015-07-13 15:50   ` Alex Bennée
2015-07-15  7:44   ` Frederic Konrad

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).