[Qemu-devel] [PATCH v7 15/27] cputlb: introduce tlb_flush_* async work.

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: "Alex Bennée" <alex.bennee@linaro.org>
To: mttcg@listserver.greensocs.com, qemu-devel@nongnu.org,
	fred.konrad@greensocs.com, a.rigo@virtualopensystems.com,
	cota@braap.org, bobby.prani@gmail.com, nikunj@linux.vnet.ibm.com
Cc: mark.burton@greensocs.com, pbonzini@redhat.com,
	jan.kiszka@siemens.com, serge.fdrv@gmail.com, rth@twiddle.net,
	peter.maydell@linaro.org, claudio.fontana@huawei.com,
	bamvor.zhangjian@linaro.org,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Peter Crosthwaite" <crosthwaite.peter@gmail.com>
Subject: [Qemu-devel] [PATCH v7 15/27] cputlb: introduce tlb_flush_* async work.
Date: Thu, 19 Jan 2017 17:04:55 +0000	[thread overview]
Message-ID: <20170119170507.16185-16-alex.bennee@linaro.org> (raw)
In-Reply-To: <20170119170507.16185-1-alex.bennee@linaro.org>

From: KONRAD Frederic <fred.konrad@greensocs.com>

Some architectures allow to flush the tlb of other VCPUs. This is not a problem
when we have only one thread for all VCPUs but it definitely needs to be an
asynchronous work when we are in true multithreaded work.

We take the tb_lock() when doing this to avoid racing with other threads
which may be invalidating TB's at the same time. The alternative would
be to use proper atomic primitives to clear the tlb entries en-mass.

This patch doesn't do anything to protect other cputlb function being
called in MTTCG mode making cross vCPU changes.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[AJB: remove need for g_malloc on defer, make check fixes, tb_lock]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v6 (base patches)
  - don't use cmpxchg_bool (we drop it later anyway)
  - use RUN_ON_CPU macros instead of inlines
  - bug out of tlb_flush if !tcg_enabled() (MacOSX make check failure)
v5 (base patches)
  - take tb_lock() for memset
  - ensure tb_flush_page properly asyncs work for other vCPUs
  - use run_on_cpu_data
v4 (base_patches)
  - brought forward from arm enabling series
  - restore pending_tlb_flush flag
v1
  - Remove tlb_flush_all just do the check in tlb_flush.
  - remove the need to g_malloc
  - tlb_flush calls direct if !cpu->created

fixup! cputlb: introduce tlb_flush_* async work.
---
 cputlb.c                | 78 +++++++++++++++++++++++++++++++++++++++++++++++--
 include/exec/exec-all.h |  1 +
 include/qom/cpu.h       |  6 ++++
 3 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 94fa9977c5..36388b29b8 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -64,6 +64,10 @@
         }                                                         \
     } while (0)
 
+/* run_on_cpu_data.target_ptr should always be big enough for a
+ * target_ulong even on 32 bit builds */
+QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
+
 /* statistics */
 int tlb_flush_count;
 
@@ -72,13 +76,22 @@ int tlb_flush_count;
  * flushing more entries than required is only an efficiency issue,
  * not a correctness issue.
  */
-void tlb_flush(CPUState *cpu)
+static void tlb_flush_nocheck(CPUState *cpu)
 {
     CPUArchState *env = cpu->env_ptr;
 
+    /* The QOM tests will trigger tlb_flushes without setting up TCG
+     * so we bug out here in that case.
+     */
+    if (!tcg_enabled()) {
+        return;
+    }
+
     assert_cpu_is_self(cpu);
     tlb_debug("(count: %d)\n", tlb_flush_count++);
 
+    tb_lock();
+
     memset(env->tlb_table, -1, sizeof(env->tlb_table));
     memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
     memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
@@ -86,6 +99,39 @@ void tlb_flush(CPUState *cpu)
     env->vtlb_index = 0;
     env->tlb_flush_addr = -1;
     env->tlb_flush_mask = 0;
+
+    tb_unlock();
+
+    atomic_mb_set(&cpu->pending_tlb_flush, false);
+}
+
+static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
+{
+    tlb_flush_nocheck(cpu);
+}
+
+/* NOTE:
+ * If flush_global is true (the usual case), flush all tlb entries.
+ * If flush_global is false, flush (at least) all tlb entries not
+ * marked global.
+ *
+ * Since QEMU doesn't currently implement a global/not-global flag
+ * for tlb entries, at the moment tlb_flush() will also flush all
+ * tlb entries in the flush_global == false case. This is OK because
+ * CPU architectures generally permit an implementation to drop
+ * entries from the TLB at any time, so flushing more entries than
+ * required is only an efficiency issue, not a correctness issue.
+ */
+void tlb_flush(CPUState *cpu)
+{
+    if (cpu->created && !qemu_cpu_is_self(cpu)) {
+        if (atomic_cmpxchg(&cpu->pending_tlb_flush, false, true) == true) {
+            async_run_on_cpu(cpu, tlb_flush_global_async_work,
+                             RUN_ON_CPU_NULL);
+        }
+    } else {
+        tlb_flush_nocheck(cpu);
+    }
 }
 
 static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
@@ -95,6 +141,8 @@ static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
     assert_cpu_is_self(cpu);
     tlb_debug("start\n");
 
+    tb_lock();
+
     for (;;) {
         int mmu_idx = va_arg(argp, int);
 
@@ -109,6 +157,8 @@ static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
     }
 
     memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+
+    tb_unlock();
 }
 
 void tlb_flush_by_mmuidx(CPUState *cpu, ...)
@@ -131,13 +181,15 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
     }
 }
 
-void tlb_flush_page(CPUState *cpu, target_ulong addr)
+static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
 {
     CPUArchState *env = cpu->env_ptr;
+    target_ulong addr = (target_ulong) data.target_ptr;
     int i;
     int mmu_idx;
 
     assert_cpu_is_self(cpu);
+
     tlb_debug("page :" TARGET_FMT_lx "\n", addr);
 
     /* Check if we need to flush due to large pages.  */
@@ -167,6 +219,18 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
     tb_flush_jmp_cache(cpu, addr);
 }
 
+void tlb_flush_page(CPUState *cpu, target_ulong addr)
+{
+    tlb_debug("page :" TARGET_FMT_lx "\n", addr);
+
+    if (!qemu_cpu_is_self(cpu)) {
+        async_run_on_cpu(cpu, tlb_flush_page_async_work,
+                         RUN_ON_CPU_TARGET_PTR(addr));
+    } else {
+        tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
+    }
+}
+
 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
 {
     CPUArchState *env = cpu->env_ptr;
@@ -213,6 +277,16 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
     tb_flush_jmp_cache(cpu, addr);
 }
 
+void tlb_flush_page_all(target_ulong addr)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        async_run_on_cpu(cpu, tlb_flush_page_async_work,
+                         RUN_ON_CPU_TARGET_PTR(addr));
+    }
+}
+
 /* update the TLBs so that writes to code in the virtual page 'addr'
    can be detected */
 void tlb_protect_code(ram_addr_t ram_addr)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index bd4622ac5d..e43cb68355 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -158,6 +158,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
 void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr);
 void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
                  uintptr_t retaddr);
+void tlb_flush_page_all(target_ulong addr);
 #else
 static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
 {
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 1735374ad6..880ba4254e 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -393,6 +393,12 @@ struct CPUState {
        (absolute value) offset as small as possible.  This reduces code
        size, especially for hosts without large memory offsets.  */
     uint32_t tcg_exit_req;
+
+    /* The pending_tlb_flush flag is set and cleared atomically to
+     * avoid potential races. The aim of the flag is to avoid
+     * unnecessary flushes.
+     */
+    bool pending_tlb_flush;
 };
 
 QTAILQ_HEAD(CPUTailQ, CPUState);
-- 
2.11.0

next prev parent reply	other threads:[~2017-01-19 17:05 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-19 17:04 [Qemu-devel] [PATCH v7 00/27] Remaining MTTCG Base patches and ARM enablement Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 01/27] docs: new design document multi-thread-tcg.txt Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 02/27] mttcg: translate-all: Enable locking debug in a debug build Alex Bennée
2017-01-23 18:57   ` Richard Henderson
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 03/27] mttcg: Add missing tb_lock/unlock() in cpu_exec_step() Alex Bennée
2017-01-23 18:57   ` Richard Henderson
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 04/27] tcg: move TCG_MO/BAR types into own file Alex Bennée
2017-01-23 18:59   ` Richard Henderson
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 05/27] tcg: add options for enabling MTTCG Alex Bennée
2017-01-20  1:28   ` Pranith Kumar
2017-01-20 14:50     ` Alex Bennée
2017-01-20 15:03       ` Pranith Kumar
2017-01-23 19:06   ` Richard Henderson
2017-01-24 20:25     ` Alex Bennée
2017-01-24 20:48       ` Richard Henderson
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 06/27] tcg: add kick timer for single-threaded vCPU emulation Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 07/27] tcg: rename tcg_current_cpu to tcg_current_rr_cpu Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 08/27] tcg: drop global lock during TCG code execution Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 09/27] tcg: remove global exit_request Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 10/27] tcg: enable tb_lock() for SoftMMU Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 11/27] tcg: enable thread-per-vCPU Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 12/27] tcg: handle EXCP_ATOMIC exception for system emulation Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 13/27] cputlb: add assert_cpu_is_self checks Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 14/27] cputlb: tweak qemu_ram_addr_from_host_nofail reporting Alex Bennée
2017-01-23 19:07   ` Richard Henderson
2017-01-19 17:04 ` Alex Bennée [this message]
2017-01-23 19:10   ` [Qemu-devel] [PATCH v7 15/27] cputlb: introduce tlb_flush_* async work Richard Henderson
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 16/27] cputlb: add tlb_flush_by_mmuidx async routines Alex Bennée
2017-01-23 19:11   ` Richard Henderson
2017-01-24 20:31     ` Alex Bennée
2017-01-24 20:44       ` Richard Henderson
2017-01-25 14:09         ` Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 17/27] cputlb: atomically update tlb fields used by tlb_reset_dirty Alex Bennée
2017-01-23 19:17   ` Richard Henderson
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 18/27] cputlb: introduce tlb_flush_*_all_cpus Alex Bennée
2017-01-23 19:21   ` Richard Henderson
2017-01-24 20:34     ` Alex Bennée
2017-01-24 20:47       ` Richard Henderson
2017-01-25 14:21         ` Alex Bennée
2017-01-19 17:04 ` [Qemu-devel] [PATCH v7 19/27] target-arm/powerctl: defer cpu reset work to CPU context Alex Bennée
2017-01-19 17:05 ` [Qemu-devel] [PATCH v7 20/27] target-arm: ensure BQL taken for ARM_CP_IO register access Alex Bennée
2017-01-19 17:05 ` [Qemu-devel] [PATCH v7 21/27] target-arm: helpers which may affect global state need the BQL Alex Bennée
2017-01-19 17:05 ` [Qemu-devel] [PATCH v7 22/27] target-arm: don't generate WFE/YIELD calls for MTTCG Alex Bennée
2017-01-19 17:05 ` [Qemu-devel] [PATCH v7 23/27] target-arm/cpu.h: make ARM_CP defined consistent Alex Bennée
2017-01-19 17:05 ` [Qemu-devel] [PATCH v7 24/27] target-arm: introduce ARM_CP_EXIT_PC Alex Bennée
2017-01-19 17:05 ` [Qemu-devel] [PATCH v7 25/27] target-arm: ensure all cross vCPUs TLB flushes complete Alex Bennée
2017-01-19 17:05 ` [Qemu-devel] [PATCH v7 26/27] tcg: enable MTTCG by default for ARM on x86 hosts Alex Bennée
2017-01-20  0:08   ` Pranith Kumar
2017-01-20 10:53     ` Alex Bennée
2017-01-20 14:30       ` Pranith Kumar
2017-01-19 17:05 ` [Qemu-devel] [PATCH v7 27/27] target-ppc: take global mutex for set_irq Alex Bennée

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:94fa9977c dfblob:36388b29b dfblob:bd4622ac5 dfblob:e43cb6835
dfblob:1735374ad dfblob:880ba4254 )
 OR (
bs:"[Qemu-devel] [PATCH v7 15/27] cputlb: introduce tlb_flush_* async work." )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170119170507.16185-16-alex.bennee@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=a.rigo@virtualopensystems.com \
    --cc=bamvor.zhangjian@linaro.org \
    --cc=bobby.prani@gmail.com \
    --cc=claudio.fontana@huawei.com \
    --cc=cota@braap.org \
    --cc=crosthwaite.peter@gmail.com \
    --cc=fred.konrad@greensocs.com \
    --cc=jan.kiszka@siemens.com \
    --cc=mark.burton@greensocs.com \
    --cc=mttcg@listserver.greensocs.com \
    --cc=nikunj@linux.vnet.ibm.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=serge.fdrv@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).