[PULL 07/47] util: Enhance flush_icache_range with separate data pointer

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, "Alex Bennée" <alex.bennee@linaro.org>,
	"Joelle van Dyne" <j@getutm.app>
Subject: [PULL 07/47] util: Enhance flush_icache_range with separate data pointer
Date: Thu,  7 Jan 2021 10:14:08 -1000	[thread overview]
Message-ID: <20210107201448.1152301-8-richard.henderson@linaro.org> (raw)
In-Reply-To: <20210107201448.1152301-1-richard.henderson@linaro.org>

We are shortly going to have a split rw/rx jit buffer.  Depending
on the host, we need to flush the dcache at the rw data pointer and
flush the icache at the rx code pointer.

For now, the two passed pointers are identical, so there is no
effective change in behaviour.

Reviewed-by: Joelle van Dyne <j@getutm.app>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/cacheflush.h    | 15 ++++++++++++--
 softmmu/physmem.c            |  2 +-
 tcg/tcg.c                    |  6 ++++--
 util/cacheflush.c            | 38 +++++++++++++++++++++---------------
 util/cacheinfo.c             |  8 +++++---
 tcg/aarch64/tcg-target.c.inc |  2 +-
 tcg/mips/tcg-target.c.inc    |  2 +-
 tcg/ppc/tcg-target.c.inc     |  4 ++--
 tcg/sparc/tcg-target.c.inc   |  4 ++--
 9 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/include/qemu/cacheflush.h b/include/qemu/cacheflush.h
index 58ae488491..ae20bcda73 100644
--- a/include/qemu/cacheflush.h
+++ b/include/qemu/cacheflush.h
@@ -8,16 +8,27 @@
 #ifndef QEMU_CACHEFLUSH_H
 #define QEMU_CACHEFLUSH_H
 
+/**
+ * flush_idcache_range:
+ * @rx: instruction address
+ * @rw: data address
+ * @len: length to flush
+ *
+ * Flush @len bytes of the data cache at @rw and the icache at @rx
+ * to bring them in sync.  The two addresses may be different virtual
+ * mappings of the same physical page(s).
+ */
+
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
     /* icache is coherent and does not require flushing. */
 }
 
 #else
 
-void flush_icache_range(uintptr_t start, uintptr_t stop);
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
 
 #endif
 
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 8b9ffc41c2..6301f4f0a5 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -2946,7 +2946,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
                 invalidate_and_set_dirty(mr, addr1, l);
                 break;
             case FLUSH_CACHE:
-                flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + l);
+                flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
                 break;
             }
         }
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 9bdc450196..759a41d848 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1079,7 +1079,8 @@ void tcg_prologue_init(TCGContext *s)
 
     buf1 = s->code_ptr;
 #ifndef CONFIG_TCG_INTERPRETER
-    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
+    flush_idcache_range((uintptr_t)buf0, (uintptr_t)buf0,
+                        tcg_ptr_byte_diff(buf1, buf0));
 #endif
 
     /* Deduct the prologue from the buffer.  */
@@ -4328,7 +4329,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 
 #ifndef CONFIG_TCG_INTERPRETER
     /* flush instruction cache */
-    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+    flush_idcache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_buf,
+                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
 #endif
 
     return tcg_current_code_size(s);
diff --git a/util/cacheflush.c b/util/cacheflush.c
index 2881832a38..92805efe49 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -21,29 +21,32 @@
 #include <sys/cachectl.h>
 #endif
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    cacheflush((void *)start, stop - start, ICACHE);
+    if (rx != rw) {
+        cacheflush((void *)rw, len, DCACHE);
+    }
+    cacheflush((void *)rx, len, ICACHE);
 }
 
 #elif defined(__powerpc__)
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    uintptr_t p, start1, stop1;
+    uintptr_t p, b, e;
     size_t dsize = qemu_dcache_linesize;
     size_t isize = qemu_icache_linesize;
 
-    start1 = start & ~(dsize - 1);
-    stop1 = (stop + dsize - 1) & ~(dsize - 1);
-    for (p = start1; p < stop1; p += dsize) {
+    b = rw & ~(dsize - 1);
+    e = (rw + len + dsize - 1) & ~(dsize - 1);
+    for (p = b; p < e; p += dsize) {
         asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
     }
     asm volatile ("sync" : : : "memory");
 
-    start &= start & ~(isize - 1);
-    stop1 = (stop + isize - 1) & ~(isize - 1);
-    for (p = start1; p < stop1; p += isize) {
+    b = rx & ~(isize - 1);
+    e = (rx + len + isize - 1) & ~(isize - 1);
+    for (p = b; p < e; p += isize) {
         asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
     }
     asm volatile ("sync" : : : "memory");
@@ -52,20 +55,23 @@ void flush_icache_range(uintptr_t start, uintptr_t stop)
 
 #elif defined(__sparc__)
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    uintptr_t p;
-
-    for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
+    /* No additional data flush to the RW virtual address required. */
+    uintptr_t p, end = (rx + len + 7) & -8;
+    for (p = rx & -8; p < end; p += 8) {
         __asm__ __volatile__("flush\t%0" : : "r" (p));
     }
 }
 
 #else
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    __builtin___clear_cache((char *)start, (char *)stop);
+    if (rw != rx) {
+        __builtin___clear_cache((char *)rw, (char *)rw + len);
+    }
+    __builtin___clear_cache((char *)rx, (char *)rx + len);
 }
 
 #endif
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index 7804c186b6..b182f0b693 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -166,9 +166,11 @@ static void fallback_cache_info(int *isize, int *dsize)
         *isize = *dsize;
     } else {
 #if defined(_ARCH_PPC)
-        /* For PPC, we're going to use the icache size computed for
-           flush_icache_range.  Which means that we must use the
-           architecture minimum.  */
+        /*
+         * For PPC, we're going to use the cache sizes computed for
+         * flush_idcache_range.  Which means that we must use the
+         * architecture minimum.
+         */
         *isize = *dsize = 16;
 #else
         /* Otherwise, 64 bytes is not uncommon.  */
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 26f71cb599..83af3108a4 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1363,7 +1363,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     }
     pair = (uint64_t)i2 << 32 | i1;
     qatomic_set((uint64_t *)jmp_addr, pair);
-    flush_icache_range(jmp_addr, jmp_addr + 8);
+    flush_idcache_range(jmp_addr, jmp_addr, 8);
 }
 
 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 41be574e89..c255ecb444 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -2660,7 +2660,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                               uintptr_t addr)
 {
     qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
-    flush_icache_range(jmp_addr, jmp_addr + 4);
+    flush_idcache_range(jmp_addr, jmp_addr, 4);
 }
 
 typedef struct {
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 0d068ec8ab..b756281042 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1753,12 +1753,12 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
         /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
            within qatomic_set that would fail to build a ppc32 host.  */
         qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
-        flush_icache_range(jmp_addr, jmp_addr + 8);
+        flush_idcache_range(jmp_addr, jmp_addr, 8);
     } else {
         intptr_t diff = addr - jmp_addr;
         tcg_debug_assert(in_range_b(diff));
         qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
-        flush_icache_range(jmp_addr, jmp_addr + 4);
+        flush_idcache_range(jmp_addr, jmp_addr, 4);
     }
 }
 
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index 6775bd30fc..6e2d755f6a 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -1836,7 +1836,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     if (!USE_REG_TB) {
         qatomic_set((uint32_t *)jmp_addr,
 		    deposit32(CALL, 0, 30, br_disp >> 2));
-        flush_icache_range(jmp_addr, jmp_addr + 4);
+        flush_idcache_range(jmp_addr, jmp_addr, 4);
         return;
     }
 
@@ -1860,5 +1860,5 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     }
 
     qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
-    flush_icache_range(jmp_addr, jmp_addr + 8);
+    flush_idcache_range(jmp_addr, jmp_addr, 8);
 }
-- 
2.25.1

next prev parent reply	other threads:[~2021-01-07 20:23 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-07 20:14 [PULL 00/47] tcg patch queue Richard Henderson
2021-01-07 20:14 ` [PULL 01/47] linux-user: Conditionalize TUNSETVNETLE Richard Henderson
2021-01-07 20:14 ` [PULL 02/47] tcg/i386: Adjust TCG_TARGET_HAS_MEMORY_BSWAP Richard Henderson
2021-01-07 20:14 ` [PULL 03/47] tcg: Introduce INDEX_op_qemu_st8_i32 Richard Henderson
2021-01-07 20:14 ` [PULL 04/47] util/oslib-win32: Use _aligned_malloc for qemu_try_memalign Richard Henderson
2021-01-10 23:18   ` Volker Rümelin
2021-01-11  3:10     ` 罗勇刚(Yonggang Luo)
2021-01-07 20:14 ` [PULL 05/47] util/oslib: Assert qemu_try_memalign() alignment is a power of 2 Richard Henderson
2021-01-07 20:14 ` [PULL 06/47] tcg: Do not flush icache for interpreter Richard Henderson
2021-01-07 20:14 ` Richard Henderson [this message]
2021-01-07 20:14 ` [PULL 08/47] util: Specialize flush_idcache_range for aarch64 Richard Henderson
2021-01-07 20:14 ` [PULL 09/47] tcg: Move tcg prologue pointer out of TCGContext Richard Henderson
2021-01-07 20:14 ` [PULL 10/47] tcg: Move tcg epilogue " Richard Henderson
2021-01-07 20:14 ` [PULL 11/47] tcg: Add in_code_gen_buffer Richard Henderson
2021-01-07 20:14 ` [PULL 12/47] tcg: Introduce tcg_splitwx_to_{rx,rw} Richard Henderson
2021-01-07 20:14 ` [PULL 13/47] tcg: Adjust TCGLabel for const Richard Henderson
2021-01-07 20:14 ` [PULL 14/47] tcg: Adjust tcg_out_call " Richard Henderson
2021-01-07 20:14 ` [PULL 15/47] tcg: Adjust tcg_out_label " Richard Henderson
2021-01-07 20:14 ` [PULL 16/47] tcg: Adjust tcg_register_jit " Richard Henderson
2021-01-07 20:14 ` [PULL 17/47] tcg: Adjust tb_target_set_jmp_target for split-wx Richard Henderson
2021-01-07 20:14 ` [PULL 18/47] tcg: Make DisasContextBase.tb const Richard Henderson
2021-01-07 20:14 ` [PULL 19/47] tcg: Make tb arg to synchronize_from_tb const Richard Henderson
2021-01-07 20:14 ` [PULL 20/47] tcg: Use Error with alloc_code_gen_buffer Richard Henderson
2021-01-07 20:14 ` [PULL 21/47] tcg: Add --accel tcg,split-wx property Richard Henderson
2021-01-07 20:14 ` [PULL 22/47] accel/tcg: Support split-wx for linux with memfd Richard Henderson
2021-01-07 20:14 ` [PULL 23/47] accel/tcg: Support split-wx for darwin/iOS with vm_remap Richard Henderson
2021-01-07 20:14 ` [PULL 24/47] tcg: Return the TB pointer from the rx region from exit_tb Richard Henderson
2021-01-07 20:14 ` [PULL 25/47] tcg/i386: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 26/47] tcg/aarch64: Use B not BL for tcg_out_goto_long Richard Henderson
2021-01-07 20:14 ` [PULL 27/47] tcg/aarch64: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 28/47] disas: Push const down through host disassembly Richard Henderson
2021-01-07 20:14 ` [PULL 29/47] tcg/tci: Push const down through bytecode reading Richard Henderson
2021-01-07 20:14 ` [PULL 30/47] tcg: Introduce tcg_tbrel_diff Richard Henderson
2021-01-07 20:14 ` [PULL 31/47] tcg/ppc: Use tcg_tbrel_diff Richard Henderson
2021-01-07 20:14 ` [PULL 32/47] tcg/ppc: Use tcg_out_mem_long to reset TCG_REG_TB Richard Henderson
2021-01-07 20:14 ` [PULL 33/47] tcg/ppc: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 34/47] tcg/sparc: Use tcg_tbrel_diff Richard Henderson
2021-01-07 20:14 ` [PULL 35/47] tcg/sparc: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 36/47] tcg/s390: Use tcg_tbrel_diff Richard Henderson
2021-01-07 20:14 ` [PULL 37/47] tcg/s390: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 38/47] tcg/riscv: Fix branch range checks Richard Henderson
2021-01-07 20:14 ` [PULL 39/47] tcg/riscv: Remove branch-over-branch fallback Richard Henderson
2021-01-07 20:14 ` [PULL 40/47] tcg/riscv: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 41/47] accel/tcg: Add mips support to alloc_code_gen_buffer_splitwx_memfd Richard Henderson
2021-01-07 20:14 ` [PULL 42/47] tcg/mips: Do not assert on relocation overflow Richard Henderson
2021-01-07 20:14 ` [PULL 43/47] tcg/mips: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 44/47] tcg/arm: " Richard Henderson
2021-01-07 20:14 ` [PULL 45/47] tcg: Remove TCG_TARGET_SUPPORT_MIRROR Richard Henderson
2021-01-07 20:14 ` [PULL 46/47] tcg: Constify tcg_code_gen_epilogue Richard Henderson
2021-01-07 20:14 ` [PULL 47/47] tcg: Constify TCGLabelQemuLdst.raddr Richard Henderson
2021-01-07 21:03 ` [PULL 00/47] tcg patch queue no-reply
2021-01-08 10:28 ` Peter Maydell

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:58ae48849 dfblob:ae20bcda7 dfblob:8b9ffc41c dfblob:6301f4f0a
dfblob:9bdc45019 dfblob:759a41d84 dfblob:2881832a3 dfblob:92805efe4
dfblob:7804c186b dfblob:b182f0b69 dfblob:26f71cb59 dfblob:83af3108a
dfblob:41be574e8 dfblob:c255ecb44 dfblob:0d068ec8a dfblob:b75628104
dfblob:6775bd30f dfblob:6e2d755f6 )
 OR (
bs:"[PULL 07/47] util: Enhance flush_icache_range with separate data pointer" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210107201448.1152301-8-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=alex.bennee@linaro.org \
    --cc=j@getutm.app \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).