From: Joelle van Dyne <j@getutm.app>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: QEMU Developers <qemu-devel@nongnu.org>
Subject: Re: [PATCH v2 01/19] tcg: Enhance flush_icache_range with separate data pointer
Date: Sat, 31 Oct 2020 23:54:42 -0700 [thread overview]
Message-ID: <CA+E+eSCn=NjEWrGngmgQtk+robNxL01Ksu7T6nBUNXEg90yvsQ@mail.gmail.com> (raw)
In-Reply-To: <20201030004921.721096-2-richard.henderson@linaro.org>
s->code_ptr and s->code_buf are 4 byte pointers on aarch64 so the
cache flush is off by a factor of 4
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 44b923f5fe..2c4b66965b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -4325,7 +4325,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
/* flush instruction cache */
flush_idcache_range((uintptr_t)tcg_mirror_rw_to_rx(s->code_buf),
- (uintptr_t)s->code_buf, s->code_ptr - s->code_buf);
+ (uintptr_t)s->code_buf,
+ (uintptr_t)s->code_ptr - (uintptr_t)s->code_buf);
return tcg_current_code_size(s);
}
With this and the other changes, split JIT works fine on iOS.
-j
On Thu, Oct 29, 2020 at 5:49 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> We are shortly going to have a split rw/rx jit buffer. Depending
> on the host, we need to flush the dcache at the rw data pointer and
> flush the icache at the rx code pointer.
>
> For now, the two passed pointers are identical, so there is no
> effective change in behaviour.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/aarch64/tcg-target.h | 9 +++++++--
> tcg/arm/tcg-target.h | 8 ++++++--
> tcg/i386/tcg-target.h | 3 ++-
> tcg/mips/tcg-target.h | 8 ++++++--
> tcg/ppc/tcg-target.h | 2 +-
> tcg/riscv/tcg-target.h | 8 ++++++--
> tcg/s390/tcg-target.h | 3 ++-
> tcg/sparc/tcg-target.h | 8 +++++---
> tcg/tci/tcg-target.h | 3 ++-
> softmmu/physmem.c | 9 ++++++++-
> tcg/tcg.c | 5 +++--
> tcg/aarch64/tcg-target.c.inc | 2 +-
> tcg/mips/tcg-target.c.inc | 2 +-
> tcg/ppc/tcg-target.c.inc | 21 +++++++++++----------
> tcg/sparc/tcg-target.c.inc | 4 ++--
> 15 files changed, 63 insertions(+), 32 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
> index 663dd0b95e..d0a6a059b7 100644
> --- a/tcg/aarch64/tcg-target.h
> +++ b/tcg/aarch64/tcg-target.h
> @@ -148,9 +148,14 @@ typedef enum {
> #define TCG_TARGET_DEFAULT_MO (0)
> #define TCG_TARGET_HAS_MEMORY_BSWAP 1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
> {
> - __builtin___clear_cache((char *)start, (char *)stop);
> + /* TODO: Copy this from gcc to avoid 4 loops instead of 2. */
> + if (rw != rx) {
> + __builtin___clear_cache((char *)rw, (char *)(rw + len));
> + }
> + __builtin___clear_cache((char *)rx, (char *)(rx + len));
> }
>
> void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
> diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
> index 17e771374d..fa88b24e43 100644
> --- a/tcg/arm/tcg-target.h
> +++ b/tcg/arm/tcg-target.h
> @@ -134,9 +134,13 @@ enum {
> #define TCG_TARGET_DEFAULT_MO (0)
> #define TCG_TARGET_HAS_MEMORY_BSWAP 1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
> {
> - __builtin___clear_cache((char *) start, (char *) stop);
> + if (rw != rx) {
> + __builtin___clear_cache((char *)rw, (char *)(rw + len));
> + }
> + __builtin___clear_cache((char *)rx, (char *)(rx + len));
> }
>
> /* not defined -- call should be eliminated at compile time */
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index abd4ac7fc0..8323e72639 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -206,7 +206,8 @@ extern bool have_avx2;
> #define TCG_TARGET_extract_i64_valid(ofs, len) \
> (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
> {
> }
>
> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
> index c6b091d849..47b1226ee9 100644
> --- a/tcg/mips/tcg-target.h
> +++ b/tcg/mips/tcg-target.h
> @@ -207,9 +207,13 @@ extern bool use_mips32r2_instructions;
> #define TCG_TARGET_DEFAULT_MO (0)
> #define TCG_TARGET_HAS_MEMORY_BSWAP 1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
> {
> - cacheflush ((void *)start, stop-start, ICACHE);
> + if (rx != rw) {
> + cacheflush((void *)rw, len, DCACHE);
> + }
> + cacheflush((void *)rx, len, ICACHE);
> }
>
> void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index be10363956..fbb6dc1b47 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -175,7 +175,7 @@ extern bool have_vsx;
> #define TCG_TARGET_HAS_bitsel_vec have_vsx
> #define TCG_TARGET_HAS_cmpsel_vec 0
>
> -void flush_icache_range(uintptr_t start, uintptr_t stop);
> +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
> void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
>
> #define TCG_TARGET_DEFAULT_MO (0)
> diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
> index 032439d806..0fa6ae358e 100644
> --- a/tcg/riscv/tcg-target.h
> +++ b/tcg/riscv/tcg-target.h
> @@ -159,9 +159,13 @@ typedef enum {
> #define TCG_TARGET_HAS_mulsh_i64 1
> #endif
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
> {
> - __builtin___clear_cache((char *)start, (char *)stop);
> + if (rx != rw) {
> + __builtin___clear_cache((char *)rw, (char *)(rw + len));
> + }
> + __builtin___clear_cache((char *)rx, (char *)(rx + len));
> }
>
> /* not defined -- call should be eliminated at compile time */
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
> index 63c8797bd3..c3dc2e8938 100644
> --- a/tcg/s390/tcg-target.h
> +++ b/tcg/s390/tcg-target.h
> @@ -145,7 +145,8 @@ enum {
> TCG_AREG0 = TCG_REG_R10,
> };
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
> {
> }
>
> diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
> index 633841ebf2..c27c40231e 100644
> --- a/tcg/sparc/tcg-target.h
> +++ b/tcg/sparc/tcg-target.h
> @@ -168,10 +168,12 @@ extern bool use_vis3_instructions;
> #define TCG_TARGET_DEFAULT_MO (0)
> #define TCG_TARGET_HAS_MEMORY_BSWAP 1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
> {
> - uintptr_t p;
> - for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
> + /* No additional data flush to the RW virtual address required. */
> + uintptr_t p, end = (rx + len + 7) & -8;
> + for (p = rx & -8; p < end; p += 8) {
> __asm__ __volatile__("flush\t%0" : : "r" (p));
> }
> }
> diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
> index 8c1c1d265d..6460449719 100644
> --- a/tcg/tci/tcg-target.h
> +++ b/tcg/tci/tcg-target.h
> @@ -191,7 +191,8 @@ void tci_disas(uint8_t opc);
>
> #define HAVE_TCG_QEMU_TB_EXEC
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
> {
> }
>
> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> index a9adedb9f8..b23c1fef54 100644
> --- a/softmmu/physmem.c
> +++ b/softmmu/physmem.c
> @@ -2954,7 +2954,14 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
> invalidate_and_set_dirty(mr, addr1, l);
> break;
> case FLUSH_CACHE:
> - flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + l);
> + /*
> + * FIXME: This function is currently located in tcg/host/,
> + * but we never come here when tcg is enabled; only for
> + * real hardware acceleration. This can actively fail
> + * when TCI is configured, since that function is a nop.
> + * We should move this to util/ or something.
> + */
> + flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
> break;
> }
> }
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index a8c28440e2..3bf36e0cfe 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -1076,7 +1076,7 @@ void tcg_prologue_init(TCGContext *s)
> #endif
>
> buf1 = s->code_ptr;
> - flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
> + flush_idcache_range((uintptr_t)buf0, (uintptr_t)buf0, buf1 - buf0);
>
> /* Deduct the prologue from the buffer. */
> prologue_size = tcg_current_code_size(s);
> @@ -4268,7 +4268,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
> }
>
> /* flush instruction cache */
> - flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
> + flush_idcache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_buf,
> + s->code_ptr - s->code_buf);
>
> return tcg_current_code_size(s);
> }
> diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
> index 26f71cb599..83af3108a4 100644
> --- a/tcg/aarch64/tcg-target.c.inc
> +++ b/tcg/aarch64/tcg-target.c.inc
> @@ -1363,7 +1363,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
> }
> pair = (uint64_t)i2 << 32 | i1;
> qatomic_set((uint64_t *)jmp_addr, pair);
> - flush_icache_range(jmp_addr, jmp_addr + 8);
> + flush_idcache_range(jmp_addr, jmp_addr, 8);
> }
>
> static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
> diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
> index 41be574e89..c255ecb444 100644
> --- a/tcg/mips/tcg-target.c.inc
> +++ b/tcg/mips/tcg-target.c.inc
> @@ -2660,7 +2660,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
> uintptr_t addr)
> {
> qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
> - flush_icache_range(jmp_addr, jmp_addr + 4);
> + flush_idcache_range(jmp_addr, jmp_addr, 4);
> }
>
> typedef struct {
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 18ee989f95..a848e98383 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -1753,12 +1753,12 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
> /* As per the enclosing if, this is ppc64. Avoid the _Static_assert
> within qatomic_set that would fail to build a ppc32 host. */
> qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
> - flush_icache_range(jmp_addr, jmp_addr + 8);
> + flush_idcache_range(jmp_addr, jmp_addr, 8);
> } else {
> intptr_t diff = addr - jmp_addr;
> tcg_debug_assert(in_range_b(diff));
> qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
> - flush_icache_range(jmp_addr, jmp_addr + 4);
> + flush_idcache_range(jmp_addr, jmp_addr, 4);
> }
> }
>
> @@ -3864,22 +3864,23 @@ void tcg_register_jit(void *buf, size_t buf_size)
> }
> #endif /* __ELF__ */
>
> -void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +void flush_idcache_range(uintptr_t rx, uintptr_t rw, uintptr_t len)
> {
> - uintptr_t p, start1, stop1;
> + uintptr_t p, start, stop;
> size_t dsize = qemu_dcache_linesize;
> size_t isize = qemu_icache_linesize;
>
> - start1 = start & ~(dsize - 1);
> - stop1 = (stop + dsize - 1) & ~(dsize - 1);
> - for (p = start1; p < stop1; p += dsize) {
> + start = rw & ~(dsize - 1);
> + stop = (rw + len + dsize - 1) & ~(dsize - 1);
> + for (p = start; p < stop; p += dsize) {
> asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
> }
> asm volatile ("sync" : : : "memory");
>
> - start &= start & ~(isize - 1);
> - stop1 = (stop + isize - 1) & ~(isize - 1);
> - for (p = start1; p < stop1; p += isize) {
> + start = rx & ~(isize - 1);
> + stop = (rx + len + isize - 1) & ~(isize - 1);
> + for (p = start; p < stop; p += isize) {
> asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
> }
> asm volatile ("sync" : : : "memory");
> diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
> index 6775bd30fc..6e2d755f6a 100644
> --- a/tcg/sparc/tcg-target.c.inc
> +++ b/tcg/sparc/tcg-target.c.inc
> @@ -1836,7 +1836,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
> if (!USE_REG_TB) {
> qatomic_set((uint32_t *)jmp_addr,
> deposit32(CALL, 0, 30, br_disp >> 2));
> - flush_icache_range(jmp_addr, jmp_addr + 4);
> + flush_idcache_range(jmp_addr, jmp_addr, 4);
> return;
> }
>
> @@ -1860,5 +1860,5 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
> }
>
> qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
> - flush_icache_range(jmp_addr, jmp_addr + 8);
> + flush_idcache_range(jmp_addr, jmp_addr, 8);
> }
> --
> 2.25.1
>
next prev parent reply other threads:[~2020-11-01 6:56 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-30 0:49 [PATCH v2 00/19] Mirror map JIT memory for TCG Richard Henderson
2020-10-30 0:49 ` [PATCH v2 01/19] tcg: Enhance flush_icache_range with separate data pointer Richard Henderson
2020-11-01 6:54 ` Joelle van Dyne [this message]
2020-11-03 23:02 ` Richard Henderson
2020-10-30 0:49 ` [PATCH v2 02/19] tcg: Move tcg prologue pointer out of TCGContext Richard Henderson
2020-10-30 0:49 ` [PATCH v2 03/19] tcg: Move tcg epilogue " Richard Henderson
2020-10-30 0:49 ` [PATCH v2 04/19] tcg: Introduce tcg_mirror_rw_to_rx/tcg_mirror_rx_to_rw Richard Henderson
2020-10-30 0:49 ` [PATCH v2 05/19] tcg: Adjust tcg_out_call for const Richard Henderson
2020-10-30 0:49 ` [PATCH v2 06/19] tcg: Adjust tcg_out_label " Richard Henderson
2020-10-30 0:49 ` [PATCH v2 07/19] tcg: Adjust tcg_register_jit " Richard Henderson
2020-10-30 0:49 ` [PATCH v2 08/19] tcg: Adjust tb_target_set_jmp_target for split rwx Richard Henderson
2020-10-30 0:49 ` [PATCH v2 09/19] tcg: Make DisasContextBase.tb const Richard Henderson
2020-10-30 0:49 ` [PATCH v2 10/19] tcg: Make tb arg to synchronize_from_tb const Richard Henderson
2020-10-30 0:49 ` [PATCH v2 11/19] tcg: Use Error with alloc_code_gen_buffer Richard Henderson
2020-10-30 0:49 ` [PATCH v2 12/19] tcg: Add --accel tcg,split-rwx property Richard Henderson
2020-10-30 0:49 ` [PATCH v2 13/19] accel/tcg: Support split-rwx for linux with memfd Richard Henderson
2020-10-30 0:49 ` [PATCH v2 14/19] RFC: accel/tcg: Support split-rwx for darwin/iOS with vm_remap Richard Henderson
2020-11-01 1:42 ` Joelle van Dyne
2020-11-01 21:11 ` Joelle van Dyne
2020-10-30 0:49 ` [PATCH v2 15/19] tcg: Return the rx mirror of TranslationBlock from exit_tb Richard Henderson
2020-10-30 0:49 ` [PATCH v2 16/19] tcg/i386: Support split-rwx code generation Richard Henderson
2020-10-30 0:49 ` [PATCH v2 17/19] tcg/aarch64: Use B not BL for tcg_out_goto_long Richard Henderson
2020-10-30 0:49 ` [PATCH v2 18/19] tcg/aarch64: Implement flush_idcache_range manually Richard Henderson
2020-11-01 1:25 ` Joelle van Dyne
2020-11-01 15:09 ` Richard Henderson
2020-11-03 23:08 ` Richard Henderson
2020-10-30 0:49 ` [PATCH v2 19/19] tcg/aarch64: Support split-rwx code generation Richard Henderson
2020-10-30 1:27 ` [PATCH v2 00/19] Mirror map JIT memory for TCG no-reply
2020-10-30 18:26 ` Paolo Bonzini
2020-10-30 18:57 ` Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CA+E+eSCn=NjEWrGngmgQtk+robNxL01Ksu7T6nBUNXEg90yvsQ@mail.gmail.com' \
--to=j@getutm.app \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).