From: Nicholas Piggin <npiggin@gmail.com>
To: qemu-ppc@nongnu.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
qemu-devel@nongnu.org,
Richard Henderson <richard.henderson@linaro.org>
Subject: [RFC PATCH 1/3] accel/tcg: Option to permit incoherent translation block cache vs stores
Date: Tue, 1 Apr 2025 01:54:21 +1000 [thread overview]
Message-ID: <20250331155423.619451-2-npiggin@gmail.com> (raw)
In-Reply-To: <20250331155423.619451-1-npiggin@gmail.com>
Add an option TARGET_HAS_LAZY_ICACHE that does not invalidate TBs upon
store, but instead tracks that the icache has become incoherent, and
provides a tb_flush_incoherent() function that the target may call to
bring the TB cache back to coherency.
XXX: docs/devel/tcg.rst says that this is not permitted because TBs must
be coherent with memory to handle exceptions correctly... I'm not sure
where this requirement actually applies; maybe it can be worked around
and is not a showstopper?
---
accel/tcg/tb-internal.h | 10 ++++++
include/exec/tb-flush.h | 3 ++
accel/tcg/cputlb.c | 15 +++++++--
accel/tcg/tb-maint.c | 73 ++++++++++++++++++++++++++++++++++++++++
system/memory_ldst.c.inc | 2 +-
5 files changed, 99 insertions(+), 4 deletions(-)
diff --git a/accel/tcg/tb-internal.h b/accel/tcg/tb-internal.h
index 68aa8d17f41..ccc59eac3f5 100644
--- a/accel/tcg/tb-internal.h
+++ b/accel/tcg/tb-internal.h
@@ -82,6 +82,16 @@ void tb_unlock_pages(TranslationBlock *);
void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
unsigned size,
uintptr_t retaddr);
+#ifdef TARGET_HAS_LAZY_ICACHE
+void tb_store_to_phys_range(ram_addr_t ram_addr,
+ unsigned size, uintptr_t retaddr);
+#else
+static inline void tb_store_to_phys_range(ram_addr_t ram_addr,
+ unsigned size, uintptr_t retaddr)
+{
+ tb_invalidate_phys_range_fast(ram_addr, size, retaddr);
+}
+#endif
#endif /* CONFIG_SOFTMMU */
bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
diff --git a/include/exec/tb-flush.h b/include/exec/tb-flush.h
index 142c240d94c..1f8b718be57 100644
--- a/include/exec/tb-flush.h
+++ b/include/exec/tb-flush.h
@@ -23,6 +23,9 @@
*/
void tb_flush(CPUState *cs);
+/* like tb_flush() but only flush incoherent blocks */
+void tb_flush_incoherent(CPUState *cpu);
+
void tcg_flush_jmp_cache(CPUState *cs);
#endif /* _TB_FLUSH_H_ */
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 500c8d3adc0..2a19c82e5d4 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1338,18 +1338,27 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
CPUTLBEntryFull *full, uintptr_t retaddr)
{
ram_addr_t ram_addr = mem_vaddr + full->xlat_section;
+ uint8_t mask;
trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
- tb_invalidate_phys_range_fast(ram_addr, size, retaddr);
+ tb_store_to_phys_range(ram_addr, size, retaddr);
}
/*
* Set both VGA and migration bits for simplicity and to remove
- * the notdirty callback faster.
+ * the notdirty callback faster. Incoherent icache also sets the
+ * code bit because incoherency is tracked and resolved in the TB
+ * code.
*/
- cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
+#ifdef TARGET_HAS_LAZY_ICACHE
+ mask = DIRTY_CLIENTS_ALL;
+#else
+ mask = DIRTY_CLIENTS_NOCODE;
+#endif
+
+ cpu_physical_memory_set_dirty_range(ram_addr, size, mask);
/* We remove the notdirty callback only if the code has been flushed. */
if (!cpu_physical_memory_is_clean(ram_addr)) {
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 3f1bebf6ab5..4f634469456 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -60,10 +60,20 @@ static bool tb_cmp(const void *ap, const void *bp)
tb_page_addr1(a) == tb_page_addr1(b));
}
+#ifdef TARGET_HAS_LAZY_ICACHE
+static QemuSpin icache_incoherent_lock;
+static bool icache_incoherent;
+static ram_addr_t icache_incoherent_start;
+static ram_addr_t icache_incoherent_end;
+#endif
+
void tb_htable_init(void)
{
unsigned int mode = QHT_MODE_AUTO_RESIZE;
+#ifdef TARGET_HAS_LAZY_ICACHE
+ qemu_spin_init(&icache_incoherent_lock);
+#endif
qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
}
@@ -803,6 +813,35 @@ void tb_flush(CPUState *cpu)
}
}
+#ifdef TARGET_HAS_LAZY_ICACHE
+static void do_tb_flush_incoherent(CPUState *cpu, run_on_cpu_data data)
+{
+ /* Should be no other CPUs running */
+ assert(!qemu_spin_trylock(&icache_incoherent_lock));
+ if (icache_incoherent) {
+ unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
+
+ tb_invalidate_phys_range(icache_incoherent_start,
+ icache_incoherent_end);
+ icache_incoherent = false;
+ icache_incoherent_start = 0;
+ icache_incoherent_end = 0;
+ qemu_spin_unlock(&icache_incoherent_lock);
+
+ do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
+ } else {
+ qemu_spin_unlock(&icache_incoherent_lock);
+ }
+}
+
+void tb_flush_incoherent(CPUState *cpu)
+{
+ if (tcg_enabled() && icache_incoherent) {
+ async_safe_run_on_cpu(cpu, do_tb_flush_incoherent, RUN_ON_CPU_NULL);
+ }
+}
+#endif
+
/* remove @orig from its @n_orig-th jump list */
static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
{
@@ -1231,4 +1270,38 @@ void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
page_collection_unlock(pages);
}
+
+void tb_store_to_phys_range(ram_addr_t ram_addr,
+ unsigned size, uintptr_t retaddr)
+{
+#ifndef TARGET_HAS_LAZY_ICACHE
+ tb_invalidate_phys_range_fast(ram_addr, size, retaddr);
+#else
+ ram_addr_t start, end;
+
+ /*
+ * Address comes in as byte-wise, but the cputlb dirty tracking operates
+ * on a page basis and will not be called a second time on the same page,
+ * so must cover a full page here.
+ */
+ start = ROUND_DOWN(ram_addr, TARGET_PAGE_SIZE);
+ end = ROUND_UP(ram_addr + size, TARGET_PAGE_SIZE) - 1;
+
+ qemu_spin_lock(&icache_incoherent_lock);
+ if (icache_incoherent) {
+ if (start < icache_incoherent_start) {
+ icache_incoherent_start = start;
+ }
+ if (end > icache_incoherent_end) {
+ icache_incoherent_end = end;
+ }
+ } else {
+ icache_incoherent = true;
+ icache_incoherent_start = start;
+ icache_incoherent_end = end;
+ }
+ qemu_spin_unlock(&icache_incoherent_lock);
+#endif
+}
+
#endif /* CONFIG_USER_ONLY */
diff --git a/system/memory_ldst.c.inc b/system/memory_ldst.c.inc
index 7f32d3d9ff3..029d9749d57 100644
--- a/system/memory_ldst.c.inc
+++ b/system/memory_ldst.c.inc
@@ -286,7 +286,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
stl_p(ptr, val);
dirty_log_mask = memory_region_get_dirty_log_mask(mr);
- dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
+ dirty_log_mask &= DIRTY_CLIENTS_NOCODE;
cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
4, dirty_log_mask);
r = MEMTX_OK;
--
2.47.1
next prev parent reply other threads:[~2025-03-31 15:55 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-31 15:54 [RFC PATCH 0/3] translation performance improvements Nicholas Piggin
2025-03-31 15:54 ` Nicholas Piggin [this message]
2025-03-31 19:51 ` [RFC PATCH 1/3] accel/tcg: Option to permit incoherent translation block cache vs stores Richard Henderson
2025-04-01 8:34 ` Nicholas Piggin
2025-04-01 8:42 ` Philippe Mathieu-Daudé
2025-03-31 15:54 ` [RFC PATCH 2/3] target/ppc: define TARGET_HAS_LAZY_ICACHE Nicholas Piggin
2025-03-31 15:54 ` [RFC PATCH 3/3] target/ppc: Allow goto-tb on fixed real mode translations Nicholas Piggin
2025-03-31 19:40 ` [RFC PATCH 0/3] translation performance improvements Richard Henderson
2025-04-01 8:33 ` Nicholas Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250331155423.619451-2-npiggin@gmail.com \
--to=npiggin@gmail.com \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).