From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 09/41] accel/tcg: Add tlb_flush_page_bits_by_mmuidx*
Date: Tue, 20 Oct 2020 16:56:24 +0100 [thread overview]
Message-ID: <20201020155656.8045-10-peter.maydell@linaro.org> (raw)
In-Reply-To: <20201020155656.8045-1-peter.maydell@linaro.org>
From: Richard Henderson <richard.henderson@linaro.org>
On ARM, the Top Byte Ignore feature means that only 56 bits of
the address are significant in the virtual address. We are
required to give the entire 64-bit address to FAR_ELx on fault,
which means that we do not "clean" the top byte early in TCG.
This new interface allows us to flush all 256 possible aliases
for a given page, currently missed by tlb_flush_page*.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20201016210754.818257-2-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
include/exec/exec-all.h | 36 ++++++
accel/tcg/cputlb.c | 275 ++++++++++++++++++++++++++++++++++++++--
2 files changed, 302 insertions(+), 9 deletions(-)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 66f9b4cca67..4707ac140ce 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -251,6 +251,25 @@ void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap);
* depend on when the guests translation ends the TB.
*/
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
+
+/**
+ * tlb_flush_page_bits_by_mmuidx
+ * @cpu: CPU whose TLB should be flushed
+ * @addr: virtual address of page to be flushed
+ * @idxmap: bitmap of mmu indexes to flush
+ * @bits: number of significant bits in address
+ *
+ * Similar to tlb_flush_page_mask, but with a bitmap of indexes.
+ */
+void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
+ uint16_t idxmap, unsigned bits);
+
+/* Similarly, with broadcast and syncing. */
+void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
+ uint16_t idxmap, unsigned bits);
+void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
+ (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits);
+
/**
* tlb_set_page_with_attrs:
* @cpu: CPU to add this TLB entry for
@@ -337,6 +356,23 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
uint16_t idxmap)
{
}
+static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu,
+ target_ulong addr,
+ uint16_t idxmap,
+ unsigned bits)
+{
+}
+static inline void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu,
+ target_ulong addr,
+ uint16_t idxmap,
+ unsigned bits)
+{
+}
+static inline void
+tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
+ uint16_t idxmap, unsigned bits)
+{
+}
#endif
/**
* probe_access:
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 2bbbb3ab290..42ab79c1a58 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -409,12 +409,21 @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu)
tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}
+static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
+ target_ulong page, target_ulong mask)
+{
+ page &= mask;
+ mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
+
+ return (page == (tlb_entry->addr_read & mask) ||
+ page == (tlb_addr_write(tlb_entry) & mask) ||
+ page == (tlb_entry->addr_code & mask));
+}
+
static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
target_ulong page)
{
- return tlb_hit_page(tlb_entry->addr_read, page) ||
- tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
- tlb_hit_page(tlb_entry->addr_code, page);
+ return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
}
/**
@@ -427,31 +436,45 @@ static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
}
/* Called with tlb_c.lock held */
-static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
- target_ulong page)
+static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
+ target_ulong page,
+ target_ulong mask)
{
- if (tlb_hit_page_anyprot(tlb_entry, page)) {
+ if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
memset(tlb_entry, -1, sizeof(*tlb_entry));
return true;
}
return false;
}
+static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
+ target_ulong page)
+{
+ return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
+}
+
/* Called with tlb_c.lock held */
-static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
- target_ulong page)
+static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
+ target_ulong page,
+ target_ulong mask)
{
CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
int k;
assert_cpu_is_self(env_cpu(env));
for (k = 0; k < CPU_VTLB_SIZE; k++) {
- if (tlb_flush_entry_locked(&d->vtable[k], page)) {
+ if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
tlb_n_used_entries_dec(env, mmu_idx);
}
}
}
+static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
+ target_ulong page)
+{
+ tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1);
+}
+
static void tlb_flush_page_locked(CPUArchState *env, int midx,
target_ulong page)
{
@@ -666,6 +689,240 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}
+static void tlb_flush_page_bits_locked(CPUArchState *env, int midx,
+ target_ulong page, unsigned bits)
+{
+ CPUTLBDesc *d = &env_tlb(env)->d[midx];
+ CPUTLBDescFast *f = &env_tlb(env)->f[midx];
+ target_ulong mask = MAKE_64BIT_MASK(0, bits);
+
+ /*
+ * If @bits is smaller than the tlb size, there may be multiple entries
+ * within the TLB; otherwise all addresses that match under @mask hit
+ * the same TLB entry.
+ *
+ * TODO: Perhaps allow bits to be a few bits less than the size.
+ * For now, just flush the entire TLB.
+ */
+ if (mask < f->mask) {
+ tlb_debug("forcing full flush midx %d ("
+ TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+ midx, page, mask);
+ tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
+ return;
+ }
+
+ /* Check if we need to flush due to large pages. */
+ if ((page & d->large_page_mask) == d->large_page_addr) {
+ tlb_debug("forcing full flush midx %d ("
+ TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+ midx, d->large_page_addr, d->large_page_mask);
+ tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
+ return;
+ }
+
+ if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) {
+ tlb_n_used_entries_dec(env, midx);
+ }
+ tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
+}
+
+typedef struct {
+ target_ulong addr;
+ uint16_t idxmap;
+ uint16_t bits;
+} TLBFlushPageBitsByMMUIdxData;
+
+static void
+tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu,
+ TLBFlushPageBitsByMMUIdxData d)
+{
+ CPUArchState *env = cpu->env_ptr;
+ int mmu_idx;
+
+ assert_cpu_is_self(cpu);
+
+ tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n",
+ d.addr, d.bits, d.idxmap);
+
+ qemu_spin_lock(&env_tlb(env)->c.lock);
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+ if ((d.idxmap >> mmu_idx) & 1) {
+ tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits);
+ }
+ }
+ qemu_spin_unlock(&env_tlb(env)->c.lock);
+
+ tb_flush_jmp_cache(cpu, d.addr);
+}
+
+static bool encode_pbm_to_runon(run_on_cpu_data *out,
+ TLBFlushPageBitsByMMUIdxData d)
+{
+ /* We need 6 bits to hold to hold @bits up to 63. */
+ if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) {
+ *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits);
+ return true;
+ }
+ return false;
+}
+
+static TLBFlushPageBitsByMMUIdxData
+decode_runon_to_pbm(run_on_cpu_data data)
+{
+ target_ulong addr_map_bits = (target_ulong) data.target_ptr;
+ return (TLBFlushPageBitsByMMUIdxData){
+ .addr = addr_map_bits & TARGET_PAGE_MASK,
+ .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6,
+ .bits = addr_map_bits & 0x3f
+ };
+}
+
+static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu,
+ run_on_cpu_data runon)
+{
+ tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon));
+}
+
+static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu,
+ run_on_cpu_data data)
+{
+ TLBFlushPageBitsByMMUIdxData *d = data.host_ptr;
+ tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d);
+ g_free(d);
+}
+
+void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
+ uint16_t idxmap, unsigned bits)
+{
+ TLBFlushPageBitsByMMUIdxData d;
+ run_on_cpu_data runon;
+
+ /* If all bits are significant, this devolves to tlb_flush_page. */
+ if (bits >= TARGET_LONG_BITS) {
+ tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
+ return;
+ }
+ /* If no page bits are significant, this devolves to tlb_flush. */
+ if (bits < TARGET_PAGE_BITS) {
+ tlb_flush_by_mmuidx(cpu, idxmap);
+ return;
+ }
+
+ /* This should already be page aligned */
+ d.addr = addr & TARGET_PAGE_MASK;
+ d.idxmap = idxmap;
+ d.bits = bits;
+
+ if (qemu_cpu_is_self(cpu)) {
+ tlb_flush_page_bits_by_mmuidx_async_0(cpu, d);
+ } else if (encode_pbm_to_runon(&runon, d)) {
+ async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
+ } else {
+ TLBFlushPageBitsByMMUIdxData *p
+ = g_new(TLBFlushPageBitsByMMUIdxData, 1);
+
+ /* Otherwise allocate a structure, freed by the worker. */
+ *p = d;
+ async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2,
+ RUN_ON_CPU_HOST_PTR(p));
+ }
+}
+
+void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
+ target_ulong addr,
+ uint16_t idxmap,
+ unsigned bits)
+{
+ TLBFlushPageBitsByMMUIdxData d;
+ run_on_cpu_data runon;
+
+ /* If all bits are significant, this devolves to tlb_flush_page. */
+ if (bits >= TARGET_LONG_BITS) {
+ tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap);
+ return;
+ }
+ /* If no page bits are significant, this devolves to tlb_flush. */
+ if (bits < TARGET_PAGE_BITS) {
+ tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap);
+ return;
+ }
+
+ /* This should already be page aligned */
+ d.addr = addr & TARGET_PAGE_MASK;
+ d.idxmap = idxmap;
+ d.bits = bits;
+
+ if (encode_pbm_to_runon(&runon, d)) {
+ flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
+ } else {
+ CPUState *dst_cpu;
+ TLBFlushPageBitsByMMUIdxData *p;
+
+ /* Allocate a separate data block for each destination cpu. */
+ CPU_FOREACH(dst_cpu) {
+ if (dst_cpu != src_cpu) {
+ p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
+ *p = d;
+ async_run_on_cpu(dst_cpu,
+ tlb_flush_page_bits_by_mmuidx_async_2,
+ RUN_ON_CPU_HOST_PTR(p));
+ }
+ }
+ }
+
+ tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d);
+}
+
+void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
+ target_ulong addr,
+ uint16_t idxmap,
+ unsigned bits)
+{
+ TLBFlushPageBitsByMMUIdxData d;
+ run_on_cpu_data runon;
+
+ /* If all bits are significant, this devolves to tlb_flush_page. */
+ if (bits >= TARGET_LONG_BITS) {
+ tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
+ return;
+ }
+ /* If no page bits are significant, this devolves to tlb_flush. */
+ if (bits < TARGET_PAGE_BITS) {
+ tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
+ return;
+ }
+
+ /* This should already be page aligned */
+ d.addr = addr & TARGET_PAGE_MASK;
+ d.idxmap = idxmap;
+ d.bits = bits;
+
+ if (encode_pbm_to_runon(&runon, d)) {
+ flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1,
+ runon);
+ } else {
+ CPUState *dst_cpu;
+ TLBFlushPageBitsByMMUIdxData *p;
+
+ /* Allocate a separate data block for each destination cpu. */
+ CPU_FOREACH(dst_cpu) {
+ if (dst_cpu != src_cpu) {
+ p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
+ *p = d;
+ async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
+ RUN_ON_CPU_HOST_PTR(p));
+ }
+ }
+
+ p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
+ *p = d;
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
+ RUN_ON_CPU_HOST_PTR(p));
+ }
+}
+
/* update the TLBs so that writes to code in the virtual page 'addr'
can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
--
2.20.1
next prev parent reply other threads:[~2020-10-20 16:12 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-20 15:56 [PULL 00/41] target-arm queue Peter Maydell
2020-10-20 15:56 ` [PULL 01/41] target/arm: Fix SMLAD incorrect setting of Q bit Peter Maydell
2020-10-20 15:56 ` [PULL 02/41] target/arm: AArch32 VCVT fixed-point to float is always round-to-nearest Peter Maydell
2020-10-20 15:56 ` [PULL 03/41] hw/arm/strongarm: Fix 'time to transmit a char' unit comment Peter Maydell
2020-10-20 15:56 ` [PULL 04/41] hw/arm: Restrict APEI tables generation to the 'virt' machine Peter Maydell
2020-10-20 15:56 ` [PULL 05/41] hw/timer/bcm2835: Introduce BCM2835_SYSTIMER_COUNT definition Peter Maydell
2020-10-20 15:56 ` [PULL 06/41] hw/timer/bcm2835: Rename variable holding CTRL_STATUS register Peter Maydell
2020-10-20 15:56 ` [PULL 07/41] hw/timer/bcm2835: Support the timer COMPARE registers Peter Maydell
2020-10-20 15:56 ` [PULL 08/41] hw/arm/bcm2835_peripherals: Correctly wire the SYS_timer IRQs Peter Maydell
2020-10-20 15:56 ` Peter Maydell [this message]
2020-10-20 15:56 ` [PULL 10/41] target/arm: Use tlb_flush_page_bits_by_mmuidx* Peter Maydell
2020-10-20 15:56 ` [PULL 11/41] tests/qtest: Add npcm7xx timer test Peter Maydell
2020-10-20 15:56 ` [PULL 12/41] loads-stores.rst: add footnote that clarifies GETPC usage Peter Maydell
2020-10-20 15:56 ` [PULL 13/41] hw/intc/bcm2835_ic: Trace GPU/CPU IRQ handlers Peter Maydell
2020-10-20 15:56 ` [PULL 14/41] hw/intc/bcm2836_control: Use IRQ definitions instead of magic numbers Peter Maydell
2020-10-20 15:56 ` [PULL 15/41] target/arm: Remove redundant mmu_idx lookup Peter Maydell
2020-10-20 15:56 ` [PULL 16/41] target/arm: Fix reported EL for mte_check_fail Peter Maydell
2020-10-20 15:56 ` [PULL 17/41] target/arm: Ignore HCR_EL2.ATA when {E2H,TGE} != 11 Peter Maydell
2020-10-20 15:56 ` [PULL 18/41] microbit_i2c: Fix coredump when dump-vmstate Peter Maydell
2020-10-20 15:56 ` [PULL 19/41] hw/arm/nseries: Fix loading kernel image on n8x0 machines Peter Maydell
2020-10-20 15:56 ` [PULL 20/41] decodetree: Fix codegen for non-overlapping group inside overlapping group Peter Maydell
2020-10-20 15:56 ` [PULL 21/41] target/arm: Implement v8.1M NOCP handling Peter Maydell
2020-10-20 15:56 ` [PULL 22/41] target/arm: Implement v8.1M conditional-select insns Peter Maydell
2020-10-20 15:56 ` [PULL 23/41] target/arm: Make the t32 insn[25:23]=111 group non-overlapping Peter Maydell
2020-10-20 15:56 ` [PULL 24/41] target/arm: Don't allow BLX imm for M-profile Peter Maydell
2020-10-20 15:56 ` [PULL 25/41] target/arm: Implement v8.1M branch-future insns (as NOPs) Peter Maydell
2020-10-20 15:56 ` [PULL 26/41] target/arm: Implement v8.1M low-overhead-loop instructions Peter Maydell
2020-10-20 15:56 ` [PULL 27/41] target/arm: Fix has_vfp/has_neon ID reg squashing for M-profile Peter Maydell
2020-10-20 15:56 ` [PULL 28/41] target/arm: Allow M-profile CPUs with FP16 to set FPSCR.FP16 Peter Maydell
2020-10-20 15:56 ` [PULL 29/41] target/arm: Implement FPSCR.LTPSIZE for M-profile LOB extension Peter Maydell
2020-10-20 15:56 ` [PULL 30/41] linux-user/aarch64: Reset btype for signals Peter Maydell
2020-10-20 15:56 ` [PULL 31/41] linux-user: Set PAGE_TARGET_1 for TARGET_PROT_BTI Peter Maydell
2020-10-20 15:56 ` [PULL 32/41] include/elf: Add defines related to GNU property notes for AArch64 Peter Maydell
2020-10-20 15:56 ` [PULL 33/41] linux-user/elfload: Avoid leaking interp_name using GLib memory API Peter Maydell
2020-10-20 15:56 ` [PULL 34/41] linux-user/elfload: Fix coding style in load_elf_image Peter Maydell
2020-10-20 15:56 ` [PULL 35/41] linux-user/elfload: Adjust iteration over phdr Peter Maydell
2020-10-20 15:56 ` [PULL 36/41] linux-user/elfload: Move PT_INTERP detection to first loop Peter Maydell
2020-10-20 15:56 ` [PULL 37/41] linux-user/elfload: Use Error for load_elf_image Peter Maydell
2020-10-20 15:56 ` [PULL 38/41] linux-user/elfload: Use Error for load_elf_interp Peter Maydell
2020-10-20 15:56 ` [PULL 39/41] linux-user/elfload: Parse NT_GNU_PROPERTY_TYPE_0 notes Peter Maydell
2020-10-20 15:56 ` [PULL 40/41] linux-user/elfload: Parse GNU_PROPERTY_AARCH64_FEATURE_1_AND Peter Maydell
2020-10-20 15:56 ` [PULL 41/41] tests/tcg/aarch64: Add bti smoke tests Peter Maydell
2020-10-20 16:36 ` [PULL 00/41] target-arm queue Philippe Mathieu-Daudé
2020-10-20 16:36 ` no-reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201020155656.8045-10-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).