* [PATCH 00/14] target/i386: Use atomic operations for pte updates
@ 2022-08-22 23:57 Richard Henderson
2022-08-22 23:57 ` [PATCH 01/14] accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo
This patch set does two things:
(1) Remove assert(!probe) from the x86 tlb_fill
It turns out that this is a prerequisite for
"[PATCH v6 00/21] linux-user: Fix siginfo_t contents when jumping
to non-readable pages",
because of a new use of probe_access(..., nonfault)
when comparing TBs that cross a page boundary.
Patches 7-10 are sufficient to fix this.
Having audited all of the targets: Sparc has a similar assert,
and AVR simply doesn't check probe at all. Both will need fixing.
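For reference, the kind of non-faulting probe involved looks roughly
like this (a sketch only, using the existing probe_access_flags()
interface; page2_addr is an illustrative name, the real call site is
in the linux-user series above):

    void *host;
    int flags;

    /* Probe the second page of a cross-page TB without faulting. */
    flags = probe_access_flags(env, page2_addr, MMU_INST_FETCH,
                               cpu_mmu_index(env, true),
                               true /* nonfault */, &host, 0);
    if (flags & TLB_INVALID_MASK) {
        /* Page not mapped: cannot compare the bytes of the old TB. */
    }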
(2) Use atomic operations for pte updates, fixing a long-standing
bug present since our conversion to MTTCG.
For simplicity, patches 1-6 are from the middle of
"[PATCH v2 00/66] target/arm: Implement FEAT_HAFDBS".
r~
Richard Henderson (14):
accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull
accel/tcg: Drop addr member from SavedIOTLB
accel/tcg: Suppress auto-invalidate in probe_access_internal
accel/tcg: Introduce probe_access_full
accel/tcg: Introduce tlb_set_page_full
include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA
target/i386: Use MMUAccessType across excp_helper.c
target/i386: Direct call get_hphys from mmu_translate
target/i386: Introduce structures for mmu_translate
target/i386: Reorg GET_HPHYS
target/i386: Add MMU_PHYS_IDX and MMU_NESTED_IDX
target/i386: Use MMU_NESTED_IDX for vmload/vmsave
target/i386: Combine 5 sets of variables in mmu_translate
target/i386: Use atomic operations for pte updates
include/exec/cpu-defs.h | 45 +-
include/exec/exec-all.h | 33 ++
include/hw/core/cpu.h | 1 -
target/i386/cpu-param.h | 2 +-
target/i386/cpu.h | 5 +-
accel/tcg/cputlb.c | 215 +++++----
target/arm/mte_helper.c | 14 +-
target/arm/sve_helper.c | 4 +-
target/arm/translate-a64.c | 2 +-
target/i386/tcg/sysemu/excp_helper.c | 692 +++++++++++++++++----------
target/i386/tcg/sysemu/svm_helper.c | 234 +++++----
target/s390x/tcg/mem_helper.c | 4 -
12 files changed, 772 insertions(+), 479 deletions(-)
--
2.34.1
* [PATCH 01/14] accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-22 23:57 ` [PATCH 02/14] accel/tcg: Drop addr member from SavedIOTLB Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo
This structure will shortly contain more than just
data for accessing MMIO. Rename the 'addr' member
to 'xlat_section' to more clearly indicate its purpose.
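The encoding of the field is unchanged; the typical consumer decodes
it as in io_readx()/io_writex() below:

    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;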
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-defs.h | 22 ++++----
accel/tcg/cputlb.c | 102 +++++++++++++++++++------------------
target/arm/mte_helper.c | 14 ++---
target/arm/sve_helper.c | 4 +-
target/arm/translate-a64.c | 2 +-
5 files changed, 73 insertions(+), 71 deletions(-)
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index ba3cd32a1e..f70f54d850 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -108,6 +108,7 @@ typedef uint64_t target_ulong;
# endif
# endif
+/* Minimalized TLB entry for use by TCG fast path. */
typedef struct CPUTLBEntry {
/* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not
@@ -131,14 +132,14 @@ typedef struct CPUTLBEntry {
QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
-/* The IOTLB is not accessed directly inline by generated TCG code,
- * so the CPUIOTLBEntry layout is not as critical as that of the
- * CPUTLBEntry. (This is also why we don't want to combine the two
- * structs into one.)
+/*
+ * The full TLB entry, which is not accessed by generated TCG code,
+ * so the layout is not as critical as that of CPUTLBEntry. This is
+ * also why we don't want to combine the two structs.
*/
-typedef struct CPUIOTLBEntry {
+typedef struct CPUTLBEntryFull {
/*
- * @addr contains:
+ * @xlat_section contains:
* - in the lower TARGET_PAGE_BITS, a physical section number
* - with the lower TARGET_PAGE_BITS masked off, an offset which
* must be added to the virtual address to obtain:
@@ -146,9 +147,9 @@ typedef struct CPUIOTLBEntry {
* number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
* + the offset within the target MemoryRegion (otherwise)
*/
- hwaddr addr;
+ hwaddr xlat_section;
MemTxAttrs attrs;
-} CPUIOTLBEntry;
+} CPUTLBEntryFull;
/*
* Data elements that are per MMU mode, minus the bits accessed by
@@ -172,9 +173,8 @@ typedef struct CPUTLBDesc {
size_t vindex;
/* The tlb victim table, in two parts. */
CPUTLBEntry vtable[CPU_VTLB_SIZE];
- CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
- /* The iotlb. */
- CPUIOTLBEntry *iotlb;
+ CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE];
+ CPUTLBEntryFull *fulltlb;
} CPUTLBDesc;
/*
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index a46f3a654d..a37275bf8e 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -200,13 +200,13 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
}
g_free(fast->table);
- g_free(desc->iotlb);
+ g_free(desc->fulltlb);
tlb_window_reset(desc, now, 0);
/* desc->n_used_entries is cleared by the caller */
fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
fast->table = g_try_new(CPUTLBEntry, new_size);
- desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
+ desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
/*
* If the allocations fail, try smaller sizes. We just freed some
@@ -215,7 +215,7 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
* allocations to fail though, so we progressively reduce the allocation
* size, aborting if we cannot even allocate the smallest TLB we support.
*/
- while (fast->table == NULL || desc->iotlb == NULL) {
+ while (fast->table == NULL || desc->fulltlb == NULL) {
if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
error_report("%s: %s", __func__, strerror(errno));
abort();
@@ -224,9 +224,9 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
g_free(fast->table);
- g_free(desc->iotlb);
+ g_free(desc->fulltlb);
fast->table = g_try_new(CPUTLBEntry, new_size);
- desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
+ desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
}
}
@@ -258,7 +258,7 @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
desc->n_used_entries = 0;
fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
fast->table = g_new(CPUTLBEntry, n_entries);
- desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
+ desc->fulltlb = g_new(CPUTLBEntryFull, n_entries);
tlb_mmu_flush_locked(desc, fast);
}
@@ -299,7 +299,7 @@ void tlb_destroy(CPUState *cpu)
CPUTLBDescFast *fast = &env_tlb(env)->f[i];
g_free(fast->table);
- g_free(desc->iotlb);
+ g_free(desc->fulltlb);
}
}
@@ -1219,7 +1219,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
/* Evict the old entry into the victim tlb. */
copy_tlb_helper_locked(tv, te);
- desc->viotlb[vidx] = desc->iotlb[index];
+ desc->vfulltlb[vidx] = desc->fulltlb[index];
tlb_n_used_entries_dec(env, mmu_idx);
}
@@ -1236,8 +1236,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
* subtract here is that of the page base, and not the same as the
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
*/
- desc->iotlb[index].addr = iotlb - vaddr_page;
- desc->iotlb[index].attrs = attrs;
+ desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
+ desc->fulltlb[index].attrs = attrs;
/* Now calculate the new entry */
tn.addend = addend - vaddr_page;
@@ -1341,7 +1341,7 @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
}
}
-static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
int mmu_idx, target_ulong addr, uintptr_t retaddr,
MMUAccessType access_type, MemOp op)
{
@@ -1353,9 +1353,9 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
bool locked = false;
MemTxResult r;
- section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+ section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
mr = section->mr;
- mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
if (!cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
@@ -1365,14 +1365,14 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
qemu_mutex_lock_iothread();
locked = true;
}
- r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
+ r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
if (r != MEMTX_OK) {
hwaddr physaddr = mr_offset +
section->offset_within_address_space -
section->offset_within_region;
cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
- mmu_idx, iotlbentry->attrs, r, retaddr);
+ mmu_idx, full->attrs, r, retaddr);
}
if (locked) {
qemu_mutex_unlock_iothread();
@@ -1382,8 +1382,8 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
}
/*
- * Save a potentially trashed IOTLB entry for later lookup by plugin.
- * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
+ * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
+ * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
* because of the side effect of io_writex changing memory layout.
*/
static void save_iotlb_data(CPUState *cs, hwaddr addr,
@@ -1397,7 +1397,7 @@ static void save_iotlb_data(CPUState *cs, hwaddr addr,
#endif
}
-static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
int mmu_idx, uint64_t val, target_ulong addr,
uintptr_t retaddr, MemOp op)
{
@@ -1408,9 +1408,9 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
bool locked = false;
MemTxResult r;
- section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+ section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
mr = section->mr;
- mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
if (!cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
@@ -1420,20 +1420,20 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
* The memory_region_dispatch may trigger a flush/resize
* so for plugins we save the iotlb_data just in case.
*/
- save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
+ save_iotlb_data(cpu, full->xlat_section, section, mr_offset);
if (!qemu_mutex_iothread_locked()) {
qemu_mutex_lock_iothread();
locked = true;
}
- r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
+ r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
if (r != MEMTX_OK) {
hwaddr physaddr = mr_offset +
section->offset_within_address_space -
section->offset_within_region;
cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
- MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
+ MMU_DATA_STORE, mmu_idx, full->attrs, r,
retaddr);
}
if (locked) {
@@ -1480,9 +1480,10 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
copy_tlb_helper_locked(vtlb, &tmptlb);
qemu_spin_unlock(&env_tlb(env)->c.lock);
- CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
- CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
- tmpio = *io; *io = *vio; *vio = tmpio;
+ CPUTLBEntryFull *f1 = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+ CPUTLBEntryFull *f2 = &env_tlb(env)->d[mmu_idx].vfulltlb[vidx];
+ CPUTLBEntryFull tmpf;
+ tmpf = *f1; *f1 = *f2; *f2 = tmpf;
return true;
}
}
@@ -1550,9 +1551,9 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
}
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
- CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
+ CPUTLBEntryFull *full, uintptr_t retaddr)
{
- ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
+ ram_addr_t ram_addr = mem_vaddr + full->xlat_section;
trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
@@ -1645,9 +1646,9 @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
/* Handle clean RAM pages. */
if (unlikely(flags & TLB_NOTDIRTY)) {
uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
- notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+ notdirty_write(env_cpu(env), addr, 1, full, retaddr);
flags &= ~TLB_NOTDIRTY;
}
@@ -1672,19 +1673,19 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
/* Handle watchpoints. */
if (flags & TLB_WATCHPOINT) {
int wp_access = (access_type == MMU_DATA_STORE
? BP_MEM_WRITE : BP_MEM_READ);
cpu_check_watchpoint(env_cpu(env), addr, size,
- iotlbentry->attrs, wp_access, retaddr);
+ full->attrs, wp_access, retaddr);
}
/* Handle clean RAM pages. */
if (flags & TLB_NOTDIRTY) {
- notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+ notdirty_write(env_cpu(env), addr, 1, full, retaddr);
}
}
@@ -1715,7 +1716,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
* should have just filled the TLB. The one corner case is io_writex
* which can cause TLB flushes and potential resizing of the TLBs
* losing the information we need. In those cases we need to recover
- * data from a copy of the iotlbentry. As long as this always occurs
+ * data from a copy of the CPUTLBEntryFull. As long as this always occurs
* from the same thread (which a mem callback will be) this is safe.
*/
@@ -1730,11 +1731,12 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
if (likely(tlb_hit(tlb_addr, addr))) {
/* We must have an iotlb entry for MMIO */
if (tlb_addr & TLB_MMIO) {
- CPUIOTLBEntry *iotlbentry;
- iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ CPUTLBEntryFull *full;
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
data->is_io = true;
- data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
- data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ data->v.io.section =
+ iotlb_to_section(cpu, full->xlat_section, full->attrs);
+ data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
} else {
data->is_io = false;
data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
@@ -1842,7 +1844,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
notdirty_write(env_cpu(env), addr, size,
- &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
+ &env_tlb(env)->d[mmu_idx].fulltlb[index], retaddr);
}
return hostaddr;
@@ -1950,7 +1952,7 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
/* Handle anything that isn't just a straight memory access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUIOTLBEntry *iotlbentry;
+ CPUTLBEntryFull *full;
bool need_swap;
/* For anything that is unaligned, recurse through full_load. */
@@ -1958,20 +1960,20 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
goto do_unaligned_access;
}
- iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
/* Handle watchpoints. */
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
/* On watchpoint hit, this will longjmp out. */
cpu_check_watchpoint(env_cpu(env), addr, size,
- iotlbentry->attrs, BP_MEM_READ, retaddr);
+ full->attrs, BP_MEM_READ, retaddr);
}
need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
/* Handle I/O access. */
if (likely(tlb_addr & TLB_MMIO)) {
- return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
+ return io_readx(env, full, mmu_idx, addr, retaddr,
access_type, op ^ (need_swap * MO_BSWAP));
}
@@ -2286,12 +2288,12 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
*/
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
cpu_check_watchpoint(env_cpu(env), addr, size - size2,
- env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
+ env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
BP_MEM_WRITE, retaddr);
}
if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
cpu_check_watchpoint(env_cpu(env), page2, size2,
- env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
+ env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
BP_MEM_WRITE, retaddr);
}
@@ -2355,7 +2357,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
/* Handle anything that isn't just a straight memory access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUIOTLBEntry *iotlbentry;
+ CPUTLBEntryFull *full;
bool need_swap;
/* For anything that is unaligned, recurse through byte stores. */
@@ -2363,20 +2365,20 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
goto do_unaligned_access;
}
- iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
/* Handle watchpoints. */
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
/* On watchpoint hit, this will longjmp out. */
cpu_check_watchpoint(env_cpu(env), addr, size,
- iotlbentry->attrs, BP_MEM_WRITE, retaddr);
+ full->attrs, BP_MEM_WRITE, retaddr);
}
need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
/* Handle I/O access. */
if (tlb_addr & TLB_MMIO) {
- io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
+ io_writex(env, full, mmu_idx, val, addr, retaddr,
op ^ (need_swap * MO_BSWAP));
return;
}
@@ -2388,7 +2390,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
/* Handle clean RAM pages. */
if (tlb_addr & TLB_NOTDIRTY) {
- notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
+ notdirty_write(env_cpu(env), addr, size, full, retaddr);
}
haddr = (void *)((uintptr_t)addr + entry->addend);
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index d11a8c70d0..fdd23ab3f8 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -106,7 +106,7 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
return tags + index;
#else
uintptr_t index;
- CPUIOTLBEntry *iotlbentry;
+ CPUTLBEntryFull *full;
int in_page, flags;
ram_addr_t ptr_ra;
hwaddr ptr_paddr, tag_paddr, xlat;
@@ -129,7 +129,7 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
assert(!(flags & TLB_INVALID_MASK));
/*
- * Find the iotlbentry for ptr. This *must* be present in the TLB
+ * Find the CPUTLBEntryFull for ptr. This *must* be present in the TLB
* because we just found the mapping.
* TODO: Perhaps there should be a cputlb helper that returns a
* matching tlb entry + iotlb entry.
@@ -144,10 +144,10 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
g_assert(tlb_hit(comparator, ptr));
}
# endif
- iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index];
+ full = &env_tlb(env)->d[ptr_mmu_idx].fulltlb[index];
/* If the virtual page MemAttr != Tagged, access unchecked. */
- if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) {
+ if (!arm_tlb_mte_tagged(&full->attrs)) {
return NULL;
}
@@ -181,7 +181,7 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
assert(ra != 0);
cpu_check_watchpoint(env_cpu(env), ptr, ptr_size,
- iotlbentry->attrs, wp, ra);
+ full->attrs, wp, ra);
}
/*
@@ -202,11 +202,11 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);
/* Look up the address in tag space. */
- tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
+ tag_asi = full->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
tag_access == MMU_DATA_STORE,
- iotlbentry->attrs);
+ full->attrs);
/*
* Note that @mr will never be NULL. If there is nothing in the address
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index d6f7ef94fe..9cae8fd352 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -5384,8 +5384,8 @@ bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
g_assert(tlb_hit(comparator, addr));
# endif
- CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
- info->attrs = iotlbentry->attrs;
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+ info->attrs = full->attrs;
}
#endif
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 163df8c615..b7787e7786 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14634,7 +14634,7 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s)
* table entry even for that case.
*/
return (tlb_hit(entry->addr_code, addr) &&
- arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs));
+ arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].fulltlb[index].attrs));
#endif
}
--
2.34.1
* [PATCH 02/14] accel/tcg: Drop addr member from SavedIOTLB
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
2022-08-22 23:57 ` [PATCH 01/14] accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-22 23:57 ` [PATCH 03/14] accel/tcg: Suppress auto-invalidate in probe_access_internal Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo
This field is only written, not read; remove it.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/hw/core/cpu.h | 1 -
accel/tcg/cputlb.c | 7 +++----
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 500503da13..9e47184513 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -218,7 +218,6 @@ struct CPUWatchpoint {
* the memory regions get moved around by io_writex.
*/
typedef struct SavedIOTLB {
- hwaddr addr;
MemoryRegionSection *section;
hwaddr mr_offset;
} SavedIOTLB;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index a37275bf8e..1509df96b4 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1386,12 +1386,11 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
* This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
* because of the side effect of io_writex changing memory layout.
*/
-static void save_iotlb_data(CPUState *cs, hwaddr addr,
- MemoryRegionSection *section, hwaddr mr_offset)
+static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
+ hwaddr mr_offset)
{
#ifdef CONFIG_PLUGIN
SavedIOTLB *saved = &cs->saved_iotlb;
- saved->addr = addr;
saved->section = section;
saved->mr_offset = mr_offset;
#endif
@@ -1420,7 +1419,7 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
* The memory_region_dispatch may trigger a flush/resize
* so for plugins we save the iotlb_data just in case.
*/
- save_iotlb_data(cpu, full->xlat_section, section, mr_offset);
+ save_iotlb_data(cpu, section, mr_offset);
if (!qemu_mutex_iothread_locked()) {
qemu_mutex_lock_iothread();
--
2.34.1
* [PATCH 03/14] accel/tcg: Suppress auto-invalidate in probe_access_internal
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
2022-08-22 23:57 ` [PATCH 01/14] accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull Richard Henderson
2022-08-22 23:57 ` [PATCH 02/14] accel/tcg: Drop addr member from SavedIOTLB Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-23 9:19 ` David Hildenbrand
2022-08-22 23:57 ` [PATCH 04/14] accel/tcg: Introduce probe_access_full Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo, David Hildenbrand
When tlb_set_page is called with PAGE_WRITE_INV set,
we immediately set TLB_INVALID_MASK in order to force
tlb_fill to be called on the next lookup. Here in
probe_access_internal, we have just called tlb_fill
and eliminated true misses, so the lookup must be valid.
This allows us to remove a warning comment from s390x;
there doesn't seem to be any reason to change that code otherwise.
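For context, the PAGE_WRITE_INV handling referred to above lives in
tlb_set_page_with_attrs(), roughly as follows (paraphrased from
cputlb.c, not part of this patch):

    if (prot & PAGE_WRITE) {
        tn.addr_write = write_flags;
        if (prot & PAGE_WRITE_INV) {
            /* Force tlb_fill on the next write to this page. */
            tn.addr_write |= TLB_INVALID_MASK;
        }
    }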
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 10 +++++++++-
target/s390x/tcg/mem_helper.c | 4 ----
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 1509df96b4..5359113e8d 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1602,6 +1602,7 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
}
tlb_addr = tlb_read_ofs(entry, elt_ofs);
+ flags = TLB_FLAGS_MASK;
page_addr = addr & TARGET_PAGE_MASK;
if (!tlb_hit_page(tlb_addr, page_addr)) {
if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
@@ -1617,10 +1618,17 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
/* TLB resize via tlb_fill may have moved the entry. */
entry = tlb_entry(env, mmu_idx, addr);
+
+ /*
+ * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
+ * to force the next access through tlb_fill. We've just
+ * called tlb_fill, so we know that this entry *is* valid.
+ */
+ flags &= ~TLB_INVALID_MASK;
}
tlb_addr = tlb_read_ofs(entry, elt_ofs);
}
- flags = tlb_addr & TLB_FLAGS_MASK;
+ flags &= tlb_addr;
/* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index fc52aa128b..3758b9e688 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -148,10 +148,6 @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
#else
int flags;
- /*
- * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
- * to detect if there was an exception during tlb_fill().
- */
env->tlb_fill_exc = 0;
flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
ra);
--
2.34.1
* [PATCH 04/14] accel/tcg: Introduce probe_access_full
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
2022-08-22 23:57 ` [PATCH 03/14] accel/tcg: Suppress auto-invalidate in probe_access_internal Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-22 23:57 ` [PATCH 05/14] accel/tcg: Introduce tlb_set_page_full Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo
Add an interface to return the CPUTLBEntryFull struct
that goes with the lookup. The result is not intended
to be valid across multiple lookups, so the user must
use the result immediately.
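A sketch of the intended usage pattern (illustrative only, not code
from this series):

    CPUTLBEntryFull *full;
    void *host;
    int flags;

    flags = probe_access_full(env, addr, MMU_DATA_STORE, mmu_idx,
                              true /* nonfault */, &host, &full, retaddr);
    if (unlikely(flags & TLB_INVALID_MASK)) {
        return false;               /* the access would fault */
    }
    /* Consume @full right away; a later tlb_fill may invalidate it. */
    MemTxAttrs attrs = full->attrs;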
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/exec-all.h | 11 +++++++++++
accel/tcg/cputlb.c | 44 +++++++++++++++++++++++++----------------
2 files changed, 38 insertions(+), 17 deletions(-)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 311e5fb422..e366b5c1ba 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -435,6 +435,17 @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t retaddr);
+#ifndef CONFIG_USER_ONLY
+/**
+ * probe_access_full:
+ * Like probe_access_flags, except also return into @pfull.
+ */
+int probe_access_full(CPUArchState *env, target_ulong addr,
+ MMUAccessType access_type, int mmu_idx,
+ bool nonfault, void **phost,
+ CPUTLBEntryFull **pfull, uintptr_t retaddr);
+#endif
+
#define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */
/* Estimated block size for TB allocation. */
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 5359113e8d..1c59e701e6 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1579,7 +1579,8 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
static int probe_access_internal(CPUArchState *env, target_ulong addr,
int fault_size, MMUAccessType access_type,
int mmu_idx, bool nonfault,
- void **phost, uintptr_t retaddr)
+ void **phost, CPUTLBEntryFull **pfull,
+ uintptr_t retaddr)
{
uintptr_t index = tlb_index(env, mmu_idx, addr);
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -1613,10 +1614,12 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
mmu_idx, nonfault, retaddr)) {
/* Non-faulting page table read failed. */
*phost = NULL;
+ *pfull = NULL;
return TLB_INVALID_MASK;
}
/* TLB resize via tlb_fill may have moved the entry. */
+ index = tlb_index(env, mmu_idx, addr);
entry = tlb_entry(env, mmu_idx, addr);
/*
@@ -1630,6 +1633,8 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
}
flags &= tlb_addr;
+ *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+
/* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
*phost = NULL;
@@ -1641,37 +1646,44 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
return flags;
}
-int probe_access_flags(CPUArchState *env, target_ulong addr,
- MMUAccessType access_type, int mmu_idx,
- bool nonfault, void **phost, uintptr_t retaddr)
+int probe_access_full(CPUArchState *env, target_ulong addr,
+ MMUAccessType access_type, int mmu_idx,
+ bool nonfault, void **phost, CPUTLBEntryFull **pfull,
+ uintptr_t retaddr)
{
- int flags;
-
- flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
- nonfault, phost, retaddr);
+ int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
+ nonfault, phost, pfull, retaddr);
/* Handle clean RAM pages. */
if (unlikely(flags & TLB_NOTDIRTY)) {
- uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
- notdirty_write(env_cpu(env), addr, 1, full, retaddr);
+ notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
flags &= ~TLB_NOTDIRTY;
}
return flags;
}
+int probe_access_flags(CPUArchState *env, target_ulong addr,
+ MMUAccessType access_type, int mmu_idx,
+ bool nonfault, void **phost, uintptr_t retaddr)
+{
+ CPUTLBEntryFull *full;
+
+ return probe_access_full(env, addr, access_type, mmu_idx,
+ nonfault, phost, &full, retaddr);
+}
+
void *probe_access(CPUArchState *env, target_ulong addr, int size,
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
+ CPUTLBEntryFull *full;
void *host;
int flags;
g_assert(-(addr | TARGET_PAGE_MASK) >= size);
flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
- false, &host, retaddr);
+ false, &host, &full, retaddr);
/* Per the interface, size == 0 merely faults the access. */
if (size == 0) {
@@ -1679,9 +1691,6 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
}
if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
- uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
/* Handle watchpoints. */
if (flags & TLB_WATCHPOINT) {
int wp_access = (access_type == MMU_DATA_STORE
@@ -1702,11 +1711,12 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
MMUAccessType access_type, int mmu_idx)
{
+ CPUTLBEntryFull *full;
void *host;
int flags;
flags = probe_access_internal(env, addr, 0, access_type,
- mmu_idx, true, &host, 0);
+ mmu_idx, true, &host, &full, 0);
/* No combination of flags are expected by the caller. */
return flags ? NULL : host;
--
2.34.1
* [PATCH 05/14] accel/tcg: Introduce tlb_set_page_full
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
2022-08-22 23:57 ` [PATCH 04/14] accel/tcg: Introduce probe_access_full Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-22 23:57 ` [PATCH 06/14] include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo
Now that we have collected all of the page data into
CPUTLBEntryFull, provide an interface to record it
all in one go, instead of using 4 arguments. This interface
allows CPUTLBEntryFull to be extended without having to
change the number of arguments.
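A sketch of how a target's tlb_fill might use the new interface
(illustrative only; cpu_get_mem_attrs() is the i386 helper seen in
patch 7's diff):

    CPUTLBEntryFull full = {
        .phys_addr    = paddr & TARGET_PAGE_MASK,
        .attrs        = cpu_get_mem_attrs(env),
        .prot         = prot,
        .lg_page_size = ctz64(page_size),
    };

    tlb_set_page_full(cs, mmu_idx, addr & TARGET_PAGE_MASK, &full);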
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-defs.h | 14 ++++++++++
include/exec/exec-all.h | 22 +++++++++++++++
accel/tcg/cputlb.c | 62 ++++++++++++++++++++++++++++-------------
3 files changed, 78 insertions(+), 20 deletions(-)
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index f70f54d850..5e12cc1854 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -148,7 +148,21 @@ typedef struct CPUTLBEntryFull {
* + the offset within the target MemoryRegion (otherwise)
*/
hwaddr xlat_section;
+
+ /*
+ * @phys_addr contains the physical address in the address space
+ * given by cpu_asidx_from_attrs(cpu, @attrs).
+ */
+ hwaddr phys_addr;
+
+ /* @attrs contains the memory transaction attributes for the page. */
MemTxAttrs attrs;
+
+ /* @prot contains the complete protections for the page. */
+ uint8_t prot;
+
+ /* @lg_page_size contains the log2 of the page size. */
+ uint8_t lg_page_size;
} CPUTLBEntryFull;
/*
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index e366b5c1ba..e7b54e8e5c 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -258,6 +258,28 @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
uint16_t idxmap,
unsigned bits);
+/**
+ * tlb_set_page_full:
+ * @cpu: CPU context
+ * @mmu_idx: mmu index of the tlb to modify
+ * @vaddr: virtual address of the entry to add
+ * @full: the details of the tlb entry
+ *
+ * Add an entry to @cpu tlb index @mmu_idx. All of the fields of
+ * @full must be filled, except for xlat_section, and constitute
+ * the complete description of the translated page.
+ *
+ * This is generally called by the target tlb_fill function after
+ * having performed a successful page table walk to find the physical
+ * address and attributes for the translation.
+ *
+ * At most one entry for a given virtual address is permitted. Only a
+ * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
+ * used by tlb_flush_page.
+ */
+void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
+ CPUTLBEntryFull *full);
+
/**
* tlb_set_page_with_attrs:
* @cpu: CPU to add this TLB entry for
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 1c59e701e6..a93d715e42 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1095,16 +1095,16 @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
}
-/* Add a new TLB entry. At most one entry for a given virtual address
+/*
+ * Add a new TLB entry. At most one entry for a given virtual address
* is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
* supplied size is only used by tlb_flush_page.
*
* Called from TCG-generated code, which is under an RCU read-side
* critical section.
*/
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
- hwaddr paddr, MemTxAttrs attrs, int prot,
- int mmu_idx, target_ulong size)
+void tlb_set_page_full(CPUState *cpu, int mmu_idx,
+ target_ulong vaddr, CPUTLBEntryFull *full)
{
CPUArchState *env = cpu->env_ptr;
CPUTLB *tlb = env_tlb(env);
@@ -1117,35 +1117,36 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
CPUTLBEntry *te, tn;
hwaddr iotlb, xlat, sz, paddr_page;
target_ulong vaddr_page;
- int asidx = cpu_asidx_from_attrs(cpu, attrs);
- int wp_flags;
+ int asidx, wp_flags, prot;
bool is_ram, is_romd;
assert_cpu_is_self(cpu);
- if (size <= TARGET_PAGE_SIZE) {
+ if (full->lg_page_size <= TARGET_PAGE_BITS) {
sz = TARGET_PAGE_SIZE;
} else {
- tlb_add_large_page(env, mmu_idx, vaddr, size);
- sz = size;
+ sz = (hwaddr)1 << full->lg_page_size;
+ tlb_add_large_page(env, mmu_idx, vaddr, sz);
}
vaddr_page = vaddr & TARGET_PAGE_MASK;
- paddr_page = paddr & TARGET_PAGE_MASK;
+ paddr_page = full->phys_addr & TARGET_PAGE_MASK;
+ prot = full->prot;
+ asidx = cpu_asidx_from_attrs(cpu, full->attrs);
section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
- &xlat, &sz, attrs, &prot);
+ &xlat, &sz, full->attrs, &prot);
assert(sz >= TARGET_PAGE_SIZE);
tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
" prot=%x idx=%d\n",
- vaddr, paddr, prot, mmu_idx);
+ vaddr, full->phys_addr, prot, mmu_idx);
address = vaddr_page;
- if (size < TARGET_PAGE_SIZE) {
+ if (full->lg_page_size < TARGET_PAGE_BITS) {
/* Repeat the MMU check and TLB fill on every access. */
address |= TLB_INVALID_MASK;
}
- if (attrs.byte_swap) {
+ if (full->attrs.byte_swap) {
address |= TLB_BSWAP;
}
@@ -1236,8 +1237,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
* subtract here is that of the page base, and not the same as the
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
*/
+ desc->fulltlb[index] = *full;
desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
- desc->fulltlb[index].attrs = attrs;
+ desc->fulltlb[index].phys_addr = paddr_page;
+ desc->fulltlb[index].prot = prot;
/* Now calculate the new entry */
tn.addend = addend - vaddr_page;
@@ -1272,15 +1275,34 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
qemu_spin_unlock(&tlb->c.lock);
}
-/* Add a new TLB entry, but without specifying the memory
- * transaction attributes to be used.
- */
+void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+ hwaddr paddr, MemTxAttrs attrs, int prot,
+ int mmu_idx, target_ulong size)
+{
+ CPUTLBEntryFull full = {
+ .phys_addr = paddr,
+ .attrs = attrs,
+ .prot = prot,
+ .lg_page_size = ctz64(size)
+ };
+
+ assert(is_power_of_2(size));
+ tlb_set_page_full(cpu, mmu_idx, vaddr, &full);
+}
+
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
hwaddr paddr, int prot,
int mmu_idx, target_ulong size)
{
- tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
- prot, mmu_idx, size);
+ CPUTLBEntryFull full = {
+ .phys_addr = paddr,
+ .attrs = MEMTXATTRS_UNSPECIFIED,
+ .prot = prot,
+ .lg_page_size = ctz64(size)
+ };
+
+ assert(is_power_of_2(size));
+ tlb_set_page_full(cpu, mmu_idx, vaddr, &full);
}
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
--
2.34.1
* [PATCH 06/14] include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
2022-08-22 23:57 ` [PATCH 05/14] accel/tcg: Introduce tlb_set_page_full Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-22 23:57 ` [PATCH 07/14] target/i386: Use MMUAccessType across excp_helper.c Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo
Allow the target to cache items from the guest page tables.
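A target opts in from its cpu-param.h, for example (field names and
target purely hypothetical):

    /* hypothetical target/foo/cpu-param.h */
    #define TARGET_PAGE_ENTRY_EXTRA \
        hwaddr pte_addr;            \
        uint64_t pte;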
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-defs.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 5e12cc1854..67239b4e5e 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -163,6 +163,15 @@ typedef struct CPUTLBEntryFull {
/* @lg_page_size contains the log2 of the page size. */
uint8_t lg_page_size;
+
+ /*
+ * Allow target-specific additions to this structure.
+ * This may be used to cache items from the guest cpu
+ * page tables for later use by the implementation.
+ */
+#ifdef TARGET_PAGE_ENTRY_EXTRA
+ TARGET_PAGE_ENTRY_EXTRA
+#endif
} CPUTLBEntryFull;
/*
--
2.34.1
* [PATCH 07/14] target/i386: Use MMUAccessType across excp_helper.c
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
2022-08-22 23:57 ` [PATCH 06/14] include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-22 23:57 ` [PATCH 08/14] target/i386: Direct call get_hphys from mmu_translate Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo
Replace int is_write1 and magic numbers with the proper
MMUAccessType access_type and enumerators.
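This relies on the MMUAccessType enumerators lining up with the PAGE_*
protection bits, so that "prot & (1 << access_type)" tests exactly the
permission being exercised:

    MMU_DATA_LOAD  = 0   ->  PAGE_READ  = 0x0001
    MMU_DATA_STORE = 1   ->  PAGE_WRITE = 0x0002
    MMU_INST_FETCH = 2   ->  PAGE_EXEC  = 0x0004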
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/sysemu/excp_helper.c | 28 +++++++++++++++-------------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index 48feba7e75..414d8032de 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -30,8 +30,10 @@ typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType acc
#define GET_HPHYS(cs, gpa, access_type, prot) \
(get_hphys_func ? get_hphys_func(cs, gpa, access_type, prot) : gpa)
-static int mmu_translate(CPUState *cs, hwaddr addr, MMUTranslateFunc get_hphys_func,
- uint64_t cr3, int is_write1, int mmu_idx, int pg_mode,
+static int mmu_translate(CPUState *cs, hwaddr addr,
+ MMUTranslateFunc get_hphys_func,
+ uint64_t cr3, MMUAccessType access_type,
+ int mmu_idx, int pg_mode,
hwaddr *xlat, int *page_size, int *prot)
{
X86CPU *cpu = X86_CPU(cs);
@@ -40,13 +42,13 @@ static int mmu_translate(CPUState *cs, hwaddr addr, MMUTranslateFunc get_hphys_f
int32_t a20_mask;
target_ulong pde_addr, pte_addr;
int error_code = 0;
- int is_dirty, is_write, is_user;
+ bool is_dirty, is_write, is_user;
uint64_t rsvd_mask = PG_ADDRESS_MASK & ~MAKE_64BIT_MASK(0, cpu->phys_bits);
uint32_t page_offset;
uint32_t pkr;
is_user = (mmu_idx == MMU_USER_IDX);
- is_write = is_write1 & 1;
+ is_write = (access_type == MMU_DATA_STORE);
a20_mask = x86_get_a20_mask(env);
if (!(pg_mode & PG_MODE_NXE)) {
@@ -264,14 +266,14 @@ do_check_protect_pse36:
}
*prot &= pkr_prot;
- if ((pkr_prot & (1 << is_write1)) == 0) {
- assert(is_write1 != 2);
+ if ((pkr_prot & (1 << access_type)) == 0) {
+ assert(access_type != MMU_INST_FETCH);
error_code |= PG_ERROR_PK_MASK;
goto do_fault_protect;
}
}
- if ((*prot & (1 << is_write1)) == 0) {
+ if ((*prot & (1 << access_type)) == 0) {
goto do_fault_protect;
}
@@ -297,7 +299,7 @@ do_check_protect_pse36:
/* align to page_size */
pte &= PG_ADDRESS_MASK & ~(*page_size - 1);
page_offset = addr & (*page_size - 1);
- *xlat = GET_HPHYS(cs, pte + page_offset, is_write1, prot);
+ *xlat = GET_HPHYS(cs, pte + page_offset, access_type, prot);
return PG_ERROR_OK;
do_fault_rsvd:
@@ -308,7 +310,7 @@ do_check_protect_pse36:
error_code |= (is_write << PG_ERROR_W_BIT);
if (is_user)
error_code |= PG_ERROR_U_MASK;
- if (is_write1 == 2 &&
+ if (access_type == MMU_INST_FETCH &&
((pg_mode & PG_MODE_NXE) || (pg_mode & PG_MODE_SMEP)))
error_code |= PG_ERROR_I_D_MASK;
return error_code;
@@ -353,7 +355,7 @@ hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
* 1 = generate PF fault
*/
static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
- int is_write1, int mmu_idx)
+ MMUAccessType access_type, int mmu_idx)
{
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
@@ -365,7 +367,7 @@ static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
#if defined(DEBUG_MMU)
printf("MMU fault: addr=%" VADDR_PRIx " w=%d mmu=%d eip=" TARGET_FMT_lx "\n",
- addr, is_write1, mmu_idx, env->eip);
+ addr, access_type, mmu_idx, env->eip);
#endif
if (!(env->cr[0] & CR0_PG_MASK)) {
@@ -393,7 +395,7 @@ static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
}
}
- error_code = mmu_translate(cs, addr, get_hphys, env->cr[3], is_write1,
+ error_code = mmu_translate(cs, addr, get_hphys, env->cr[3], access_type,
mmu_idx, pg_mode,
&paddr, &page_size, &prot);
}
@@ -404,7 +406,7 @@ static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
vaddr = addr & TARGET_PAGE_MASK;
paddr &= TARGET_PAGE_MASK;
- assert(prot & (1 << is_write1));
+ assert(prot & (1 << access_type));
tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env),
prot, mmu_idx, page_size);
return 0;
--
2.34.1
* [PATCH 08/14] target/i386: Direct call get_hphys from mmu_translate
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
2022-08-22 23:57 ` [PATCH 07/14] target/i386: Use MMUAccessType across excp_helper.c Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-22 23:57 ` [PATCH 09/14] target/i386: Introduce structures for mmu_translate Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo
Use a boolean to control the call to get_hphys instead
of passing a null function pointer.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/sysemu/excp_helper.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index 414d8032de..ea195f7a28 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -24,14 +24,10 @@
#define PG_ERROR_OK (-1)
-typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
- int *prot);
-
#define GET_HPHYS(cs, gpa, access_type, prot) \
- (get_hphys_func ? get_hphys_func(cs, gpa, access_type, prot) : gpa)
+ (use_stage2 ? get_hphys(cs, gpa, access_type, prot) : gpa)
-static int mmu_translate(CPUState *cs, hwaddr addr,
- MMUTranslateFunc get_hphys_func,
+static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
uint64_t cr3, MMUAccessType access_type,
int mmu_idx, int pg_mode,
hwaddr *xlat, int *page_size, int *prot)
@@ -329,7 +325,7 @@ hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
return gphys;
}
- exit_info_1 = mmu_translate(cs, gphys, NULL, env->nested_cr3,
+ exit_info_1 = mmu_translate(cs, gphys, false, env->nested_cr3,
access_type, MMU_USER_IDX, env->nested_pg_mode,
&hphys, &page_size, &next_prot);
if (exit_info_1 == PG_ERROR_OK) {
@@ -395,7 +391,7 @@ static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
}
}
- error_code = mmu_translate(cs, addr, get_hphys, env->cr[3], access_type,
+ error_code = mmu_translate(cs, addr, true, env->cr[3], access_type,
mmu_idx, pg_mode,
&paddr, &page_size, &prot);
}
--
2.34.1
* [PATCH 09/14] target/i386: Introduce structures for mmu_translate
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
2022-08-22 23:57 ` [PATCH 08/14] target/i386: Direct call get_hphys from mmu_translate Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-22 23:57 ` [PATCH 10/14] target/i386: Reorg GET_HPHYS Richard Henderson
From: Richard Henderson @ 2022-08-22 23:57 UTC
To: qemu-devel; +Cc: pbonzini, eduardo
Create TranslateParams for inputs, TranslateResult for successful
outputs, and TranslateFault for error outputs; return true on success.
Move stage1 error paths from handle_mmu_fault to x86_cpu_tlb_fill;
reorg the rest of handle_mmu_fault into get_physical_address.
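The new calling convention, as it appears (slightly abridged) in
get_physical_address() in the diff below:

    TranslateParams in = {
        .addr        = addr,
        .cr3         = env->cr[3],
        .pg_mode     = get_pg_mode(env),
        .mmu_idx     = mmu_idx,
        .access_type = access_type,
        .use_stage2  = true,
    };
    TranslateResult out;
    TranslateFault err;

    if (!mmu_translate(env, &in, &out, &err)) {
        /* raise the fault described by err */
    }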
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/sysemu/excp_helper.c | 322 ++++++++++++++-------------
1 file changed, 171 insertions(+), 151 deletions(-)
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index ea195f7a28..a6b7562bf3 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -22,30 +22,45 @@
#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
-#define PG_ERROR_OK (-1)
+typedef struct TranslateParams {
+ target_ulong addr;
+ target_ulong cr3;
+ int pg_mode;
+ int mmu_idx;
+ MMUAccessType access_type;
+ bool use_stage2;
+} TranslateParams;
+
+typedef struct TranslateResult {
+ hwaddr paddr;
+ int prot;
+ int page_size;
+} TranslateResult;
+
+typedef struct TranslateFault {
+ int exception_index;
+ int error_code;
+ target_ulong cr2;
+} TranslateFault;
#define GET_HPHYS(cs, gpa, access_type, prot) \
- (use_stage2 ? get_hphys(cs, gpa, access_type, prot) : gpa)
+ (in->use_stage2 ? get_hphys(cs, gpa, access_type, prot) : gpa)
-static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
- uint64_t cr3, MMUAccessType access_type,
- int mmu_idx, int pg_mode,
- hwaddr *xlat, int *page_size, int *prot)
+static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
+ TranslateResult *out, TranslateFault *err)
{
- X86CPU *cpu = X86_CPU(cs);
- CPUX86State *env = &cpu->env;
+ CPUState *cs = env_cpu(env);
+ X86CPU *cpu = env_archcpu(env);
+ const int32_t a20_mask = x86_get_a20_mask(env);
+ const target_ulong addr = in->addr;
+ const int pg_mode = in->pg_mode;
+ const bool is_user = (in->mmu_idx == MMU_USER_IDX);
+ const MMUAccessType access_type = in->access_type;
uint64_t ptep, pte;
- int32_t a20_mask;
- target_ulong pde_addr, pte_addr;
- int error_code = 0;
- bool is_dirty, is_write, is_user;
+ hwaddr pde_addr, pte_addr;
uint64_t rsvd_mask = PG_ADDRESS_MASK & ~MAKE_64BIT_MASK(0, cpu->phys_bits);
- uint32_t page_offset;
uint32_t pkr;
-
- is_user = (mmu_idx == MMU_USER_IDX);
- is_write = (access_type == MMU_DATA_STORE);
- a20_mask = x86_get_a20_mask(env);
+ int page_size;
if (!(pg_mode & PG_MODE_NXE)) {
rsvd_mask |= PG_NX_MASK;
@@ -62,7 +77,7 @@ static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
uint64_t pml4e_addr, pml4e;
if (la57) {
- pml5e_addr = ((cr3 & ~0xfff) +
+ pml5e_addr = ((in->cr3 & ~0xfff) +
(((addr >> 48) & 0x1ff) << 3)) & a20_mask;
pml5e_addr = GET_HPHYS(cs, pml5e_addr, MMU_DATA_STORE, NULL);
pml5e = x86_ldq_phys(cs, pml5e_addr);
@@ -78,7 +93,7 @@ static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
}
ptep = pml5e ^ PG_NX_MASK;
} else {
- pml5e = cr3;
+ pml5e = in->cr3;
ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
}
@@ -114,7 +129,7 @@ static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
}
if (pdpe & PG_PSE_MASK) {
/* 1 GB page */
- *page_size = 1024 * 1024 * 1024;
+ page_size = 1024 * 1024 * 1024;
pte_addr = pdpe_addr;
pte = pdpe;
goto do_check_protect;
@@ -123,7 +138,7 @@ static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
#endif
{
/* XXX: load them when cr3 is loaded ? */
- pdpe_addr = ((cr3 & ~0x1f) + ((addr >> 27) & 0x18)) &
+ pdpe_addr = ((in->cr3 & ~0x1f) + ((addr >> 27) & 0x18)) &
a20_mask;
pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
pdpe = x86_ldq_phys(cs, pdpe_addr);
@@ -150,7 +165,7 @@ static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
ptep &= pde ^ PG_NX_MASK;
if (pde & PG_PSE_MASK) {
/* 2 MB page */
- *page_size = 2048 * 1024;
+ page_size = 2048 * 1024;
pte_addr = pde_addr;
pte = pde;
goto do_check_protect;
@@ -172,12 +187,12 @@ static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
}
/* combine pde and pte nx, user and rw protections */
ptep &= pte ^ PG_NX_MASK;
- *page_size = 4096;
+ page_size = 4096;
} else {
uint32_t pde;
/* page directory entry */
- pde_addr = ((cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) &
+ pde_addr = ((in->cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) &
a20_mask;
pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
pde = x86_ldl_phys(cs, pde_addr);
@@ -188,7 +203,7 @@ static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
/* if PSE bit is set, then we use a 4MB page */
if ((pde & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
- *page_size = 4096 * 1024;
+ page_size = 4096 * 1024;
pte_addr = pde_addr;
/* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
@@ -214,12 +229,12 @@ static int mmu_translate(CPUState *cs, hwaddr addr, bool use_stage2,
}
/* combine pde and pte user and rw protections */
ptep &= pte | PG_NX_MASK;
- *page_size = 4096;
+ page_size = 4096;
rsvd_mask = 0;
}
do_check_protect:
- rsvd_mask |= (*page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
+ rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
do_check_protect_pse36:
if (pte & rsvd_mask) {
goto do_fault_rsvd;
@@ -231,17 +246,17 @@ do_check_protect_pse36:
goto do_fault_protect;
}
- *prot = 0;
- if (mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) {
- *prot |= PAGE_READ;
+ int prot = 0;
+ if (in->mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) {
+ prot |= PAGE_READ;
if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
- *prot |= PAGE_WRITE;
+ prot |= PAGE_WRITE;
}
}
if (!(ptep & PG_NX_MASK) &&
- (mmu_idx == MMU_USER_IDX ||
+ (is_user ||
!((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
- *prot |= PAGE_EXEC;
+ prot |= PAGE_EXEC;
}
if (ptep & PG_USER_MASK) {
@@ -260,164 +275,151 @@ do_check_protect_pse36:
} else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
pkr_prot &= ~PAGE_WRITE;
}
-
- *prot &= pkr_prot;
if ((pkr_prot & (1 << access_type)) == 0) {
- assert(access_type != MMU_INST_FETCH);
- error_code |= PG_ERROR_PK_MASK;
- goto do_fault_protect;
+ goto do_fault_pk_protect;
}
+ prot &= pkr_prot;
}
- if ((*prot & (1 << access_type)) == 0) {
+ if ((prot & (1 << access_type)) == 0) {
goto do_fault_protect;
}
/* yes, it can! */
- is_dirty = is_write && !(pte & PG_DIRTY_MASK);
- if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
- pte |= PG_ACCESSED_MASK;
- if (is_dirty) {
- pte |= PG_DIRTY_MASK;
+ {
+ uint32_t set = PG_ACCESSED_MASK;
+ if (access_type == MMU_DATA_STORE) {
+ set |= PG_DIRTY_MASK;
+ }
+ if (set & ~pte) {
+ pte |= set;
+ x86_stl_phys_notdirty(cs, pte_addr, pte);
}
- x86_stl_phys_notdirty(cs, pte_addr, pte);
}
if (!(pte & PG_DIRTY_MASK)) {
/* only set write access if already dirty... otherwise wait
for dirty access */
- assert(!is_write);
- *prot &= ~PAGE_WRITE;
+ assert(access_type != MMU_DATA_STORE);
+ prot &= ~PAGE_WRITE;
}
-
- pte = pte & a20_mask;
+ out->prot = prot;
+ out->page_size = page_size;
/* align to page_size */
- pte &= PG_ADDRESS_MASK & ~(*page_size - 1);
- page_offset = addr & (*page_size - 1);
- *xlat = GET_HPHYS(cs, pte + page_offset, access_type, prot);
- return PG_ERROR_OK;
+ out->paddr = (pte & a20_mask & PG_ADDRESS_MASK & ~(page_size - 1))
+ | (addr & (page_size - 1));
+ out->paddr = GET_HPHYS(cs, out->paddr, access_type, &out->prot);
+ return true;
+ int error_code;
do_fault_rsvd:
- error_code |= PG_ERROR_RSVD_MASK;
+ error_code = PG_ERROR_RSVD_MASK;
+ goto do_fault_cont;
do_fault_protect:
- error_code |= PG_ERROR_P_MASK;
+ error_code = PG_ERROR_P_MASK;
+ goto do_fault_cont;
+ do_fault_pk_protect:
+ assert(access_type != MMU_INST_FETCH);
+ error_code = PG_ERROR_PK_MASK | PG_ERROR_P_MASK;
+ goto do_fault_cont;
do_fault:
- error_code |= (is_write << PG_ERROR_W_BIT);
- if (is_user)
+ error_code = 0;
+ do_fault_cont:
+ if (is_user) {
error_code |= PG_ERROR_U_MASK;
- if (access_type == MMU_INST_FETCH &&
- ((pg_mode & PG_MODE_NXE) || (pg_mode & PG_MODE_SMEP)))
- error_code |= PG_ERROR_I_D_MASK;
- return error_code;
+ }
+ switch (access_type) {
+ case MMU_DATA_LOAD:
+ break;
+ case MMU_DATA_STORE:
+ error_code |= PG_ERROR_W_MASK;
+ break;
+ case MMU_INST_FETCH:
+ if (pg_mode & (PG_MODE_NXE | PG_MODE_SMEP)) {
+ error_code |= PG_ERROR_I_D_MASK;
+ }
+ break;
+ }
+ err->exception_index = EXCP0E_PAGE;
+ err->error_code = error_code;
+ err->cr2 = addr;
+ return false;
}
hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
- int *prot)
+ int *prot)
{
CPUX86State *env = &X86_CPU(cs)->env;
- uint64_t exit_info_1;
- int page_size;
- int next_prot;
- hwaddr hphys;
if (likely(!(env->hflags2 & HF2_NPT_MASK))) {
return gphys;
- }
+ } else {
+ TranslateParams in = {
+ .addr = gphys,
+ .cr3 = env->nested_cr3,
+ .pg_mode = env->nested_pg_mode,
+ .mmu_idx = MMU_USER_IDX,
+ .access_type = access_type,
+ .use_stage2 = false,
+ };
+ TranslateResult out;
+ TranslateFault err;
+ uint64_t exit_info_1;
- exit_info_1 = mmu_translate(cs, gphys, false, env->nested_cr3,
- access_type, MMU_USER_IDX, env->nested_pg_mode,
- &hphys, &page_size, &next_prot);
- if (exit_info_1 == PG_ERROR_OK) {
- if (prot) {
- *prot &= next_prot;
+ if (mmu_translate(env, &in, &out, &err)) {
+ if (prot) {
+ *prot &= out.prot;
+ }
+ return out.paddr;
}
- return hphys;
- }
- x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
- gphys);
- if (prot) {
- exit_info_1 |= SVM_NPTEXIT_GPA;
- } else { /* page table access */
- exit_info_1 |= SVM_NPTEXIT_GPT;
+ x86_stq_phys(cs, env->vm_vmcb +
+ offsetof(struct vmcb, control.exit_info_2), gphys);
+ exit_info_1 = err.error_code
+ | (prot ? SVM_NPTEXIT_GPA : SVM_NPTEXIT_GPT);
+ cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr);
}
- cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr);
}
-/* return value:
- * -1 = cannot handle fault
- * 0 = nothing more to do
- * 1 = generate PF fault
- */
-static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
- MMUAccessType access_type, int mmu_idx)
+static bool get_physical_address(CPUX86State *env, vaddr addr,
+ MMUAccessType access_type, int mmu_idx,
+ TranslateResult *out, TranslateFault *err)
{
- X86CPU *cpu = X86_CPU(cs);
- CPUX86State *env = &cpu->env;
- int error_code = PG_ERROR_OK;
- int pg_mode, prot, page_size;
- int32_t a20_mask;
- hwaddr paddr;
- hwaddr vaddr;
-
-#if defined(DEBUG_MMU)
- printf("MMU fault: addr=%" VADDR_PRIx " w=%d mmu=%d eip=" TARGET_FMT_lx "\n",
- addr, access_type, mmu_idx, env->eip);
-#endif
-
if (!(env->cr[0] & CR0_PG_MASK)) {
- a20_mask = x86_get_a20_mask(env);
- paddr = addr & a20_mask;
+ out->paddr = addr & x86_get_a20_mask(env);
+
#ifdef TARGET_X86_64
if (!(env->hflags & HF_LMA_MASK)) {
/* Without long mode we can only address 32bits in real mode */
- paddr = (uint32_t)paddr;
+ out->paddr = (uint32_t)out->paddr;
}
#endif
- prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- page_size = 4096;
+ out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ out->page_size = TARGET_PAGE_SIZE;
+ return true;
} else {
- pg_mode = get_pg_mode(env);
- if (pg_mode & PG_MODE_LMA) {
- int32_t sext;
+ TranslateParams in = {
+ .addr = addr,
+ .cr3 = env->cr[3],
+ .pg_mode = get_pg_mode(env),
+ .mmu_idx = mmu_idx,
+ .access_type = access_type,
+ .use_stage2 = true
+ };
+ if (in.pg_mode & PG_MODE_LMA) {
/* test virtual address sign extension */
- sext = (int64_t)addr >> (pg_mode & PG_MODE_LA57 ? 56 : 47);
+ int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
+ int64_t sext = (int64_t)addr >> shift;
if (sext != 0 && sext != -1) {
- env->error_code = 0;
- cs->exception_index = EXCP0D_GPF;
- return 1;
+ err->exception_index = EXCP0D_GPF;
+ err->error_code = 0;
+ err->cr2 = addr;
+ return false;
}
}
-
- error_code = mmu_translate(cs, addr, true, env->cr[3], access_type,
- mmu_idx, pg_mode,
- &paddr, &page_size, &prot);
- }
-
- if (error_code == PG_ERROR_OK) {
- /* Even if 4MB pages, we map only one 4KB page in the cache to
- avoid filling it too fast */
- vaddr = addr & TARGET_PAGE_MASK;
- paddr &= TARGET_PAGE_MASK;
-
- assert(prot & (1 << access_type));
- tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env),
- prot, mmu_idx, page_size);
- return 0;
- } else {
- if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
- /* cr2 is not modified in case of exceptions */
- x86_stq_phys(cs,
- env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
- addr);
- } else {
- env->cr[2] = addr;
- }
- env->error_code = error_code;
- cs->exception_index = EXCP0E_PAGE;
- return 1;
+ return mmu_translate(env, &in, out, err);
}
}
@@ -425,15 +427,33 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr)
{
- X86CPU *cpu = X86_CPU(cs);
- CPUX86State *env = &cpu->env;
+ CPUX86State *env = cs->env_ptr;
+ TranslateResult out;
+ TranslateFault err;
- env->retaddr = retaddr;
- if (handle_mmu_fault(cs, addr, size, access_type, mmu_idx)) {
- /* FIXME: On error in get_hphys we have already jumped out. */
- g_assert(!probe);
- raise_exception_err_ra(env, cs->exception_index,
- env->error_code, retaddr);
+ if (get_physical_address(env, addr, access_type, mmu_idx, &out, &err)) {
+ /*
+ * Even if 4MB pages, we map only one 4KB page in the cache to
+ * avoid filling it too fast.
+ */
+ assert(out.prot & (1 << access_type));
+ tlb_set_page_with_attrs(cs, addr & TARGET_PAGE_MASK,
+ out.paddr & TARGET_PAGE_MASK,
+ cpu_get_mem_attrs(env),
+ out.prot, mmu_idx, out.page_size);
+ return true;
}
- return true;
+
+ /* FIXME: On error in get_hphys we have already jumped out. */
+ g_assert(!probe);
+
+ if (env->intercept_exceptions & (1 << err.exception_index)) {
+ /* cr2 is not modified in case of exceptions */
+ x86_stq_phys(cs, env->vm_vmcb +
+ offsetof(struct vmcb, control.exit_info_2),
+ err.cr2);
+ } else {
+ env->cr[2] = err.cr2;
+ }
+ raise_exception_err_ra(env, err.exception_index, err.error_code, retaddr);
}
--
2.34.1
* [PATCH 10/14] target/i386: Reorg GET_HPHYS
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
` (8 preceding siblings ...)
2022-08-22 23:57 ` [PATCH 09/14] target/i386: Introduce structures for mmu_translate Richard Henderson
@ 2022-08-22 23:57 ` Richard Henderson
2022-08-22 23:58 ` [PATCH 11/14] target/i386: Add MMU_PHYS_IDX and MMU_NESTED_IDX Richard Henderson
` (4 subsequent siblings)
14 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-08-22 23:57 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini, eduardo
Replace GET_HPHYS with PTE_HPHYS for the page table walk, and with a direct call
to mmu_translate for the final stage2 translation. Hoist the check
for HF2_NPT_MASK out to get_physical_address, which avoids the
recursive call when stage2 is disabled.
We can now return all the way out to x86_cpu_tlb_fill before raising
an exception, which means probe works.
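As a condensed illustration of the resulting fault path (a sketch assembled
from the hunks below, with a made-up function name, not the literal patch
text):

    static bool fill_or_fail(CPUState *cs, vaddr addr,
                             MMUAccessType access_type, int mmu_idx,
                             bool probe, uintptr_t retaddr)
    {
        CPUX86State *env = cs->env_ptr;
        TranslateResult out;
        TranslateFault err;

        if (get_physical_address(env, addr, access_type, mmu_idx, &out, &err)) {
            /* Success: install the TLB entry as before. */
            return true;
        }
        if (probe) {
            return false;                 /* no exception raised: probe works */
        }
        if (err.stage2 != S2_NONE) {
            raise_stage2(env, &err, retaddr);  /* nested fault -> #NPF vmexit */
        }
        /* (CR2 and exception-intercept handling elided.) */
        raise_exception_err_ra(env, err.exception_index, err.error_code, retaddr);
    }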
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/sysemu/excp_helper.c | 123 +++++++++++++++++++++------
1 file changed, 95 insertions(+), 28 deletions(-)
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index a6b7562bf3..e9adaa3dad 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -37,18 +37,43 @@ typedef struct TranslateResult {
int page_size;
} TranslateResult;
+typedef enum TranslateFaultStage2 {
+ S2_NONE,
+ S2_GPA,
+ S2_GPT,
+} TranslateFaultStage2;
+
typedef struct TranslateFault {
int exception_index;
int error_code;
target_ulong cr2;
+ TranslateFaultStage2 stage2;
} TranslateFault;
-#define GET_HPHYS(cs, gpa, access_type, prot) \
- (in->use_stage2 ? get_hphys(cs, gpa, access_type, prot) : gpa)
+#define PTE_HPHYS(ADDR) \
+ do { \
+ if (in->use_stage2) { \
+ nested_in.addr = (ADDR); \
+ if (!mmu_translate(env, &nested_in, out, err)) { \
+ err->stage2 = S2_GPT; \
+ return false; \
+ } \
+ (ADDR) = out->paddr; \
+ } \
+ } while (0)
static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
TranslateResult *out, TranslateFault *err)
{
+ TranslateParams nested_in = {
+ /* Use store for page table entries, to allow A/D flag updates. */
+ .access_type = MMU_DATA_STORE,
+ .cr3 = env->nested_cr3,
+ .pg_mode = env->nested_pg_mode,
+ .mmu_idx = MMU_USER_IDX,
+ .use_stage2 = false,
+ };
+
CPUState *cs = env_cpu(env);
X86CPU *cpu = env_archcpu(env);
const int32_t a20_mask = x86_get_a20_mask(env);
@@ -79,7 +104,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
if (la57) {
pml5e_addr = ((in->cr3 & ~0xfff) +
(((addr >> 48) & 0x1ff) << 3)) & a20_mask;
- pml5e_addr = GET_HPHYS(cs, pml5e_addr, MMU_DATA_STORE, NULL);
+ PTE_HPHYS(pml5e_addr);
pml5e = x86_ldq_phys(cs, pml5e_addr);
if (!(pml5e & PG_PRESENT_MASK)) {
goto do_fault;
@@ -99,7 +124,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
(((addr >> 39) & 0x1ff) << 3)) & a20_mask;
- pml4e_addr = GET_HPHYS(cs, pml4e_addr, MMU_DATA_STORE, NULL);
+ PTE_HPHYS(pml4e_addr);
pml4e = x86_ldq_phys(cs, pml4e_addr);
if (!(pml4e & PG_PRESENT_MASK)) {
goto do_fault;
@@ -114,7 +139,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
ptep &= pml4e ^ PG_NX_MASK;
pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
a20_mask;
- pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
+ PTE_HPHYS(pdpe_addr);
pdpe = x86_ldq_phys(cs, pdpe_addr);
if (!(pdpe & PG_PRESENT_MASK)) {
goto do_fault;
@@ -140,7 +165,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
/* XXX: load them when cr3 is loaded ? */
pdpe_addr = ((in->cr3 & ~0x1f) + ((addr >> 27) & 0x18)) &
a20_mask;
- pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
+ PTE_HPHYS(pdpe_addr);
pdpe = x86_ldq_phys(cs, pdpe_addr);
if (!(pdpe & PG_PRESENT_MASK)) {
goto do_fault;
@@ -154,7 +179,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
a20_mask;
- pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
+ PTE_HPHYS(pde_addr);
pde = x86_ldq_phys(cs, pde_addr);
if (!(pde & PG_PRESENT_MASK)) {
goto do_fault;
@@ -177,7 +202,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
}
pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
a20_mask;
- pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
+ PTE_HPHYS(pte_addr);
pte = x86_ldq_phys(cs, pte_addr);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
@@ -194,7 +219,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
/* page directory entry */
pde_addr = ((in->cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) &
a20_mask;
- pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
+ PTE_HPHYS(pde_addr);
pde = x86_ldl_phys(cs, pde_addr);
if (!(pde & PG_PRESENT_MASK)) {
goto do_fault;
@@ -222,7 +247,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
/* page directory entry */
pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
a20_mask;
- pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
+ PTE_HPHYS(pte_addr);
pte = x86_ldl_phys(cs, pte_addr);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
@@ -303,13 +328,31 @@ do_check_protect_pse36:
assert(access_type != MMU_DATA_STORE);
prot &= ~PAGE_WRITE;
}
- out->prot = prot;
- out->page_size = page_size;
/* align to page_size */
out->paddr = (pte & a20_mask & PG_ADDRESS_MASK & ~(page_size - 1))
| (addr & (page_size - 1));
- out->paddr = GET_HPHYS(cs, out->paddr, access_type, &out->prot);
+
+ if (in->use_stage2) {
+ nested_in.addr = out->paddr;
+ nested_in.access_type = access_type;
+
+ if (!mmu_translate(env, &nested_in, out, err)) {
+ err->stage2 = S2_GPA;
+ return false;
+ }
+
+ /* Merge stage1 & stage2 protection bits. */
+ prot &= out->prot;
+
+ /* Re-verify resulting protection. */
+ if ((prot & (1 << access_type)) == 0) {
+ goto do_fault_protect;
+ }
+ }
+
+ out->prot = prot;
+ out->page_size = page_size;
return true;
int error_code;
@@ -344,13 +387,36 @@ do_check_protect_pse36:
err->exception_index = EXCP0E_PAGE;
err->error_code = error_code;
err->cr2 = addr;
+ err->stage2 = S2_NONE;
return false;
}
+static G_NORETURN void raise_stage2(CPUX86State *env, TranslateFault *err,
+ uintptr_t retaddr)
+{
+ uint64_t exit_info_1 = err->error_code;
+
+ switch (err->stage2) {
+ case S2_GPT:
+ exit_info_1 |= SVM_NPTEXIT_GPT;
+ break;
+ case S2_GPA:
+ exit_info_1 |= SVM_NPTEXIT_GPA;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ x86_stq_phys(env_cpu(env),
+ env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
+ err->cr2);
+ cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, retaddr);
+}
+
hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
int *prot)
{
- CPUX86State *env = &X86_CPU(cs)->env;
+ CPUX86State *env = cs->env_ptr;
if (likely(!(env->hflags2 & HF2_NPT_MASK))) {
return gphys;
@@ -365,20 +431,16 @@ hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
};
TranslateResult out;
TranslateFault err;
- uint64_t exit_info_1;
- if (mmu_translate(env, &in, &out, &err)) {
- if (prot) {
- *prot &= out.prot;
- }
- return out.paddr;
+ if (!mmu_translate(env, &in, &out, &err)) {
+ err.stage2 = prot ? SVM_NPTEXIT_GPA : SVM_NPTEXIT_GPT;
+ raise_stage2(env, &err, env->retaddr);
}
- x86_stq_phys(cs, env->vm_vmcb +
- offsetof(struct vmcb, control.exit_info_2), gphys);
- exit_info_1 = err.error_code
- | (prot ? SVM_NPTEXIT_GPA : SVM_NPTEXIT_GPT);
- cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr);
+ if (prot) {
+ *prot &= out.prot;
+ }
+ return out.paddr;
}
}
@@ -405,7 +467,7 @@ static bool get_physical_address(CPUX86State *env, vaddr addr,
.pg_mode = get_pg_mode(env),
.mmu_idx = mmu_idx,
.access_type = access_type,
- .use_stage2 = true
+ .use_stage2 = env->hflags2 & HF2_NPT_MASK,
};
if (in.pg_mode & PG_MODE_LMA) {
@@ -444,8 +506,13 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
return true;
}
- /* FIXME: On error in get_hphys we have already jumped out. */
- g_assert(!probe);
+ if (probe) {
+ return false;
+ }
+
+ if (err.stage2 != S2_NONE) {
+ raise_stage2(env, &err, retaddr);
+ }
if (env->intercept_exceptions & (1 << err.exception_index)) {
/* cr2 is not modified in case of exceptions */
--
2.34.1
* [PATCH 11/14] target/i386: Add MMU_PHYS_IDX and MMU_NESTED_IDX
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
` (9 preceding siblings ...)
2022-08-22 23:57 ` [PATCH 10/14] target/i386: Reorg GET_HPHYS Richard Henderson
@ 2022-08-22 23:58 ` Richard Henderson
2022-08-22 23:58 ` [PATCH 12/14] target/i386: Use MMU_NESTED_IDX for vmload/vmsave Richard Henderson
` (3 subsequent siblings)
14 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-08-22 23:58 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini, eduardo
These new mmu indexes will be helpful for improving the paging code
throughout the target.
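For example (hypothetical helper name; cpu_ldq_mmuidx_ra is the existing
accessor that the following patches use), a guest-physical read can now go
through the normal TLB machinery instead of x86_ldq_phys():

    /* Illustration only: read a quadword of guest-physical memory. */
    static uint64_t sketch_ldq_phys(CPUX86State *env, hwaddr gpa)
    {
        /* MMU_PHYS_IDX applies no paging translation; the address is used as-is. */
        return cpu_ldq_mmuidx_ra(env, gpa, MMU_PHYS_IDX, 0);
    }

With MMU_NESTED_IDX, the same kind of access is additionally translated
through the nested (NPT) page tables.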
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/cpu-param.h | 2 +-
target/i386/cpu.h | 3 +
target/i386/tcg/sysemu/excp_helper.c | 82 ++++++++++++++++++----------
target/i386/tcg/sysemu/svm_helper.c | 3 +
4 files changed, 60 insertions(+), 30 deletions(-)
diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h
index 9740bd7abd..abad52af20 100644
--- a/target/i386/cpu-param.h
+++ b/target/i386/cpu-param.h
@@ -23,6 +23,6 @@
# define TARGET_VIRT_ADDR_SPACE_BITS 32
#endif
#define TARGET_PAGE_BITS 12
-#define NB_MMU_MODES 3
+#define NB_MMU_MODES 5
#endif
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 82004b65b9..9a40b54ae5 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2144,6 +2144,9 @@ uint64_t cpu_get_tsc(CPUX86State *env);
#define MMU_KSMAP_IDX 0
#define MMU_USER_IDX 1
#define MMU_KNOSMAP_IDX 2
+#define MMU_NESTED_IDX 3
+#define MMU_PHYS_IDX 4
+
static inline int cpu_mmu_index(CPUX86State *env, bool ifetch)
{
return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX :
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index e9adaa3dad..f771d25f32 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -448,41 +448,65 @@ static bool get_physical_address(CPUX86State *env, vaddr addr,
MMUAccessType access_type, int mmu_idx,
TranslateResult *out, TranslateFault *err)
{
- if (!(env->cr[0] & CR0_PG_MASK)) {
- out->paddr = addr & x86_get_a20_mask(env);
+ TranslateParams in;
+ bool use_stage2 = env->hflags2 & HF2_NPT_MASK;
-#ifdef TARGET_X86_64
- if (!(env->hflags & HF_LMA_MASK)) {
- /* Without long mode we can only address 32bits in real mode */
- out->paddr = (uint32_t)out->paddr;
- }
-#endif
- out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- out->page_size = TARGET_PAGE_SIZE;
- return true;
- } else {
- TranslateParams in = {
- .addr = addr,
- .cr3 = env->cr[3],
- .pg_mode = get_pg_mode(env),
- .mmu_idx = mmu_idx,
- .access_type = access_type,
- .use_stage2 = env->hflags2 & HF2_NPT_MASK,
- };
+ in.addr = addr;
+ in.access_type = access_type;
- if (in.pg_mode & PG_MODE_LMA) {
- /* test virtual address sign extension */
- int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
- int64_t sext = (int64_t)addr >> shift;
- if (sext != 0 && sext != -1) {
- err->exception_index = EXCP0D_GPF;
- err->error_code = 0;
- err->cr2 = addr;
+ switch (mmu_idx) {
+ case MMU_PHYS_IDX:
+ break;
+
+ case MMU_NESTED_IDX:
+ if (likely(use_stage2)) {
+ in.cr3 = env->nested_cr3;
+ in.pg_mode = env->nested_pg_mode;
+ in.mmu_idx = MMU_USER_IDX;
+ in.use_stage2 = false;
+
+ if (!mmu_translate(env, &in, out, err)) {
+ err->stage2 = S2_GPA;
return false;
}
+ return true;
}
- return mmu_translate(env, &in, out, err);
+ break;
+
+ default:
+ in.cr3 = env->cr[3];
+ in.mmu_idx = mmu_idx;
+ in.use_stage2 = use_stage2;
+ in.pg_mode = get_pg_mode(env);
+
+ if (likely(in.pg_mode)) {
+ if (in.pg_mode & PG_MODE_LMA) {
+ /* test virtual address sign extension */
+ int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
+ int64_t sext = (int64_t)addr >> shift;
+ if (sext != 0 && sext != -1) {
+ err->exception_index = EXCP0D_GPF;
+ err->error_code = 0;
+ err->cr2 = addr;
+ return false;
+ }
+ }
+ return mmu_translate(env, &in, out, err);
+ }
+ break;
}
+
+ /* Translation disabled. */
+ out->paddr = addr & x86_get_a20_mask(env);
+#ifdef TARGET_X86_64
+ if (!(env->hflags & HF_LMA_MASK)) {
+ /* Without long mode we can only address 32bits in real mode */
+ out->paddr = (uint32_t)out->paddr;
+ }
+#endif
+ out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ out->page_size = TARGET_PAGE_SIZE;
+ return true;
}
bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 2b6f450af9..85b7741d94 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -271,6 +271,8 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
env->hflags2 |= HF2_NPT_MASK;
env->nested_pg_mode = get_pg_mode(env) & PG_MODE_SVM_MASK;
+
+ tlb_flush_by_mmuidx(cs, 1 << MMU_NESTED_IDX);
}
/* enable intercepts */
@@ -720,6 +722,7 @@ void do_vmexit(CPUX86State *env)
env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0);
}
env->hflags2 &= ~HF2_NPT_MASK;
+ tlb_flush_by_mmuidx(cs, 1 << MMU_NESTED_IDX);
/* Save the VM state in the vmcb */
svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.es),
--
2.34.1
* [PATCH 12/14] target/i386: Use MMU_NESTED_IDX for vmload/vmsave
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
` (10 preceding siblings ...)
2022-08-22 23:58 ` [PATCH 11/14] target/i386: Add MMU_PHYS_IDX and MMU_NESTED_IDX Richard Henderson
@ 2022-08-22 23:58 ` Richard Henderson
2022-08-22 23:58 ` [PATCH 13/14] target/i386: Combine 5 sets of variables in mmu_translate Richard Henderson
` (2 subsequent siblings)
14 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-08-22 23:58 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini, eduardo
Use MMU_NESTED_IDX for each memory access, rather than
just a single translation to physical. Adjust svm_save_seg
and svm_load_seg to pass in mmu_idx.
This removes the last use of get_hphys so remove it.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/cpu.h | 2 -
target/i386/tcg/sysemu/excp_helper.c | 31 ----
target/i386/tcg/sysemu/svm_helper.c | 231 +++++++++++++++------------
3 files changed, 126 insertions(+), 138 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 9a40b54ae5..10a5e79774 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2381,8 +2381,6 @@ static inline bool ctl_has_irq(CPUX86State *env)
return (env->int_ctl & V_IRQ_MASK) && (int_prio >= tpr);
}
-hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
- int *prot);
#if defined(TARGET_X86_64) && \
defined(CONFIG_USER_ONLY) && \
defined(CONFIG_LINUX)
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index f771d25f32..11f64e5965 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -413,37 +413,6 @@ static G_NORETURN void raise_stage2(CPUX86State *env, TranslateFault *err,
cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, retaddr);
}
-hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
- int *prot)
-{
- CPUX86State *env = cs->env_ptr;
-
- if (likely(!(env->hflags2 & HF2_NPT_MASK))) {
- return gphys;
- } else {
- TranslateParams in = {
- .addr = gphys,
- .cr3 = env->nested_cr3,
- .pg_mode = env->nested_pg_mode,
- .mmu_idx = MMU_USER_IDX,
- .access_type = access_type,
- .use_stage2 = false,
- };
- TranslateResult out;
- TranslateFault err;
-
- if (!mmu_translate(env, &in, &out, &err)) {
- err.stage2 = prot ? SVM_NPTEXIT_GPA : SVM_NPTEXIT_GPT;
- raise_stage2(env, &err, env->retaddr);
- }
-
- if (prot) {
- *prot &= out.prot;
- }
- return out.paddr;
- }
-}
-
static bool get_physical_address(CPUX86State *env, vaddr addr,
MMUAccessType access_type, int mmu_idx,
TranslateResult *out, TranslateFault *err)
diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 85b7741d94..8e88567399 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -27,19 +27,19 @@
/* Secure Virtual Machine helpers */
-static inline void svm_save_seg(CPUX86State *env, hwaddr addr,
- const SegmentCache *sc)
+static void svm_save_seg(CPUX86State *env, int mmu_idx, hwaddr addr,
+ const SegmentCache *sc)
{
- CPUState *cs = env_cpu(env);
-
- x86_stw_phys(cs, addr + offsetof(struct vmcb_seg, selector),
- sc->selector);
- x86_stq_phys(cs, addr + offsetof(struct vmcb_seg, base),
- sc->base);
- x86_stl_phys(cs, addr + offsetof(struct vmcb_seg, limit),
- sc->limit);
- x86_stw_phys(cs, addr + offsetof(struct vmcb_seg, attrib),
- ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00));
+ cpu_stw_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, selector),
+ sc->selector, mmu_idx, 0);
+ cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, base),
+ sc->base, mmu_idx, 0);
+ cpu_stl_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, limit),
+ sc->limit, mmu_idx, 0);
+ cpu_stw_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, attrib),
+ ((sc->flags >> 8) & 0xff)
+ | ((sc->flags >> 12) & 0x0f00),
+ mmu_idx, 0);
}
/*
@@ -52,29 +52,36 @@ static inline void svm_canonicalization(CPUX86State *env, target_ulong *seg_base
*seg_base = ((((long) *seg_base) << shift_amt) >> shift_amt);
}
-static inline void svm_load_seg(CPUX86State *env, hwaddr addr,
- SegmentCache *sc)
+static void svm_load_seg(CPUX86State *env, int mmu_idx, hwaddr addr,
+ SegmentCache *sc)
{
- CPUState *cs = env_cpu(env);
unsigned int flags;
- sc->selector = x86_lduw_phys(cs,
- addr + offsetof(struct vmcb_seg, selector));
- sc->base = x86_ldq_phys(cs, addr + offsetof(struct vmcb_seg, base));
- sc->limit = x86_ldl_phys(cs, addr + offsetof(struct vmcb_seg, limit));
- flags = x86_lduw_phys(cs, addr + offsetof(struct vmcb_seg, attrib));
+ sc->selector =
+ cpu_lduw_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, selector),
+ mmu_idx, 0);
+ sc->base =
+ cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, base),
+ mmu_idx, 0);
+ sc->limit =
+ cpu_ldl_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, limit),
+ mmu_idx, 0);
+ flags =
+ cpu_lduw_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, attrib),
+ mmu_idx, 0);
sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12);
+
svm_canonicalization(env, &sc->base);
}
-static inline void svm_load_seg_cache(CPUX86State *env, hwaddr addr,
- int seg_reg)
+static void svm_load_seg_cache(CPUX86State *env, int mmu_idx,
+ hwaddr addr, int seg_reg)
{
- SegmentCache sc1, *sc = &sc1;
+ SegmentCache sc;
- svm_load_seg(env, addr, sc);
- cpu_x86_load_seg_cache(env, seg_reg, sc->selector,
- sc->base, sc->limit, sc->flags);
+ svm_load_seg(env, mmu_idx, addr, &sc);
+ cpu_x86_load_seg_cache(env, seg_reg, sc.selector,
+ sc.base, sc.limit, sc.flags);
}
static inline bool is_efer_invalid_state (CPUX86State *env)
@@ -199,13 +206,17 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
env->vm_hsave + offsetof(struct vmcb, save.rflags),
cpu_compute_eflags(env));
- svm_save_seg(env, env->vm_hsave + offsetof(struct vmcb, save.es),
+ svm_save_seg(env, MMU_PHYS_IDX,
+ env->vm_hsave + offsetof(struct vmcb, save.es),
&env->segs[R_ES]);
- svm_save_seg(env, env->vm_hsave + offsetof(struct vmcb, save.cs),
+ svm_save_seg(env, MMU_PHYS_IDX,
+ env->vm_hsave + offsetof(struct vmcb, save.cs),
&env->segs[R_CS]);
- svm_save_seg(env, env->vm_hsave + offsetof(struct vmcb, save.ss),
+ svm_save_seg(env, MMU_PHYS_IDX,
+ env->vm_hsave + offsetof(struct vmcb, save.ss),
&env->segs[R_SS]);
- svm_save_seg(env, env->vm_hsave + offsetof(struct vmcb, save.ds),
+ svm_save_seg(env, MMU_PHYS_IDX,
+ env->vm_hsave + offsetof(struct vmcb, save.ds),
&env->segs[R_DS]);
x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rip),
@@ -325,18 +336,18 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
save.rflags)),
~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
- svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.es),
- R_ES);
- svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.cs),
- R_CS);
- svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.ss),
- R_SS);
- svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.ds),
- R_DS);
- svm_load_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.idtr),
- &env->idt);
- svm_load_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.gdtr),
- &env->gdt);
+ svm_load_seg_cache(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.es), R_ES);
+ svm_load_seg_cache(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.cs), R_CS);
+ svm_load_seg_cache(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.ss), R_SS);
+ svm_load_seg_cache(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.ds), R_DS);
+ svm_load_seg(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.idtr), &env->idt);
+ svm_load_seg(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.gdtr), &env->gdt);
env->eip = x86_ldq_phys(cs,
env->vm_vmcb + offsetof(struct vmcb, save.rip));
@@ -451,9 +462,8 @@ void helper_vmmcall(CPUX86State *env)
void helper_vmload(CPUX86State *env, int aflag)
{
- CPUState *cs = env_cpu(env);
+ int mmu_idx = MMU_PHYS_IDX;
target_ulong addr;
- int prot;
cpu_svm_check_intercept_param(env, SVM_EXIT_VMLOAD, 0, GETPC());
@@ -464,43 +474,52 @@ void helper_vmload(CPUX86State *env, int aflag)
}
if (virtual_vm_load_save_enabled(env, SVM_EXIT_VMLOAD, GETPC())) {
- addr = get_hphys(cs, addr, MMU_DATA_LOAD, &prot);
+ mmu_idx = MMU_NESTED_IDX;
}
- qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! " TARGET_FMT_lx
- "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
- addr, x86_ldq_phys(cs, addr + offsetof(struct vmcb,
- save.fs.base)),
- env->segs[R_FS].base);
-
- svm_load_seg_cache(env, addr + offsetof(struct vmcb, save.fs), R_FS);
- svm_load_seg_cache(env, addr + offsetof(struct vmcb, save.gs), R_GS);
- svm_load_seg(env, addr + offsetof(struct vmcb, save.tr), &env->tr);
- svm_load_seg(env, addr + offsetof(struct vmcb, save.ldtr), &env->ldt);
+ svm_load_seg_cache(env, mmu_idx,
+ addr + offsetof(struct vmcb, save.fs), R_FS);
+ svm_load_seg_cache(env, mmu_idx,
+ addr + offsetof(struct vmcb, save.gs), R_GS);
+ svm_load_seg(env, mmu_idx,
+ addr + offsetof(struct vmcb, save.tr), &env->tr);
+ svm_load_seg(env, mmu_idx,
+ addr + offsetof(struct vmcb, save.ldtr), &env->ldt);
#ifdef TARGET_X86_64
- env->kernelgsbase = x86_ldq_phys(cs, addr + offsetof(struct vmcb,
- save.kernel_gs_base));
- env->lstar = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.lstar));
- env->cstar = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.cstar));
- env->fmask = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.sfmask));
+ env->kernelgsbase =
+ cpu_ldq_mmuidx_ra(env,
+ addr + offsetof(struct vmcb, save.kernel_gs_base),
+ mmu_idx, 0);
+ env->lstar =
+ cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.lstar),
+ mmu_idx, 0);
+ env->cstar =
+ cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.cstar),
+ mmu_idx, 0);
+ env->fmask =
+ cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sfmask),
+ mmu_idx, 0);
svm_canonicalization(env, &env->kernelgsbase);
#endif
- env->star = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.star));
- env->sysenter_cs = x86_ldq_phys(cs,
- addr + offsetof(struct vmcb, save.sysenter_cs));
- env->sysenter_esp = x86_ldq_phys(cs, addr + offsetof(struct vmcb,
- save.sysenter_esp));
- env->sysenter_eip = x86_ldq_phys(cs, addr + offsetof(struct vmcb,
- save.sysenter_eip));
-
+ env->star =
+ cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.star),
+ mmu_idx, 0);
+ env->sysenter_cs =
+ cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_cs),
+ mmu_idx, 0);
+ env->sysenter_esp =
+ cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_esp),
+ mmu_idx, 0);
+ env->sysenter_eip =
+ cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_eip),
+ mmu_idx, 0);
}
void helper_vmsave(CPUX86State *env, int aflag)
{
- CPUState *cs = env_cpu(env);
+ int mmu_idx = MMU_PHYS_IDX;
target_ulong addr;
- int prot;
cpu_svm_check_intercept_param(env, SVM_EXIT_VMSAVE, 0, GETPC());
@@ -511,38 +530,36 @@ void helper_vmsave(CPUX86State *env, int aflag)
}
if (virtual_vm_load_save_enabled(env, SVM_EXIT_VMSAVE, GETPC())) {
- addr = get_hphys(cs, addr, MMU_DATA_STORE, &prot);
+ mmu_idx = MMU_NESTED_IDX;
}
- qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! " TARGET_FMT_lx
- "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
- addr, x86_ldq_phys(cs,
- addr + offsetof(struct vmcb, save.fs.base)),
- env->segs[R_FS].base);
-
- svm_save_seg(env, addr + offsetof(struct vmcb, save.fs),
+ svm_save_seg(env, mmu_idx, addr + offsetof(struct vmcb, save.fs),
&env->segs[R_FS]);
- svm_save_seg(env, addr + offsetof(struct vmcb, save.gs),
+ svm_save_seg(env, mmu_idx, addr + offsetof(struct vmcb, save.gs),
&env->segs[R_GS]);
- svm_save_seg(env, addr + offsetof(struct vmcb, save.tr),
+ svm_save_seg(env, mmu_idx, addr + offsetof(struct vmcb, save.tr),
&env->tr);
- svm_save_seg(env, addr + offsetof(struct vmcb, save.ldtr),
+ svm_save_seg(env, mmu_idx, addr + offsetof(struct vmcb, save.ldtr),
&env->ldt);
#ifdef TARGET_X86_64
- x86_stq_phys(cs, addr + offsetof(struct vmcb, save.kernel_gs_base),
- env->kernelgsbase);
- x86_stq_phys(cs, addr + offsetof(struct vmcb, save.lstar), env->lstar);
- x86_stq_phys(cs, addr + offsetof(struct vmcb, save.cstar), env->cstar);
- x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sfmask), env->fmask);
+ cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.kernel_gs_base),
+ env->kernelgsbase, mmu_idx, 0);
+ cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.lstar),
+ env->lstar, mmu_idx, 0);
+ cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.cstar),
+ env->cstar, mmu_idx, 0);
+ cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sfmask),
+ env->fmask, mmu_idx, 0);
#endif
- x86_stq_phys(cs, addr + offsetof(struct vmcb, save.star), env->star);
- x86_stq_phys(cs,
- addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs);
- x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_esp),
- env->sysenter_esp);
- x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_eip),
- env->sysenter_eip);
+ cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.star),
+ env->star, mmu_idx, 0);
+ cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_cs),
+ env->sysenter_cs, mmu_idx, 0);
+ cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_esp),
+ env->sysenter_esp, mmu_idx, 0);
+ cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_eip),
+ env->sysenter_eip, mmu_idx, 0);
}
void helper_stgi(CPUX86State *env)
@@ -725,13 +742,17 @@ void do_vmexit(CPUX86State *env)
tlb_flush_by_mmuidx(cs, 1 << MMU_NESTED_IDX);
/* Save the VM state in the vmcb */
- svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.es),
+ svm_save_seg(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.es),
&env->segs[R_ES]);
- svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.cs),
+ svm_save_seg(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.cs),
&env->segs[R_CS]);
- svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.ss),
+ svm_save_seg(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.ss),
&env->segs[R_SS]);
- svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.ds),
+ svm_save_seg(env, MMU_PHYS_IDX,
+ env->vm_vmcb + offsetof(struct vmcb, save.ds),
&env->segs[R_DS]);
x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base),
@@ -812,14 +833,14 @@ void do_vmexit(CPUX86State *env)
~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
VM_MASK));
- svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es),
- R_ES);
- svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.cs),
- R_CS);
- svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.ss),
- R_SS);
- svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.ds),
- R_DS);
+ svm_load_seg_cache(env, MMU_PHYS_IDX,
+ env->vm_hsave + offsetof(struct vmcb, save.es), R_ES);
+ svm_load_seg_cache(env, MMU_PHYS_IDX,
+ env->vm_hsave + offsetof(struct vmcb, save.cs), R_CS);
+ svm_load_seg_cache(env, MMU_PHYS_IDX,
+ env->vm_hsave + offsetof(struct vmcb, save.ss), R_SS);
+ svm_load_seg_cache(env, MMU_PHYS_IDX,
+ env->vm_hsave + offsetof(struct vmcb, save.ds), R_DS);
env->eip = x86_ldq_phys(cs,
env->vm_hsave + offsetof(struct vmcb, save.rip));
--
2.34.1
* [PATCH 13/14] target/i386: Combine 5 sets of variables in mmu_translate
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
` (11 preceding siblings ...)
2022-08-22 23:58 ` [PATCH 12/14] target/i386: Use MMU_NESTED_IDX for vmload/vmsave Richard Henderson
@ 2022-08-22 23:58 ` Richard Henderson
2022-08-22 23:58 ` [PATCH 14/14] target/i386: Use atomic operations for pte updates Richard Henderson
2022-08-23 2:05 ` [PATCH 00/14] " Richard Henderson
14 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-08-22 23:58 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini, eduardo
We don't need one variable set per translation level,
which requires copying into pte/pte_addr for huge pages.
Standardize on pte/pte_addr for all levels.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/sysemu/excp_helper.c | 178 ++++++++++++++-------------
1 file changed, 91 insertions(+), 87 deletions(-)
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index 11f64e5965..e5d9ff138e 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -82,7 +82,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
const bool is_user = (in->mmu_idx == MMU_USER_IDX);
const MMUAccessType access_type = in->access_type;
uint64_t ptep, pte;
- hwaddr pde_addr, pte_addr;
+ hwaddr pte_addr;
uint64_t rsvd_mask = PG_ADDRESS_MASK & ~MAKE_64BIT_MASK(0, cpu->phys_bits);
uint32_t pkr;
int page_size;
@@ -92,116 +92,122 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
}
if (pg_mode & PG_MODE_PAE) {
- uint64_t pde, pdpe;
- target_ulong pdpe_addr;
-
#ifdef TARGET_X86_64
if (pg_mode & PG_MODE_LMA) {
- bool la57 = pg_mode & PG_MODE_LA57;
- uint64_t pml5e_addr, pml5e;
- uint64_t pml4e_addr, pml4e;
-
- if (la57) {
- pml5e_addr = ((in->cr3 & ~0xfff) +
- (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
- PTE_HPHYS(pml5e_addr);
- pml5e = x86_ldq_phys(cs, pml5e_addr);
- if (!(pml5e & PG_PRESENT_MASK)) {
+ if (pg_mode & PG_MODE_LA57) {
+ /*
+ * Page table level 5
+ */
+ pte_addr = ((in->cr3 & ~0xfff) +
+ (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
+ PTE_HPHYS(pte_addr);
+ pte = x86_ldq_phys(cs, pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
- if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
+ if (pte & (rsvd_mask | PG_PSE_MASK)) {
goto do_fault_rsvd;
}
- if (!(pml5e & PG_ACCESSED_MASK)) {
- pml5e |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
+ if (!(pte & PG_ACCESSED_MASK)) {
+ pte |= PG_ACCESSED_MASK;
+ x86_stl_phys_notdirty(cs, pte_addr, pte);
}
- ptep = pml5e ^ PG_NX_MASK;
+ ptep = pte ^ PG_NX_MASK;
} else {
- pml5e = in->cr3;
+ pte = in->cr3;
ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
}
- pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
- (((addr >> 39) & 0x1ff) << 3)) & a20_mask;
- PTE_HPHYS(pml4e_addr);
- pml4e = x86_ldq_phys(cs, pml4e_addr);
- if (!(pml4e & PG_PRESENT_MASK)) {
+ /*
+ * Page table level 4
+ */
+ pte_addr = ((pte & PG_ADDRESS_MASK) +
+ (((addr >> 39) & 0x1ff) << 3)) & a20_mask;
+ PTE_HPHYS(pte_addr);
+ pte = x86_ldq_phys(cs, pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
- if (pml4e & (rsvd_mask | PG_PSE_MASK)) {
+ if (pte & (rsvd_mask | PG_PSE_MASK)) {
goto do_fault_rsvd;
}
- if (!(pml4e & PG_ACCESSED_MASK)) {
- pml4e |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
+ if (!(pte & PG_ACCESSED_MASK)) {
+ pte |= PG_ACCESSED_MASK;
+ x86_stl_phys_notdirty(cs, pte_addr, pte);
}
- ptep &= pml4e ^ PG_NX_MASK;
- pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
- a20_mask;
- PTE_HPHYS(pdpe_addr);
- pdpe = x86_ldq_phys(cs, pdpe_addr);
- if (!(pdpe & PG_PRESENT_MASK)) {
+ ptep &= pte ^ PG_NX_MASK;
+
+ /*
+ * Page table level 3
+ */
+ pte_addr = ((pte & PG_ADDRESS_MASK) +
+ (((addr >> 30) & 0x1ff) << 3)) & a20_mask;
+ PTE_HPHYS(pte_addr);
+ pte = x86_ldq_phys(cs, pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
- if (pdpe & rsvd_mask) {
+ if (pte & rsvd_mask) {
goto do_fault_rsvd;
}
- ptep &= pdpe ^ PG_NX_MASK;
- if (!(pdpe & PG_ACCESSED_MASK)) {
- pdpe |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
+ ptep &= pte ^ PG_NX_MASK;
+ if (!(pte & PG_ACCESSED_MASK)) {
+ pte |= PG_ACCESSED_MASK;
+ x86_stl_phys_notdirty(cs, pte_addr, pte);
}
- if (pdpe & PG_PSE_MASK) {
+ if (pte & PG_PSE_MASK) {
/* 1 GB page */
page_size = 1024 * 1024 * 1024;
- pte_addr = pdpe_addr;
- pte = pdpe;
goto do_check_protect;
}
} else
#endif
{
- /* XXX: load them when cr3 is loaded ? */
- pdpe_addr = ((in->cr3 & ~0x1f) + ((addr >> 27) & 0x18)) &
- a20_mask;
- PTE_HPHYS(pdpe_addr);
- pdpe = x86_ldq_phys(cs, pdpe_addr);
- if (!(pdpe & PG_PRESENT_MASK)) {
+ /*
+ * Page table level 3
+ */
+ pte_addr = ((in->cr3 & ~0x1f) + ((addr >> 27) & 0x18)) & a20_mask;
+ PTE_HPHYS(pte_addr);
+ pte = x86_ldq_phys(cs, pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
rsvd_mask |= PG_HI_USER_MASK;
- if (pdpe & (rsvd_mask | PG_NX_MASK)) {
+ if (pte & (rsvd_mask | PG_NX_MASK)) {
goto do_fault_rsvd;
}
ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
}
- pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
- a20_mask;
- PTE_HPHYS(pde_addr);
- pde = x86_ldq_phys(cs, pde_addr);
- if (!(pde & PG_PRESENT_MASK)) {
+ /*
+ * Page table level 2
+ */
+ pte_addr = ((pte & PG_ADDRESS_MASK) +
+ (((addr >> 21) & 0x1ff) << 3)) & a20_mask;
+ PTE_HPHYS(pte_addr);
+ pte = x86_ldq_phys(cs, pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
- if (pde & rsvd_mask) {
+ if (pte & rsvd_mask) {
goto do_fault_rsvd;
}
- ptep &= pde ^ PG_NX_MASK;
- if (pde & PG_PSE_MASK) {
+ ptep &= pte ^ PG_NX_MASK;
+ if (pte & PG_PSE_MASK) {
/* 2 MB page */
page_size = 2048 * 1024;
- pte_addr = pde_addr;
- pte = pde;
goto do_check_protect;
}
- /* 4 KB page */
- if (!(pde & PG_ACCESSED_MASK)) {
- pde |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pde_addr, pde);
+ if (!(pte & PG_ACCESSED_MASK)) {
+ pte |= PG_ACCESSED_MASK;
+ x86_stl_phys_notdirty(cs, pte_addr, pte);
}
- pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
- a20_mask;
+
+ /*
+ * Page table level 1
+ */
+ pte_addr = ((pte & PG_ADDRESS_MASK) +
+ (((addr >> 12) & 0x1ff) << 3)) & a20_mask;
PTE_HPHYS(pte_addr);
pte = x86_ldq_phys(cs, pte_addr);
if (!(pte & PG_PRESENT_MASK)) {
@@ -214,39 +220,37 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
ptep &= pte ^ PG_NX_MASK;
page_size = 4096;
} else {
- uint32_t pde;
-
- /* page directory entry */
- pde_addr = ((in->cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) &
- a20_mask;
- PTE_HPHYS(pde_addr);
- pde = x86_ldl_phys(cs, pde_addr);
- if (!(pde & PG_PRESENT_MASK)) {
+ /*
+ * Page table level 2
+ */
+ pte_addr = ((in->cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) & a20_mask;
+ PTE_HPHYS(pte_addr);
+ pte = x86_ldl_phys(cs, pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
- ptep = pde | PG_NX_MASK;
+ ptep = pte | PG_NX_MASK;
/* if PSE bit is set, then we use a 4MB page */
- if ((pde & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
+ if ((pte & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
page_size = 4096 * 1024;
- pte_addr = pde_addr;
-
- /* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
+ /*
+ * Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
* Leave bits 20-13 in place for setting accessed/dirty bits below.
*/
- pte = pde | ((pde & 0x1fe000LL) << (32 - 13));
+ pte = (uint32_t)pte | ((pte & 0x1fe000LL) << (32 - 13));
rsvd_mask = 0x200000;
goto do_check_protect_pse36;
}
-
- if (!(pde & PG_ACCESSED_MASK)) {
- pde |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pde_addr, pde);
+ if (!(pte & PG_ACCESSED_MASK)) {
+ pte |= PG_ACCESSED_MASK;
+ x86_stl_phys_notdirty(cs, pte_addr, pte);
}
- /* page directory entry */
- pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
- a20_mask;
+ /*
+ * Page table level 1
+ */
+ pte_addr = ((pte & ~0xfffu) + ((addr >> 10) & 0xffc)) & a20_mask;
PTE_HPHYS(pte_addr);
pte = x86_ldl_phys(cs, pte_addr);
if (!(pte & PG_PRESENT_MASK)) {
--
2.34.1
* [PATCH 14/14] target/i386: Use atomic operations for pte updates
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
` (12 preceding siblings ...)
2022-08-22 23:58 ` [PATCH 13/14] target/i386: Combine 5 sets of variables in mmu_translate Richard Henderson
@ 2022-08-22 23:58 ` Richard Henderson
2022-08-23 2:05 ` [PATCH 00/14] " Richard Henderson
14 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-08-22 23:58 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini, eduardo
Use probe_access_full in order to resolve to a host address,
which then lets us use a host cmpxchg to update the pte.
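The A/D-bit update itself reduces to a 32-bit compare-and-swap on the host
mapping of the pte. A simplified restatement of the ptw_setl() helper
introduced below (not the literal patch code; it assumes the pte is
host-addressable):

    static bool sketch_set_ad_bits(uint32_t *host_pte, uint32_t old, uint32_t set)
    {
        if (set & ~old) {
            uint32_t new = cpu_to_le32(old | set);

            old = cpu_to_le32(old);
            /* Fails if another vCPU changed the pte; the caller restarts the walk. */
            return qatomic_cmpxchg(host_pte, old, new) == old;
        }
        return true;    /* requested bits already set */
    }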
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/279
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/sysemu/excp_helper.c | 242 +++++++++++++++++++--------
1 file changed, 168 insertions(+), 74 deletions(-)
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index e5d9ff138e..74a76cf883 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -27,8 +27,8 @@ typedef struct TranslateParams {
target_ulong cr3;
int pg_mode;
int mmu_idx;
+ int ptw_idx;
MMUAccessType access_type;
- bool use_stage2;
} TranslateParams;
typedef struct TranslateResult {
@@ -50,43 +50,106 @@ typedef struct TranslateFault {
TranslateFaultStage2 stage2;
} TranslateFault;
-#define PTE_HPHYS(ADDR) \
- do { \
- if (in->use_stage2) { \
- nested_in.addr = (ADDR); \
- if (!mmu_translate(env, &nested_in, out, err)) { \
- err->stage2 = S2_GPT; \
- return false; \
- } \
- (ADDR) = out->paddr; \
- } \
- } while (0)
+typedef struct PTETranslate {
+ CPUX86State *env;
+ TranslateFault *err;
+ int ptw_idx;
+ void *haddr;
+ hwaddr gaddr;
+} PTETranslate;
+
+static bool ptw_translate(PTETranslate *inout, hwaddr addr)
+{
+ CPUTLBEntryFull *full;
+ int flags;
+
+ inout->gaddr = addr;
+ flags = probe_access_full(inout->env, addr, MMU_DATA_STORE,
+ inout->ptw_idx, true, &inout->haddr, &full, 0);
+
+ if (unlikely(flags & TLB_INVALID_MASK)) {
+ TranslateFault *err = inout->err;
+
+ assert(inout->ptw_idx == MMU_NESTED_IDX);
+ err->exception_index = 0; /* unused */
+ err->error_code = inout->env->error_code;
+ err->cr2 = addr;
+ err->stage2 = S2_GPT;
+ return false;
+ }
+ return true;
+}
+
+static inline uint32_t ptw_ldl(const PTETranslate *in)
+{
+ if (likely(in->haddr)) {
+ return ldl_p(in->haddr);
+ }
+ return cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
+}
+
+static inline uint64_t ptw_ldq(const PTETranslate *in)
+{
+ if (likely(in->haddr)) {
+ return ldq_p(in->haddr);
+ }
+ return cpu_ldq_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
+}
+
+/*
+ * Note that we can use a 32-bit cmpxchg for all page table entries,
+ * even 64-bit ones, because PG_PRESENT_MASK, PG_ACCESSED_MASK and
+ * PG_DIRTY_MASK are all in the low 32 bits.
+ */
+static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new)
+{
+ uint32_t cmp;
+
+ /* Does x86 really perform a rmw cycle on mmio for ptw? */
+ start_exclusive();
+ cmp = cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
+ if (cmp == old) {
+ cpu_stl_mmuidx_ra(in->env, in->gaddr, new, in->ptw_idx, 0);
+ }
+ end_exclusive();
+ return cmp == old;
+}
+
+static inline bool ptw_setl(const PTETranslate *in, uint32_t old, uint32_t set)
+{
+ if (set & ~old) {
+ uint32_t new = old | set;
+ if (likely(in->haddr)) {
+ old = cpu_to_le32(old);
+ new = cpu_to_le32(new);
+ return qatomic_cmpxchg((uint32_t *)in->haddr, old, new) == old;
+ }
+ return ptw_setl_slow(in, old, new);
+ }
+ return true;
+}
static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
TranslateResult *out, TranslateFault *err)
{
- TranslateParams nested_in = {
- /* Use store for page table entries, to allow A/D flag updates. */
- .access_type = MMU_DATA_STORE,
- .cr3 = env->nested_cr3,
- .pg_mode = env->nested_pg_mode,
- .mmu_idx = MMU_USER_IDX,
- .use_stage2 = false,
- };
-
- CPUState *cs = env_cpu(env);
- X86CPU *cpu = env_archcpu(env);
const int32_t a20_mask = x86_get_a20_mask(env);
const target_ulong addr = in->addr;
const int pg_mode = in->pg_mode;
const bool is_user = (in->mmu_idx == MMU_USER_IDX);
const MMUAccessType access_type = in->access_type;
- uint64_t ptep, pte;
+ uint64_t ptep, pte, rsvd_mask;
+ PTETranslate pte_trans = {
+ .env = env,
+ .err = err,
+ .ptw_idx = in->ptw_idx,
+ };
hwaddr pte_addr;
- uint64_t rsvd_mask = PG_ADDRESS_MASK & ~MAKE_64BIT_MASK(0, cpu->phys_bits);
uint32_t pkr;
int page_size;
+ restart_all:
+ rsvd_mask = ~MAKE_64BIT_MASK(0, env_archcpu(env)->phys_bits);
+ rsvd_mask &= PG_ADDRESS_MASK;
if (!(pg_mode & PG_MODE_NXE)) {
rsvd_mask |= PG_NX_MASK;
}
@@ -100,17 +163,19 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
*/
pte_addr = ((in->cr3 & ~0xfff) +
(((addr >> 48) & 0x1ff) << 3)) & a20_mask;
- PTE_HPHYS(pte_addr);
- pte = x86_ldq_phys(cs, pte_addr);
+ if (!ptw_translate(&pte_trans, pte_addr)) {
+ return false;
+ }
+ restart_5:
+ pte = ptw_ldq(&pte_trans);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
if (pte & (rsvd_mask | PG_PSE_MASK)) {
goto do_fault_rsvd;
}
- if (!(pte & PG_ACCESSED_MASK)) {
- pte |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pte_addr, pte);
+ if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
+ goto restart_5;
}
ptep = pte ^ PG_NX_MASK;
} else {
@@ -123,17 +188,19 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
*/
pte_addr = ((pte & PG_ADDRESS_MASK) +
(((addr >> 39) & 0x1ff) << 3)) & a20_mask;
- PTE_HPHYS(pte_addr);
- pte = x86_ldq_phys(cs, pte_addr);
+ if (!ptw_translate(&pte_trans, pte_addr)) {
+ return false;
+ }
+ restart_4:
+ pte = ptw_ldq(&pte_trans);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
if (pte & (rsvd_mask | PG_PSE_MASK)) {
goto do_fault_rsvd;
}
- if (!(pte & PG_ACCESSED_MASK)) {
- pte |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pte_addr, pte);
+ if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
+ goto restart_4;
}
ptep &= pte ^ PG_NX_MASK;
@@ -142,19 +209,21 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
*/
pte_addr = ((pte & PG_ADDRESS_MASK) +
(((addr >> 30) & 0x1ff) << 3)) & a20_mask;
- PTE_HPHYS(pte_addr);
- pte = x86_ldq_phys(cs, pte_addr);
+ if (!ptw_translate(&pte_trans, pte_addr)) {
+ return false;
+ }
+ restart_3_lma:
+ pte = ptw_ldq(&pte_trans);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
if (pte & rsvd_mask) {
goto do_fault_rsvd;
}
- ptep &= pte ^ PG_NX_MASK;
- if (!(pte & PG_ACCESSED_MASK)) {
- pte |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pte_addr, pte);
+ if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
+ goto restart_3_lma;
}
+ ptep &= pte ^ PG_NX_MASK;
if (pte & PG_PSE_MASK) {
/* 1 GB page */
page_size = 1024 * 1024 * 1024;
@@ -167,15 +236,21 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
* Page table level 3
*/
pte_addr = ((in->cr3 & ~0x1f) + ((addr >> 27) & 0x18)) & a20_mask;
- PTE_HPHYS(pte_addr);
- pte = x86_ldq_phys(cs, pte_addr);
+ if (!ptw_translate(&pte_trans, pte_addr)) {
+ return false;
+ }
+ rsvd_mask |= PG_HI_USER_MASK;
+ restart_3_nolma:
+ pte = ptw_ldq(&pte_trans);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
- rsvd_mask |= PG_HI_USER_MASK;
if (pte & (rsvd_mask | PG_NX_MASK)) {
goto do_fault_rsvd;
}
+ if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
+ goto restart_3_nolma;
+ }
ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
}
@@ -184,32 +259,37 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
*/
pte_addr = ((pte & PG_ADDRESS_MASK) +
(((addr >> 21) & 0x1ff) << 3)) & a20_mask;
- PTE_HPHYS(pte_addr);
- pte = x86_ldq_phys(cs, pte_addr);
+ if (!ptw_translate(&pte_trans, pte_addr)) {
+ return false;
+ }
+ restart_2_pae:
+ pte = ptw_ldq(&pte_trans);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
if (pte & rsvd_mask) {
goto do_fault_rsvd;
}
- ptep &= pte ^ PG_NX_MASK;
if (pte & PG_PSE_MASK) {
/* 2 MB page */
page_size = 2048 * 1024;
+ ptep &= pte ^ PG_NX_MASK;
goto do_check_protect;
}
- if (!(pte & PG_ACCESSED_MASK)) {
- pte |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pte_addr, pte);
+ if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
+ goto restart_2_pae;
}
+ ptep &= pte ^ PG_NX_MASK;
/*
* Page table level 1
*/
pte_addr = ((pte & PG_ADDRESS_MASK) +
(((addr >> 12) & 0x1ff) << 3)) & a20_mask;
- PTE_HPHYS(pte_addr);
- pte = x86_ldq_phys(cs, pte_addr);
+ if (!ptw_translate(&pte_trans, pte_addr)) {
+ return false;
+ }
+ pte = ptw_ldq(&pte_trans);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
@@ -224,8 +304,11 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
* Page table level 2
*/
pte_addr = ((in->cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) & a20_mask;
- PTE_HPHYS(pte_addr);
- pte = x86_ldl_phys(cs, pte_addr);
+ if (!ptw_translate(&pte_trans, pte_addr)) {
+ return false;
+ }
+ restart_2_nopae:
+ pte = ptw_ldl(&pte_trans);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
@@ -242,17 +325,18 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
rsvd_mask = 0x200000;
goto do_check_protect_pse36;
}
- if (!(pte & PG_ACCESSED_MASK)) {
- pte |= PG_ACCESSED_MASK;
- x86_stl_phys_notdirty(cs, pte_addr, pte);
+ if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
+ goto restart_2_nopae;
}
/*
* Page table level 1
*/
pte_addr = ((pte & ~0xfffu) + ((addr >> 10) & 0xffc)) & a20_mask;
- PTE_HPHYS(pte_addr);
- pte = x86_ldl_phys(cs, pte_addr);
+ if (!ptw_translate(&pte_trans, pte_addr)) {
+ return false;
+ }
+ pte = ptw_ldl(&pte_trans);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
@@ -319,27 +403,35 @@ do_check_protect_pse36:
uint32_t set = PG_ACCESSED_MASK;
if (access_type == MMU_DATA_STORE) {
set |= PG_DIRTY_MASK;
+ } else if (!(pte & PG_DIRTY_MASK)) {
+ /*
+ * Only set write access if already dirty...
+ * otherwise wait for dirty access.
+ */
+ prot &= ~PAGE_WRITE;
}
- if (set & ~pte) {
- pte |= set;
- x86_stl_phys_notdirty(cs, pte_addr, pte);
+ if (!ptw_setl(&pte_trans, pte, set)) {
+ /*
+ * We can arrive here from any of 3 levels and 2 formats.
+ * The only safe thing is to restart the entire lookup.
+ */
+ goto restart_all;
}
}
- if (!(pte & PG_DIRTY_MASK)) {
- /* only set write access if already dirty... otherwise wait
- for dirty access */
- assert(access_type != MMU_DATA_STORE);
- prot &= ~PAGE_WRITE;
- }
-
/* align to page_size */
out->paddr = (pte & a20_mask & PG_ADDRESS_MASK & ~(page_size - 1))
| (addr & (page_size - 1));
- if (in->use_stage2) {
- nested_in.addr = out->paddr;
- nested_in.access_type = access_type;
+ if (in->ptw_idx == MMU_NESTED_IDX) {
+ TranslateParams nested_in = {
+ .addr = out->paddr,
+ .access_type = access_type,
+ .cr3 = env->nested_cr3,
+ .pg_mode = env->nested_pg_mode,
+ .mmu_idx = MMU_USER_IDX,
+ .ptw_idx = MMU_PHYS_IDX,
+ };
if (!mmu_translate(env, &nested_in, out, err)) {
err->stage2 = S2_GPA;
@@ -436,7 +528,7 @@ static bool get_physical_address(CPUX86State *env, vaddr addr,
in.cr3 = env->nested_cr3;
in.pg_mode = env->nested_pg_mode;
in.mmu_idx = MMU_USER_IDX;
- in.use_stage2 = false;
+ in.ptw_idx = MMU_PHYS_IDX;
if (!mmu_translate(env, &in, out, err)) {
err->stage2 = S2_GPA;
@@ -449,7 +541,7 @@ static bool get_physical_address(CPUX86State *env, vaddr addr,
default:
in.cr3 = env->cr[3];
in.mmu_idx = mmu_idx;
- in.use_stage2 = use_stage2;
+ in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
in.pg_mode = get_pg_mode(env);
if (likely(in.pg_mode)) {
@@ -504,6 +596,8 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
}
if (probe) {
+ /* This will be used if recursing for stage2 translation. */
+ env->error_code = err.error_code;
return false;
}
--
2.34.1
* Re: [PATCH 00/14] target/i386: Use atomic operations for pte updates
2022-08-22 23:57 [PATCH 00/14] target/i386: Use atomic operations for pte updates Richard Henderson
` (13 preceding siblings ...)
2022-08-22 23:58 ` [PATCH 14/14] target/i386: Use atomic operations for pte updates Richard Henderson
@ 2022-08-23 2:05 ` Richard Henderson
14 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-08-23 2:05 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini, eduardo
On 8/22/22 16:57, Richard Henderson wrote:
> This patch set does two things:
>
> (1) Remove assert(!probe) from the x86 tlb_fill
>
> It turns out that this is a prerequisite for
> [PATCH v6 00/21] linux-user: Fix siginfo_t contents when jumping
> to non-readable pages
>
> because of a new use of probe_access(..., nonfault)
> when comparing TBs that cross a page boundary.
Turns out this was a bug in the v6 patch set. We don't require nonfault probes on
PROT_EXEC at all; v7 will fix this.
But it's still nice that non-faulting probes now work...
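For reference, from a caller's point of view a non-faulting probe now looks
roughly like this (hypothetical helper; probe_access_flags and
TLB_INVALID_MASK are the existing interfaces):

    static bool sketch_page_accessible(CPUArchState *env, target_ulong addr,
                                       MMUAccessType access_type, int mmu_idx)
    {
        void *host;
        int flags = probe_access_flags(env, addr, access_type, mmu_idx,
                                       true /* nonfault */, &host, 0);

        /* With the x86 assert(!probe) removed, this can no longer abort. */
        return !(flags & TLB_INVALID_MASK);
    }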
r~
* Re: [PATCH 03/14] accel/tcg: Suppress auto-invalidate in probe_access_internal
2022-08-22 23:57 ` [PATCH 03/14] accel/tcg: Suppress auto-invalidate in probe_access_internal Richard Henderson
@ 2022-08-23 9:19 ` David Hildenbrand
2022-08-23 15:19 ` Richard Henderson
0 siblings, 1 reply; 19+ messages in thread
From: David Hildenbrand @ 2022-08-23 9:19 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: pbonzini, eduardo
On 23.08.22 01:57, Richard Henderson wrote:
> When PAGE_WRITE_INV is set when calling tlb_set_page,
> we immediately set TLB_INVALID_MASK in order to force
> tlb_fill to be called on the next lookup. Here in
> probe_access_internal, we have just called tlb_fill
> and eliminated true misses, thus the lookup must be valid.
>
> This allows us to remove a warning comment from s390x.
> There doesn't seem to be a reason to change the code though.
>
> Cc: David Hildenbrand <david@redhat.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> accel/tcg/cputlb.c | 10 +++++++++-
> target/s390x/tcg/mem_helper.c | 4 ----
> 2 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 1509df96b4..5359113e8d 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -1602,6 +1602,7 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
> }
> tlb_addr = tlb_read_ofs(entry, elt_ofs);
>
> + flags = TLB_FLAGS_MASK;
> page_addr = addr & TARGET_PAGE_MASK;
> if (!tlb_hit_page(tlb_addr, page_addr)) {
> if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
> @@ -1617,10 +1618,17 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
>
> /* TLB resize via tlb_fill may have moved the entry. */
> entry = tlb_entry(env, mmu_idx, addr);
> +
> + /*
> + * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
> + * to force the next access through tlb_fill. We've just
> + * called tlb_fill, so we know that this entry *is* valid.
> + */
> + flags &= ~TLB_INVALID_MASK;
> }
> tlb_addr = tlb_read_ofs(entry, elt_ofs);
> }
> - flags = tlb_addr & TLB_FLAGS_MASK;
> + flags &= tlb_addr;
>
> /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
> if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
> diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
> index fc52aa128b..3758b9e688 100644
> --- a/target/s390x/tcg/mem_helper.c
> +++ b/target/s390x/tcg/mem_helper.c
> @@ -148,10 +148,6 @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
> #else
> int flags;
>
> - /*
> - * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
> - * to detect if there was an exception during tlb_fill().
> - */
Yeah, that was primarily a comment noting that we rely on tlb_fill_exc to
obtain the exact PGM_* value -- and at the same time use it to detect
whether there was an exception at all.
> env->tlb_fill_exc = 0;
> flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
> ra);
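
As a self-contained illustration of the flag handling in the cputlb.c hunk quoted
above (the TOY_TLB_* values are toy constants, not the real bits from cputlb.c):
start from the full flag mask, clear the invalid bit only on the path where the
entry was just refilled, and finally AND with the entry's flags.

#include <stdio.h>

enum {
    TOY_TLB_INVALID    = 1 << 0,    /* stands in for TLB_INVALID_MASK */
    TOY_TLB_MMIO       = 1 << 1,
    TOY_TLB_WATCHPOINT = 1 << 2,
    TOY_TLB_FLAGS_ALL  = TOY_TLB_INVALID | TOY_TLB_MMIO | TOY_TLB_WATCHPOINT
};

/* entry_flags: flag bits carried by the TLB entry (the toy analogue of
 * tlb_addr & TLB_FLAGS_MASK); refilled: whether tlb_fill was just called
 * for this entry. */
static int toy_probe_flags(int entry_flags, int refilled)
{
    int flags = TOY_TLB_FLAGS_ALL;

    if (refilled) {
        /* The entry was just filled, so an eagerly-set invalid bit
         * (the PAGE_WRITE_INV case) must not leak out to the caller. */
        flags &= ~TOY_TLB_INVALID;
    }
    return flags & entry_flags;
}

int main(void)
{
    /* Freshly filled write-invalidate entry: the invalid bit is set in
     * the entry, but the probe still reports the page as usable. */
    printf("%#x\n", toy_probe_flags(TOY_TLB_INVALID | TOY_TLB_MMIO, 1)); /* 0x2 */

    /* No refill: the invalid bit propagates to the caller. */
    printf("%#x\n", toy_probe_flags(TOY_TLB_INVALID, 0));                /* 0x1 */
    return 0;
}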
Change itself looks good to me.
However, it's been a while since I stared at this code, and I wonder whether
the CONFIG_USER_ONLY path is correct.
1) s390_probe_access() is documented as: "With nonfault=1, return the PGM_
exception that would have been injected into the guest; return 0 if no
exception was detected."
But in the CONFIG_USER_ONLY case, we return the flags from the probe,
not a PGM_* value. Maybe it doesn't matter,
because we'll simply inject a SIGSEGV in any case ...
I'd have assumed that we have to check here whether there was an exception in
a similar way, and detect the actual type. As the old comment indicates,
we can either
* check for *phost == NULL
* check for flags having TLB_INVALID_MASK set
... and I wonder whether we really care about the exception type for
CONFIG_USER_ONLY at all.
2) s390_probe_access() documents that for "CONFIG_USER_ONLY, the
faulting address is stored to env->__excp_addr."
However, that's only set in s390_cpu_record_sigsegv(). With nonfault=1
that will never actually trigger, right?
I assume db9aab5783a2 ("target/s390x: Use probe_access_flags in
s390_probe_access") might have introduced both. We had a flag conversion
to PGM_ in there and stored env->__excp_addr:
    flags = page_get_flags(addr);
    if (!(flags & (access_type == MMU_DATA_LOAD ? PAGE_READ :
                   PAGE_WRITE_ORG))) {
        env->__excp_addr = addr;
        flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
        if (nonfault) {
            return flags;
        }
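
To make the suggested check concrete, here is a toy, self-contained C model of it
(not a proposed patch; the TOY_* names are invented and are not the real
QEMU/s390x definitions): detect a non-faulting probe failure from the returned
flags or a NULL host pointer, record the faulting address, and convert the
failure back into a PGM-like code, along the lines of the removed code quoted
above.

#include <stdio.h>

enum { TOY_TLB_INVALID = 1 << 0 };                  /* like TLB_INVALID_MASK */
enum { TOY_PAGE_VALID = 1 << 0, TOY_PAGE_WRITE = 1 << 1 };
enum { TOY_PGM_NONE = 0, TOY_PGM_PROTECTION = 4, TOY_PGM_ADDRESSING = 5 };

typedef struct { unsigned long excp_addr; } ToyEnv; /* stands in for env->__excp_addr */

/* Stand-in for page_get_flags(): page 1 is mapped but never writable,
 * everything else is unmapped. */
static int toy_page_flags(unsigned long addr)
{
    return (addr >> 12) == 1 ? TOY_PAGE_VALID : 0;
}

/* Stand-in for a non-faulting probe: report flags and a host pointer
 * instead of raising a signal. */
static int toy_probe_flags(unsigned long addr, void **host)
{
    if (toy_page_flags(addr) & TOY_PAGE_WRITE) {
        static char backing[4096];
        *host = backing;
        return 0;
    }
    *host = NULL;
    return TOY_TLB_INVALID;
}

/* The conversion being discussed: probe failure -> faulting address plus
 * a PGM-like code chosen from the page flags. */
static int toy_probe_write(ToyEnv *env, unsigned long addr)
{
    void *host;
    int flags = toy_probe_flags(addr, &host);

    if ((flags & TOY_TLB_INVALID) || host == NULL) {
        env->excp_addr = addr;
        return (toy_page_flags(addr) & TOY_PAGE_VALID) ? TOY_PGM_PROTECTION
                                                       : TOY_PGM_ADDRESSING;
    }
    return TOY_PGM_NONE;
}

int main(void)
{
    ToyEnv env = { 0 };
    int pgm = toy_probe_write(&env, 0x1234);        /* mapped, not writable */

    printf("pgm=%d addr=%#lx\n", pgm, env.excp_addr);
    pgm = toy_probe_write(&env, 0x5000);            /* unmapped */
    printf("pgm=%d addr=%#lx\n", pgm, env.excp_addr);
    return 0;
}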
--
Thanks,
David / dhildenb
* Re: [PATCH 03/14] accel/tcg: Suppress auto-invalidate in probe_access_internal
2022-08-23 9:19 ` David Hildenbrand
@ 2022-08-23 15:19 ` Richard Henderson
2022-08-23 16:09 ` Richard Henderson
0 siblings, 1 reply; 19+ messages in thread
From: Richard Henderson @ 2022-08-23 15:19 UTC (permalink / raw)
To: David Hildenbrand, qemu-devel; +Cc: pbonzini, eduardo
On 8/23/22 02:19, David Hildenbrand wrote:
> 1) s390_probe_access() documents to "With nonfault=1, return the PGM_
> exception that would have been injected into the guest; return 0 if no
> exception was detected."
>
> But in case of CONFIG_USER_ONLY, we return the flags returned by
> s390_probe_access(), not a PGM__* value. Maybe it doesn't matter,
> because we'll simply inject a SIGSEGV in any case ...
I would have said it would matter for MVPG, except that it is incorrectly *not* marked as a
privileged instruction. There should be no CONFIG_USER_ONLY case to answer there.
> 2) s390_probe_access() documents that for "CONFIG_USER_ONLY, the
> faulting address is stored to env->__excp_addr.".
>
> However, that's only set in s390_cpu_record_sigsegv(). With nonfault=1
> that will never actually trigger, right?
Correct.
> I assume db9aab5783a2 ("target/s390x: Use probe_access_flags in
> s390_probe_access") might have introduced both. We had a flag conversion
> to PGM_ in there and stored env->__excp_addr:
Indeed, that commit is faulty in that it breaks the contract of s390_probe_access.
It's a shame, though, that we need to carry the extra code for the purpose, and that the
generic interfaces are not sufficient.
r~
* Re: [PATCH 03/14] accel/tcg: Suppress auto-invalidate in probe_access_internal
2022-08-23 15:19 ` Richard Henderson
@ 2022-08-23 16:09 ` Richard Henderson
0 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-08-23 16:09 UTC (permalink / raw)
To: David Hildenbrand, qemu-devel; +Cc: pbonzini, eduardo
On 8/23/22 08:19, Richard Henderson wrote:
> On 8/23/22 02:19, David Hildenbrand wrote:
>> 1) s390_probe_access() documents to "With nonfault=1, return the PGM_
>> exception that would have been injected into the guest; return 0 if no
>> exception was detected."
>>
>> But in case of CONFIG_USER_ONLY, we return the flags returned by
>> s390_probe_access(), not a PGM__* value. Maybe it doesn't matter,
>> because we'll simply inject a SIGSEGV in any case ...
>
> I would have said it would matter for MVPG, except that is incorrectly *not* marked as a
> privileged instruction. There should be no CONFIG_USER_ONLY case to answer there.
Ho hum, I misread that -- it's only privileged if an access key is specified (and we do not check or
support those).
r~