* [PATCH 01/10] accel/tcg: Simplify tlb_plugin_lookup
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-08-28 18:55 ` [PATCH 02/10] accel/tcg: Split out io_prepare and io_failed Richard Henderson
` (9 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Now that we defer address space update and tlb_flush until
the next async_run_on_cpu, the plugin run at the end of the
instruction no longer has to contend with a flushed tlb.
Therefore, delete SavedIOTLB entirely.
Properly return false from tlb_plugin_lookup when we do
not have a tlb match.
Fixes a bug in which SavedIOTLB had stale data, because
there were multiple i/o accesses within a single insn.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/hw/core/cpu.h | 13 -------
include/qemu/typedefs.h | 1 -
accel/tcg/cputlb.c | 79 ++++++++++++-----------------------------
3 files changed, 23 insertions(+), 70 deletions(-)
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index fdcbe87352..20e4f64d72 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -227,17 +227,6 @@ struct CPUWatchpoint {
QTAILQ_ENTRY(CPUWatchpoint) entry;
};
-#ifdef CONFIG_PLUGIN
-/*
- * For plugins we sometime need to save the resolved iotlb data before
- * the memory regions get moved around by io_writex.
- */
-typedef struct SavedIOTLB {
- MemoryRegionSection *section;
- hwaddr mr_offset;
-} SavedIOTLB;
-#endif
-
struct KVMState;
struct kvm_run;
@@ -409,8 +398,6 @@ struct CPUState {
#ifdef CONFIG_PLUGIN
GArray *plugin_mem_cbs;
- /* saved iotlb data from io_writex */
- SavedIOTLB saved_iotlb;
#endif
/* TODO Move common fields from CPUArchState here. */
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 834b0e47a0..5abdbc3874 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -129,7 +129,6 @@ typedef struct QString QString;
typedef struct RAMBlock RAMBlock;
typedef struct Range Range;
typedef struct ReservedRegion ReservedRegion;
-typedef struct SavedIOTLB SavedIOTLB;
typedef struct SHPCDevice SHPCDevice;
typedef struct SSIBus SSIBus;
typedef struct TCGHelperInfo TCGHelperInfo;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index c643d66190..fcf6ccebff 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1364,21 +1364,6 @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
}
}
-/*
- * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
- * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
- * because of the side effect of io_writex changing memory layout.
- */
-static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
- hwaddr mr_offset)
-{
-#ifdef CONFIG_PLUGIN
- SavedIOTLB *saved = &cs->saved_iotlb;
- saved->section = section;
- saved->mr_offset = mr_offset;
-#endif
-}
-
static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
int mmu_idx, vaddr addr, uintptr_t retaddr,
MMUAccessType access_type, MemOp op)
@@ -1398,12 +1383,6 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
cpu_io_recompile(cpu, retaddr);
}
- /*
- * The memory_region_dispatch may trigger a flush/resize
- * so for plugins we save the iotlb_data just in case.
- */
- save_iotlb_data(cpu, section, mr_offset);
-
{
QEMU_IOTHREAD_LOCK_GUARD();
r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
@@ -1438,12 +1417,6 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
}
cpu->mem_io_pc = retaddr;
- /*
- * The memory_region_dispatch may trigger a flush/resize
- * so for plugins we save the iotlb_data just in case.
- */
- save_iotlb_data(cpu, section, mr_offset);
-
{
QEMU_IOTHREAD_LOCK_GUARD();
r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
@@ -1726,45 +1699,39 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
* in the softmmu lookup code (or helper). We don't handle re-fills or
* checking the victim table. This is purely informational.
*
- * This almost never fails as the memory access being instrumented
- * should have just filled the TLB. The one corner case is io_writex
- * which can cause TLB flushes and potential resizing of the TLBs
- * losing the information we need. In those cases we need to recover
- * data from a copy of the CPUTLBEntryFull. As long as this always occurs
- * from the same thread (which a mem callback will be) this is safe.
+ * The one corner case is i/o write, which can cause changes to the
+ * address space. Those changes, and the corresponding tlb flush,
+ * should be delayed until the next TB, so even then this ought not fail.
+ * But check, Just in Case.
*/
-
bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
bool is_store, struct qemu_plugin_hwaddr *data)
{
CPUArchState *env = cpu->env_ptr;
CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
uintptr_t index = tlb_index(env, mmu_idx, addr);
- uint64_t tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
+ MMUAccessType access_type = is_store ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ uint64_t tlb_addr = tlb_read_idx(tlbe, access_type);
- if (likely(tlb_hit(tlb_addr, addr))) {
- /* We must have an iotlb entry for MMIO */
- if (tlb_addr & TLB_MMIO) {
- CPUTLBEntryFull *full;
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
- data->is_io = true;
- data->v.io.section =
- iotlb_to_section(cpu, full->xlat_section, full->attrs);
- data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
- } else {
- data->is_io = false;
- data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
- }
- return true;
- } else {
- SavedIOTLB *saved = &cpu->saved_iotlb;
- data->is_io = true;
- data->v.io.section = saved->section;
- data->v.io.offset = saved->mr_offset;
- return true;
+ if (unlikely(!tlb_hit(tlb_addr, addr))) {
+ return false;
}
-}
+ /* We must have an iotlb entry for MMIO */
+ if (tlb_addr & TLB_MMIO) {
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+ hwaddr xlat = full->xlat_section;
+
+ data->is_io = true;
+ data->v.io.offset = (xlat & TARGET_PAGE_MASK) + addr;
+ data->v.io.section =
+ iotlb_to_section(cpu, xlat & ~TARGET_PAGE_MASK, full->attrs);
+ } else {
+ data->is_io = false;
+ data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
+ }
+ return true;
+}
#endif
/*
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 02/10] accel/tcg: Split out io_prepare and io_failed
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
2023-08-28 18:55 ` [PATCH 01/10] accel/tcg: Simplify tlb_plugin_lookup Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-08-28 21:24 ` Philippe Mathieu-Daudé
2023-08-28 18:55 ` [PATCH 03/10] accel/tcg: Use CPUTLBEntryFull.phys_addr in io_failed Richard Henderson
` (8 subsequent siblings)
10 siblings, 1 reply; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
These are common code from io_readx and io_writex.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 77 +++++++++++++++++++++++++++-------------------
1 file changed, 45 insertions(+), 32 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index fcf6ccebff..17987f74e5 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1264,7 +1264,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
* (non-page-aligned) vaddr of the eventual memory access to get
* the MemoryRegion offset for the access. Note that the vaddr we
* subtract here is that of the page base, and not the same as the
- * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
+ * vaddr we add back in io_prepare()/get_page_addr_code().
*/
desc->fulltlb[index] = *full;
full = &desc->fulltlb[index];
@@ -1364,37 +1364,60 @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
}
}
-static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
- int mmu_idx, vaddr addr, uintptr_t retaddr,
- MMUAccessType access_type, MemOp op)
+static MemoryRegionSection *
+io_prepare(hwaddr *out_offset, CPUArchState *env, hwaddr xlat,
+ MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
{
CPUState *cpu = env_cpu(env);
- hwaddr mr_offset;
MemoryRegionSection *section;
- MemoryRegion *mr;
- uint64_t val;
- MemTxResult r;
+ hwaddr mr_offset;
- section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
- mr = section->mr;
- mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
+ section = iotlb_to_section(cpu, xlat, attrs);
+ mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
if (!cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
+ *out_offset = mr_offset;
+ return section;
+}
+
+static void io_failed(CPUArchState *env, CPUTLBEntryFull *full, vaddr addr,
+ unsigned size, MMUAccessType access_type, int mmu_idx,
+ MemTxResult response, uintptr_t retaddr,
+ MemoryRegionSection *section, hwaddr mr_offset)
+{
+ hwaddr physaddr = (mr_offset +
+ section->offset_within_address_space -
+ section->offset_within_region);
+
+ cpu_transaction_failed(env_cpu(env), physaddr, addr, size, access_type,
+ mmu_idx, full->attrs, response, retaddr);
+}
+
+static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
+ int mmu_idx, vaddr addr, uintptr_t retaddr,
+ MMUAccessType access_type, MemOp op)
+{
+ MemoryRegionSection *section;
+ hwaddr mr_offset;
+ MemoryRegion *mr;
+ MemTxResult r;
+ uint64_t val;
+
+ section = io_prepare(&mr_offset, env, full->xlat_section,
+ full->attrs, addr, retaddr);
+ mr = section->mr;
+
{
QEMU_IOTHREAD_LOCK_GUARD();
r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
}
if (r != MEMTX_OK) {
- hwaddr physaddr = mr_offset +
- section->offset_within_address_space -
- section->offset_within_region;
-
- cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
- mmu_idx, full->attrs, r, retaddr);
+ io_failed(env, full, addr, memop_size(op), access_type, mmu_idx,
+ r, retaddr, section, mr_offset);
}
return val;
}
@@ -1403,19 +1426,14 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
int mmu_idx, uint64_t val, vaddr addr,
uintptr_t retaddr, MemOp op)
{
- CPUState *cpu = env_cpu(env);
- hwaddr mr_offset;
MemoryRegionSection *section;
+ hwaddr mr_offset;
MemoryRegion *mr;
MemTxResult r;
- section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
+ section = io_prepare(&mr_offset, env, full->xlat_section,
+ full->attrs, addr, retaddr);
mr = section->mr;
- mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
- if (!cpu->can_do_io) {
- cpu_io_recompile(cpu, retaddr);
- }
- cpu->mem_io_pc = retaddr;
{
QEMU_IOTHREAD_LOCK_GUARD();
@@ -1423,13 +1441,8 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
}
if (r != MEMTX_OK) {
- hwaddr physaddr = mr_offset +
- section->offset_within_address_space -
- section->offset_within_region;
-
- cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
- MMU_DATA_STORE, mmu_idx, full->attrs, r,
- retaddr);
+ io_failed(env, full, addr, memop_size(op), MMU_DATA_STORE, mmu_idx,
+ r, retaddr, section, mr_offset);
}
}
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 03/10] accel/tcg: Use CPUTLBEntryFull.phys_addr in io_failed
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
2023-08-28 18:55 ` [PATCH 01/10] accel/tcg: Simplify tlb_plugin_lookup Richard Henderson
2023-08-28 18:55 ` [PATCH 02/10] accel/tcg: Split out io_prepare and io_failed Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-08-30 21:00 ` Philippe Mathieu-Daudé
2023-08-28 18:55 ` [PATCH 04/10] plugin: Simplify struct qemu_plugin_hwaddr Richard Henderson
` (7 subsequent siblings)
10 siblings, 1 reply; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Since the introduction of CPUTLBEntryFull, we can recover
the full cpu address space physical address without having
to examine the MemoryRegionSection.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 17987f74e5..fd6c956214 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1385,13 +1385,9 @@ io_prepare(hwaddr *out_offset, CPUArchState *env, hwaddr xlat,
static void io_failed(CPUArchState *env, CPUTLBEntryFull *full, vaddr addr,
unsigned size, MMUAccessType access_type, int mmu_idx,
- MemTxResult response, uintptr_t retaddr,
- MemoryRegionSection *section, hwaddr mr_offset)
+ MemTxResult response, uintptr_t retaddr)
{
- hwaddr physaddr = (mr_offset +
- section->offset_within_address_space -
- section->offset_within_region);
-
+ hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
cpu_transaction_failed(env_cpu(env), physaddr, addr, size, access_type,
mmu_idx, full->attrs, response, retaddr);
}
@@ -1417,7 +1413,7 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
if (r != MEMTX_OK) {
io_failed(env, full, addr, memop_size(op), access_type, mmu_idx,
- r, retaddr, section, mr_offset);
+ r, retaddr);
}
return val;
}
@@ -1442,7 +1438,7 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
if (r != MEMTX_OK) {
io_failed(env, full, addr, memop_size(op), MMU_DATA_STORE, mmu_idx,
- r, retaddr, section, mr_offset);
+ r, retaddr);
}
}
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH 03/10] accel/tcg: Use CPUTLBEntryFull.phys_addr in io_failed
2023-08-28 18:55 ` [PATCH 03/10] accel/tcg: Use CPUTLBEntryFull.phys_addr in io_failed Richard Henderson
@ 2023-08-30 21:00 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 17+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-08-30 21:00 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: alex.bennee
On 28/8/23 20:55, Richard Henderson wrote:
> Since the introduction of CPUTLBEntryFull, we can recover
> the full cpu address space physical address without having
> to examine the MemoryRegionSection.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> accel/tcg/cputlb.c | 12 ++++--------
> 1 file changed, 4 insertions(+), 8 deletions(-)
>
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 17987f74e5..fd6c956214 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -1385,13 +1385,9 @@ io_prepare(hwaddr *out_offset, CPUArchState *env, hwaddr xlat,
>
> static void io_failed(CPUArchState *env, CPUTLBEntryFull *full, vaddr addr,
> unsigned size, MMUAccessType access_type, int mmu_idx,
> - MemTxResult response, uintptr_t retaddr,
> - MemoryRegionSection *section, hwaddr mr_offset)
> + MemTxResult response, uintptr_t retaddr)
> {
> - hwaddr physaddr = (mr_offset +
> - section->offset_within_address_space -
> - section->offset_within_region);
> -
> + hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
To the best of my knowledge:
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 04/10] plugin: Simplify struct qemu_plugin_hwaddr
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
` (2 preceding siblings ...)
2023-08-28 18:55 ` [PATCH 03/10] accel/tcg: Use CPUTLBEntryFull.phys_addr in io_failed Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-08-30 15:37 ` Alex Bennée
2023-08-28 18:55 ` [PATCH 05/10] accel/tcg: Merge cpu_transaction_failed into io_failed Richard Henderson
` (6 subsequent siblings)
10 siblings, 1 reply; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Rather than saving MemoryRegionSection and offset,
save phys_addr and MemoryRegion. This matches up
much closer with the plugin api.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/qemu/plugin-memory.h | 11 ++---------
accel/tcg/cputlb.c | 16 +++++++++-------
plugins/api.c | 27 ++++++---------------------
3 files changed, 17 insertions(+), 37 deletions(-)
diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h
index 43165f2452..71c1123308 100644
--- a/include/qemu/plugin-memory.h
+++ b/include/qemu/plugin-memory.h
@@ -15,15 +15,8 @@
struct qemu_plugin_hwaddr {
bool is_io;
bool is_store;
- union {
- struct {
- MemoryRegionSection *section;
- hwaddr offset;
- } io;
- struct {
- void *hostaddr;
- } ram;
- } v;
+ hwaddr phys_addr;
+ MemoryRegion *mr;
};
/**
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index fd6c956214..b1dc213675 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1721,23 +1721,25 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
uintptr_t index = tlb_index(env, mmu_idx, addr);
MMUAccessType access_type = is_store ? MMU_DATA_STORE : MMU_DATA_LOAD;
uint64_t tlb_addr = tlb_read_idx(tlbe, access_type);
+ CPUTLBEntryFull *full;
if (unlikely(!tlb_hit(tlb_addr, addr))) {
return false;
}
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+ data->phys_addr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
+
/* We must have an iotlb entry for MMIO */
if (tlb_addr & TLB_MMIO) {
- CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
- hwaddr xlat = full->xlat_section;
-
+ MemoryRegionSection *section =
+ iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
+ full->attrs);
data->is_io = true;
- data->v.io.offset = (xlat & TARGET_PAGE_MASK) + addr;
- data->v.io.section =
- iotlb_to_section(cpu, xlat & ~TARGET_PAGE_MASK, full->attrs);
+ data->mr = section->mr;
} else {
data->is_io = false;
- data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
+ data->mr = NULL;
}
return true;
}
diff --git a/plugins/api.c b/plugins/api.c
index 2078b16edb..5521b0ad36 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -316,22 +316,7 @@ uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
{
#ifdef CONFIG_SOFTMMU
if (haddr) {
- if (!haddr->is_io) {
- RAMBlock *block;
- ram_addr_t offset;
- void *hostaddr = haddr->v.ram.hostaddr;
-
- block = qemu_ram_block_from_host(hostaddr, false, &offset);
- if (!block) {
- error_report("Bad host ram pointer %p", haddr->v.ram.hostaddr);
- abort();
- }
-
- return block->offset + offset + block->mr->addr;
- } else {
- MemoryRegionSection *mrs = haddr->v.io.section;
- return mrs->offset_within_address_space + haddr->v.io.offset;
- }
+ return haddr->phys_addr;
}
#endif
return 0;
@@ -341,13 +326,13 @@ const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
{
#ifdef CONFIG_SOFTMMU
if (h && h->is_io) {
- MemoryRegionSection *mrs = h->v.io.section;
- if (!mrs->mr->name) {
- unsigned long maddr = 0xffffffff & (uintptr_t) mrs->mr;
- g_autofree char *temp = g_strdup_printf("anon%08lx", maddr);
+ MemoryRegion *mr = h->mr;
+ if (!mr->name) {
+ unsigned maddr = (uintptr_t)mr;
+ g_autofree char *temp = g_strdup_printf("anon%08x", maddr);
return g_intern_string(temp);
} else {
- return g_intern_string(mrs->mr->name);
+ return g_intern_string(mr->name);
}
} else {
return g_intern_static_string("RAM");
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 05/10] accel/tcg: Merge cpu_transaction_failed into io_failed
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
` (3 preceding siblings ...)
2023-08-28 18:55 ` [PATCH 04/10] plugin: Simplify struct qemu_plugin_hwaddr Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-08-28 21:27 ` Philippe Mathieu-Daudé
2023-08-28 18:55 ` [PATCH 06/10] accel/tcg: Replace direct use of io_readx/io_writex in do_{ld, st}_1 Richard Henderson
` (5 subsequent siblings)
10 siblings, 1 reply; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Push computation down into the if statements to the point
the data is used.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 33 +++++++++++++--------------------
1 file changed, 13 insertions(+), 20 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index b1dc213675..d2e4c4459d 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1347,23 +1347,6 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
mmu_idx, retaddr);
}
-static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
- vaddr addr, unsigned size,
- MMUAccessType access_type,
- int mmu_idx, MemTxAttrs attrs,
- MemTxResult response,
- uintptr_t retaddr)
-{
- CPUClass *cc = CPU_GET_CLASS(cpu);
-
- if (!cpu->ignore_memory_transaction_failures &&
- cc->tcg_ops->do_transaction_failed) {
- cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
- access_type, mmu_idx, attrs,
- response, retaddr);
- }
-}
-
static MemoryRegionSection *
io_prepare(hwaddr *out_offset, CPUArchState *env, hwaddr xlat,
MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
@@ -1387,9 +1370,19 @@ static void io_failed(CPUArchState *env, CPUTLBEntryFull *full, vaddr addr,
unsigned size, MMUAccessType access_type, int mmu_idx,
MemTxResult response, uintptr_t retaddr)
{
- hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
- cpu_transaction_failed(env_cpu(env), physaddr, addr, size, access_type,
- mmu_idx, full->attrs, response, retaddr);
+ CPUState *cpu = env_cpu(env);
+
+ if (!cpu->ignore_memory_transaction_failures) {
+ CPUClass *cc = CPU_GET_CLASS(cpu);
+
+ if (cc->tcg_ops->do_transaction_failed) {
+ hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
+
+ cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
+ access_type, mmu_idx,
+ full->attrs, response, retaddr);
+ }
+ }
}
static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 06/10] accel/tcg: Replace direct use of io_readx/io_writex in do_{ld, st}_1
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
` (4 preceding siblings ...)
2023-08-28 18:55 ` [PATCH 05/10] accel/tcg: Merge cpu_transaction_failed into io_failed Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-08-28 18:55 ` [PATCH 07/10] accel/tcg: Merge io_readx into do_ld_mmio_beN Richard Henderson
` (4 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index d2e4c4459d..6a7f6bf701 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2339,7 +2339,8 @@ static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
MMUAccessType type, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ return do_ld_mmio_beN(env, p->full, 0, p->addr, 1, mmu_idx, type, ra);
} else {
return *(uint8_t *)p->haddr;
}
@@ -2854,7 +2855,8 @@ static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
int mmu_idx, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, p->full, val, p->addr, 1, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 07/10] accel/tcg: Merge io_readx into do_ld_mmio_beN
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
` (5 preceding siblings ...)
2023-08-28 18:55 ` [PATCH 06/10] accel/tcg: Replace direct use of io_readx/io_writex in do_{ld, st}_1 Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-08-30 21:08 ` Philippe Mathieu-Daudé
2023-08-28 18:55 ` [PATCH 08/10] accel/tcg: Merge io_writex into do_st_mmio_leN Richard Henderson
` (3 subsequent siblings)
10 siblings, 1 reply; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Avoid multiple calls to io_prepare for unaligned accesses.
One call to do_ld_mmio_beN will never cross pages.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 84 +++++++++++++++++-----------------------------
1 file changed, 30 insertions(+), 54 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 6a7f6bf701..f4772b5c5d 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1385,32 +1385,6 @@ static void io_failed(CPUArchState *env, CPUTLBEntryFull *full, vaddr addr,
}
}
-static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
- int mmu_idx, vaddr addr, uintptr_t retaddr,
- MMUAccessType access_type, MemOp op)
-{
- MemoryRegionSection *section;
- hwaddr mr_offset;
- MemoryRegion *mr;
- MemTxResult r;
- uint64_t val;
-
- section = io_prepare(&mr_offset, env, full->xlat_section,
- full->attrs, addr, retaddr);
- mr = section->mr;
-
- {
- QEMU_IOTHREAD_LOCK_GUARD();
- r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
- }
-
- if (r != MEMTX_OK) {
- io_failed(env, full, addr, memop_size(op), access_type, mmu_idx,
- r, retaddr);
- }
- return val;
-}
-
static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
int mmu_idx, uint64_t val, vaddr addr,
uintptr_t retaddr, MemOp op)
@@ -2059,40 +2033,42 @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t ret_be, vaddr addr, int size,
int mmu_idx, MMUAccessType type, uintptr_t ra)
{
- uint64_t t;
+ MemoryRegionSection *section;
+ hwaddr mr_offset;
+ MemoryRegion *mr;
+ MemTxAttrs attrs;
tcg_debug_assert(size > 0 && size <= 8);
+
+ attrs = full->attrs;
+ section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+ mr = section->mr;
+
do {
+ MemOp this_mop;
+ unsigned this_size;
+ uint64_t val;
+ MemTxResult r;
+
/* Read aligned pieces up to 8 bytes. */
- switch ((size | (int)addr) & 7) {
- case 1:
- case 3:
- case 5:
- case 7:
- t = io_readx(env, full, mmu_idx, addr, ra, type, MO_UB);
- ret_be = (ret_be << 8) | t;
- size -= 1;
- addr += 1;
- break;
- case 2:
- case 6:
- t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUW);
- ret_be = (ret_be << 16) | t;
- size -= 2;
- addr += 2;
- break;
- case 4:
- t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUL);
- ret_be = (ret_be << 32) | t;
- size -= 4;
- addr += 4;
- break;
- case 0:
- return io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUQ);
- default:
- qemu_build_not_reached();
+ this_mop = ctz32(size | (int)addr | 8);
+ this_size = 1 << this_mop;
+ this_mop |= MO_BE;
+
+ r = memory_region_dispatch_read(mr, mr_offset, &val, this_mop, attrs);
+ if (unlikely(r != MEMTX_OK)) {
+ io_failed(env, full, addr, this_size, type, mmu_idx, r, ra);
}
+ if (this_size == 8) {
+ return val;
+ }
+
+ ret_be = (ret_be << (this_size * 8)) | val;
+ addr += this_size;
+ mr_offset += this_size;
+ size -= this_size;
} while (size);
+
return ret_be;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH 07/10] accel/tcg: Merge io_readx into do_ld_mmio_beN
2023-08-28 18:55 ` [PATCH 07/10] accel/tcg: Merge io_readx into do_ld_mmio_beN Richard Henderson
@ 2023-08-30 21:08 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 17+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-08-30 21:08 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: alex.bennee
On 28/8/23 20:55, Richard Henderson wrote:
> Avoid multiple calls to io_prepare for unaligned acceses.
> One call to do_ld_mmio_beN will never cross pages.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> accel/tcg/cputlb.c | 84 +++++++++++++++++-----------------------------
> 1 file changed, 30 insertions(+), 54 deletions(-)
> @@ -2059,40 +2033,42 @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
> uint64_t ret_be, vaddr addr, int size,
> int mmu_idx, MMUAccessType type, uintptr_t ra)
> {
> - uint64_t t;
> + MemoryRegionSection *section;
> + hwaddr mr_offset;
> + MemoryRegion *mr;
> + MemTxAttrs attrs;
>
> tcg_debug_assert(size > 0 && size <= 8);
> +
> + attrs = full->attrs;
> + section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
> + mr = section->mr;
> +
> do {
> + MemOp this_mop;
> + unsigned this_size;
> + uint64_t val;
> + MemTxResult r;
> +
> /* Read aligned pieces up to 8 bytes. */
> - switch ((size | (int)addr) & 7) {
> - case 1:
> - case 3:
> - case 5:
> - case 7:
> - t = io_readx(env, full, mmu_idx, addr, ra, type, MO_UB);
> - ret_be = (ret_be << 8) | t;
> - size -= 1;
> - addr += 1;
> - break;
> - case 2:
> - case 6:
> - t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUW);
> - ret_be = (ret_be << 16) | t;
> - size -= 2;
> - addr += 2;
> - break;
> - case 4:
> - t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUL);
> - ret_be = (ret_be << 32) | t;
> - size -= 4;
> - addr += 4;
> - break;
> - case 0:
> - return io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUQ);
> - default:
> - qemu_build_not_reached();
> + this_mop = ctz32(size | (int)addr | 8);
> + this_size = 1 << this_mop;
So far:
memop_size()
But I'll review this patch again after some rest.
> + this_mop |= MO_BE;
> +
> + r = memory_region_dispatch_read(mr, mr_offset, &val, this_mop, attrs);
> + if (unlikely(r != MEMTX_OK)) {
> + io_failed(env, full, addr, this_size, type, mmu_idx, r, ra);
> }
> + if (this_size == 8) {
> + return val;
> + }
> +
> + ret_be = (ret_be << (this_size * 8)) | val;
> + addr += this_size;
> + mr_offset += this_size;
> + size -= this_size;
> } while (size);
> +
> return ret_be;
> }
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 08/10] accel/tcg: Merge io_writex into do_st_mmio_leN
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
` (6 preceding siblings ...)
2023-08-28 18:55 ` [PATCH 07/10] accel/tcg: Merge io_readx into do_ld_mmio_beN Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-08-28 18:55 ` [PATCH 09/10] accel/tcg: Introduce do_ld16_mmio_beN Richard Henderson
` (2 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Avoid multiple calls to io_prepare for unaligned accesses.
One call to do_st_mmio_leN will never cross pages.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 82 +++++++++++++++++-----------------------------
1 file changed, 30 insertions(+), 52 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index f4772b5c5d..cf80add4b3 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1385,30 +1385,6 @@ static void io_failed(CPUArchState *env, CPUTLBEntryFull *full, vaddr addr,
}
}
-static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
- int mmu_idx, uint64_t val, vaddr addr,
- uintptr_t retaddr, MemOp op)
-{
- MemoryRegionSection *section;
- hwaddr mr_offset;
- MemoryRegion *mr;
- MemTxResult r;
-
- section = io_prepare(&mr_offset, env, full->xlat_section,
- full->attrs, addr, retaddr);
- mr = section->mr;
-
- {
- QEMU_IOTHREAD_LOCK_GUARD();
- r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
- }
-
- if (r != MEMTX_OK) {
- io_failed(env, full, addr, memop_size(op), MMU_DATA_STORE, mmu_idx,
- r, retaddr);
- }
-}
-
/* Return true if ADDR is present in the victim tlb, and has been copied
back to the main tlb. */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
@@ -2679,39 +2655,41 @@ static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t val_le, vaddr addr, int size,
int mmu_idx, uintptr_t ra)
{
+ MemoryRegionSection *section;
+ hwaddr mr_offset;
+ MemoryRegion *mr;
+ MemTxAttrs attrs;
+
tcg_debug_assert(size > 0 && size <= 8);
+ attrs = full->attrs;
+ section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+ mr = section->mr;
+
do {
+ MemOp this_mop;
+ unsigned this_size;
+ MemTxResult r;
+
/* Store aligned pieces up to 8 bytes. */
- switch ((size | (int)addr) & 7) {
- case 1:
- case 3:
- case 5:
- case 7:
- io_writex(env, full, mmu_idx, val_le, addr, ra, MO_UB);
- val_le >>= 8;
- size -= 1;
- addr += 1;
- break;
- case 2:
- case 6:
- io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUW);
- val_le >>= 16;
- size -= 2;
- addr += 2;
- break;
- case 4:
- io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUL);
- val_le >>= 32;
- size -= 4;
- addr += 4;
- break;
- case 0:
- io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUQ);
- return 0;
- default:
- qemu_build_not_reached();
+ this_mop = ctz32(size | (int)addr | 8);
+ this_size = 1 << this_mop;
+ this_mop |= MO_LE;
+
+ r = memory_region_dispatch_write(mr, mr_offset, val_le,
+ this_mop, attrs);
+ if (unlikely(r != MEMTX_OK)) {
+ io_failed(env, full, addr, this_size, MMU_DATA_STORE,
+ mmu_idx, r, ra);
}
+ if (this_size == 8) {
+ return 0;
+ }
+
+ val_le >>= this_size * 8;
+ addr += this_size;
+ mr_offset += this_size;
+ size -= this_size;
} while (size);
return val_le;
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 09/10] accel/tcg: Introduce do_ld16_mmio_beN
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
` (7 preceding siblings ...)
2023-08-28 18:55 ` [PATCH 08/10] accel/tcg: Merge io_writex into do_st_mmio_leN Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-08-28 18:55 ` [PATCH 10/10] accel/tcg: Introduce do_st16_mmio_leN Richard Henderson
2023-09-09 21:02 ` [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
10 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Split out int_ld_mmio_beN, to be used by both do_ld_mmio_beN
and do_ld16_mmio_beN. Move the locks down into the two
functions, since each one now covers all accesses to one page.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 91 ++++++++++++++++++++++++++++++----------------
1 file changed, 59 insertions(+), 32 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index cf80add4b3..8a7e386415 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2005,21 +2005,11 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
* Load @size bytes from @addr, which is memory-mapped i/o.
* The bytes are concatenated in big-endian order with @ret_be.
*/
-static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
- uint64_t ret_be, vaddr addr, int size,
- int mmu_idx, MMUAccessType type, uintptr_t ra)
+static uint64_t int_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+ uint64_t ret_be, vaddr addr, int size,
+ int mmu_idx, MMUAccessType type, uintptr_t ra,
+ MemoryRegion *mr, hwaddr mr_offset)
{
- MemoryRegionSection *section;
- hwaddr mr_offset;
- MemoryRegion *mr;
- MemTxAttrs attrs;
-
- tcg_debug_assert(size > 0 && size <= 8);
-
- attrs = full->attrs;
- section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
- mr = section->mr;
-
do {
MemOp this_mop;
unsigned this_size;
@@ -2031,7 +2021,8 @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
this_size = 1 << this_mop;
this_mop |= MO_BE;
- r = memory_region_dispatch_read(mr, mr_offset, &val, this_mop, attrs);
+ r = memory_region_dispatch_read(mr, mr_offset, &val,
+ this_mop, full->attrs);
if (unlikely(r != MEMTX_OK)) {
io_failed(env, full, addr, this_size, type, mmu_idx, r, ra);
}
@@ -2048,6 +2039,56 @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
return ret_be;
}
+static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+ uint64_t ret_be, vaddr addr, int size,
+ int mmu_idx, MMUAccessType type, uintptr_t ra)
+{
+ MemoryRegionSection *section;
+ MemoryRegion *mr;
+ hwaddr mr_offset;
+ MemTxAttrs attrs;
+ uint64_t ret;
+
+ tcg_debug_assert(size > 0 && size <= 8);
+
+ attrs = full->attrs;
+ section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+ mr = section->mr;
+
+ qemu_mutex_lock_iothread();
+ ret = int_ld_mmio_beN(env, full, ret_be, addr, size, mmu_idx,
+ type, ra, mr, mr_offset);
+ qemu_mutex_unlock_iothread();
+
+ return ret;
+}
+
+static Int128 do_ld16_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+ uint64_t ret_be, vaddr addr, int size,
+ int mmu_idx, uintptr_t ra)
+{
+ MemoryRegionSection *section;
+ MemoryRegion *mr;
+ hwaddr mr_offset;
+ MemTxAttrs attrs;
+ uint64_t a, b;
+
+ tcg_debug_assert(size > 8 && size <= 16);
+
+ attrs = full->attrs;
+ section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+ mr = section->mr;
+
+ qemu_mutex_lock_iothread();
+ a = int_ld_mmio_beN(env, full, ret_be, addr, size - 8, mmu_idx,
+ MMU_DATA_LOAD, ra, mr, mr_offset);
+ b = int_ld_mmio_beN(env, full, ret_be, addr + size - 8, 8, mmu_idx,
+ MMU_DATA_LOAD, ra, mr, mr_offset + size - 8);
+ qemu_mutex_unlock_iothread();
+
+ return int128_make128(b, a);
+}
+
/**
* do_ld_bytes_beN
* @p: translation parameters
@@ -2190,7 +2231,6 @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
unsigned tmp, half_size;
if (unlikely(p->flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
return do_ld_mmio_beN(env, p->full, ret_be, p->addr, p->size,
mmu_idx, type, ra);
}
@@ -2241,12 +2281,7 @@ static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
MemOp atom;
if (unlikely(p->flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
- a = do_ld_mmio_beN(env, p->full, a, p->addr, size - 8,
- mmu_idx, MMU_DATA_LOAD, ra);
- b = do_ld_mmio_beN(env, p->full, 0, p->addr + 8, 8,
- mmu_idx, MMU_DATA_LOAD, ra);
- return int128_make128(b, a);
+ return do_ld16_mmio_beN(env, p->full, a, p->addr, size, mmu_idx, ra);
}
/*
@@ -2291,7 +2326,6 @@ static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
MMUAccessType type, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
return do_ld_mmio_beN(env, p->full, 0, p->addr, 1, mmu_idx, type, ra);
} else {
return *(uint8_t *)p->haddr;
@@ -2304,7 +2338,6 @@ static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint16_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 2, mmu_idx, type, ra);
if ((memop & MO_BSWAP) == MO_LE) {
ret = bswap16(ret);
@@ -2325,7 +2358,6 @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint32_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 4, mmu_idx, type, ra);
if ((memop & MO_BSWAP) == MO_LE) {
ret = bswap32(ret);
@@ -2346,7 +2378,6 @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint64_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 8, mmu_idx, type, ra);
if ((memop & MO_BSWAP) == MO_LE) {
ret = bswap64(ret);
@@ -2505,12 +2536,8 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr,
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
if (likely(!crosspage)) {
if (unlikely(l.page[0].flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
- a = do_ld_mmio_beN(env, l.page[0].full, 0, addr, 8,
- l.mmu_idx, MMU_DATA_LOAD, ra);
- b = do_ld_mmio_beN(env, l.page[0].full, 0, addr + 8, 8,
- l.mmu_idx, MMU_DATA_LOAD, ra);
- ret = int128_make128(b, a);
+ ret = do_ld16_mmio_beN(env, l.page[0].full, 0, addr, 16,
+ l.mmu_idx, ra);
if ((l.memop & MO_BSWAP) == MO_LE) {
ret = bswap128(ret);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 10/10] accel/tcg: Introduce do_st16_mmio_leN
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
` (8 preceding siblings ...)
2023-08-28 18:55 ` [PATCH 09/10] accel/tcg: Introduce do_ld16_mmio_beN Richard Henderson
@ 2023-08-28 18:55 ` Richard Henderson
2023-09-09 21:02 ` [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
10 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2023-08-28 18:55 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Split out int_st_mmio_leN, to be used by both do_st_mmio_leN
and do_st16_mmio_leN. Move the locks down into the two
functions, since each one now covers all accesses to one page.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 88 ++++++++++++++++++++++++++++++----------------
1 file changed, 58 insertions(+), 30 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 8a7e386415..ff253e78d2 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2678,21 +2678,11 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
* The bytes to store are extracted in little-endian order from @val_le;
* return the bytes of @val_le beyond @p->size that have not been stored.
*/
-static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
- uint64_t val_le, vaddr addr, int size,
- int mmu_idx, uintptr_t ra)
+static uint64_t int_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+ uint64_t val_le, vaddr addr, int size,
+ int mmu_idx, uintptr_t ra,
+ MemoryRegion *mr, hwaddr mr_offset)
{
- MemoryRegionSection *section;
- hwaddr mr_offset;
- MemoryRegion *mr;
- MemTxAttrs attrs;
-
- tcg_debug_assert(size > 0 && size <= 8);
-
- attrs = full->attrs;
- section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
- mr = section->mr;
-
do {
MemOp this_mop;
unsigned this_size;
@@ -2704,7 +2694,7 @@ static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
this_mop |= MO_LE;
r = memory_region_dispatch_write(mr, mr_offset, val_le,
- this_mop, attrs);
+ this_mop, full->attrs);
if (unlikely(r != MEMTX_OK)) {
io_failed(env, full, addr, this_size, MMU_DATA_STORE,
mmu_idx, r, ra);
@@ -2722,6 +2712,56 @@ static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
return val_le;
}
+static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+ uint64_t val_le, vaddr addr, int size,
+ int mmu_idx, uintptr_t ra)
+{
+ MemoryRegionSection *section;
+ hwaddr mr_offset;
+ MemoryRegion *mr;
+ MemTxAttrs attrs;
+ uint64_t ret;
+
+ tcg_debug_assert(size > 0 && size <= 8);
+
+ attrs = full->attrs;
+ section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+ mr = section->mr;
+
+ qemu_mutex_lock_iothread();
+ ret = int_st_mmio_leN(env, full, val_le, addr, size, mmu_idx,
+ ra, mr, mr_offset);
+ qemu_mutex_unlock_iothread();
+
+ return ret;
+}
+
+static uint64_t do_st16_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+ Int128 val_le, vaddr addr, int size,
+ int mmu_idx, uintptr_t ra)
+{
+ MemoryRegionSection *section;
+ MemoryRegion *mr;
+ hwaddr mr_offset;
+ MemTxAttrs attrs;
+ uint64_t ret;
+
+ tcg_debug_assert(size > 8 && size <= 16);
+
+ attrs = full->attrs;
+ section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+ mr = section->mr;
+
+ qemu_mutex_lock_iothread();
+ int_st_mmio_leN(env, full, int128_getlo(val_le), addr, 8,
+ mmu_idx, ra, mr, mr_offset);
+ ret = int_st_mmio_leN(env, full, int128_gethi(val_le), addr + 8,
+ size - 8, mmu_idx, ra, mr, mr_offset + 8);
+ qemu_mutex_unlock_iothread();
+
+ return ret;
+}
+
/*
* Wrapper for the above.
*/
@@ -2733,7 +2773,6 @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
unsigned tmp, half_size;
if (unlikely(p->flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
return do_st_mmio_leN(env, p->full, val_le, p->addr,
p->size, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
@@ -2788,11 +2827,8 @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
MemOp atom;
if (unlikely(p->flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
- do_st_mmio_leN(env, p->full, int128_getlo(val_le),
- p->addr, 8, mmu_idx, ra);
- return do_st_mmio_leN(env, p->full, int128_gethi(val_le),
- p->addr + 8, size - 8, mmu_idx, ra);
+ return do_st16_mmio_leN(env, p->full, val_le, p->addr,
+ size, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
return int128_gethi(val_le) >> ((size - 8) * 8);
}
@@ -2836,7 +2872,6 @@ static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
int mmu_idx, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
do_st_mmio_leN(env, p->full, val, p->addr, 1, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
@@ -2852,7 +2887,6 @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
if ((memop & MO_BSWAP) != MO_LE) {
val = bswap16(val);
}
- QEMU_IOTHREAD_LOCK_GUARD();
do_st_mmio_leN(env, p->full, val, p->addr, 2, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
@@ -2872,7 +2906,6 @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
if ((memop & MO_BSWAP) != MO_LE) {
val = bswap32(val);
}
- QEMU_IOTHREAD_LOCK_GUARD();
do_st_mmio_leN(env, p->full, val, p->addr, 4, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
@@ -2892,7 +2925,6 @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
if ((memop & MO_BSWAP) != MO_LE) {
val = bswap64(val);
}
- QEMU_IOTHREAD_LOCK_GUARD();
do_st_mmio_leN(env, p->full, val, p->addr, 8, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
@@ -3020,11 +3052,7 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
if ((l.memop & MO_BSWAP) != MO_LE) {
val = bswap128(val);
}
- a = int128_getlo(val);
- b = int128_gethi(val);
- QEMU_IOTHREAD_LOCK_GUARD();
- do_st_mmio_leN(env, l.page[0].full, a, addr, 8, l.mmu_idx, ra);
- do_st_mmio_leN(env, l.page[0].full, b, addr + 8, 8, l.mmu_idx, ra);
+ do_st16_mmio_leN(env, l.page[0].full, val, addr, 16, l.mmu_idx, ra);
} else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
--
2.34.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH 00/10] plugin and tcg cleanups to cputlb.c.
2023-08-28 18:55 [PATCH 00/10] plugin and tcg cleanups to cputlb.c Richard Henderson
` (9 preceding siblings ...)
2023-08-28 18:55 ` [PATCH 10/10] accel/tcg: Introduce do_st16_mmio_leN Richard Henderson
@ 2023-09-09 21:02 ` Richard Henderson
10 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2023-09-09 21:02 UTC (permalink / raw)
To: qemu-devel; +Cc: alex.bennee
Ping. Patches 1, 6-10 still need review.
r~
On 8/28/23 11:55, Richard Henderson wrote:
> Based-on: 20230826232415.80233-1-richard.henderson@linaro.org
> ("[PATCH 0/3] softmmu: Use async_run_on_cpu in tcg_commit")
>
> Remove the SavedIOTLB hackery, now that the flush won't happen
> until the TB. Clean up the locking, so that we don't try to
> take the lock twice. Clean up the iotlb lookup so that we only
> perform it once per page, rather than for each aligned piece.
>
>
> r~
>
>
> Richard Henderson (10):
> accel/tcg: Simplify tlb_plugin_lookup
> accel/tcg: Split out io_prepare and io_failed
> accel/tcg: Use CPUTLBEntryFull.phys_addr in io_failed
> plugin: Simplify struct qemu_plugin_hwaddr
> accel/tcg: Merge cpu_transaction_failed into io_failed
> accel/tcg: Replace direct use of io_readx/io_writex in do_{ld,st}_1
> accel/tcg: Merge io_readx into do_ld_mmio_beN
> accel/tcg: Merge io_writex into do_st_mmio_leN
> accel/tcg: Introduce do_ld16_mmio_beN
> accel/tcg: Introduce do_st16_mmio_leN
>
> include/hw/core/cpu.h | 13 --
> include/qemu/plugin-memory.h | 11 +-
> include/qemu/typedefs.h | 1 -
> accel/tcg/cputlb.c | 426 +++++++++++++++++------------------
> plugins/api.c | 27 +--
> 5 files changed, 212 insertions(+), 266 deletions(-)
>
^ permalink raw reply [flat|nested] 17+ messages in thread