* [PATCH 1/3] accel/tcg: Adjust parameters and locking with do_{ld,st}_mmio_*
From: Richard Henderson @ 2023-08-01 18:42 UTC
To: qemu-devel
Replace MMULookupPageData* with CPUTLBEntryFull, addr, size.
Move QEMU_IOTHREAD_LOCK_GUARD to the caller.
This simplifies the usage from do_ld16_beN and do_st16_leN, where
we were not locking the entire operation and had to jump through
hoops to pass addr and size.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
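A toy illustration (not QEMU code) of why the guard wants to live in
the caller: with the lock taken once around both 8-byte halves of a
16-byte MMIO access, nothing can interleave between the halves. The
names toy_lock, mmio_reg and ld_beN are invented for this sketch; the
real code uses QEMU_IOTHREAD_LOCK_GUARD() and do_ld_mmio_beN() as in
the diff below.

#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t toy_lock = PTHREAD_MUTEX_INITIALIZER;
static uint8_t mmio_reg[16];

/* The callee no longer locks; it only accumulates bytes big-endian. */
static uint64_t ld_beN(uint64_t ret_be, int addr, int size)
{
    for (int i = 0; i < size; i++) {
        ret_be = (ret_be << 8) | mmio_reg[addr + i];
    }
    return ret_be;
}

int main(void)
{
    uint64_t a, b;

    for (int i = 0; i < 16; i++) {
        mmio_reg[i] = i;
    }

    /* The caller holds the lock across the whole 16-byte operation. */
    pthread_mutex_lock(&toy_lock);
    a = ld_beN(0, 0, 8);
    b = ld_beN(0, 8, 8);
    pthread_mutex_unlock(&toy_lock);

    printf("hi %016" PRIx64 " lo %016" PRIx64 "\n", a, b);
    return 0;
}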
accel/tcg/cputlb.c | 65 +++++++++++++++++++++++-----------------------
1 file changed, 32 insertions(+), 33 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index ba44501a7c..d28606b93e 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2066,24 +2066,21 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
/**
* do_ld_mmio_beN:
* @env: cpu context
- * @p: translation parameters
+ * @full: page parameters
* @ret_be: accumulated data
+ * @addr: virtual address
+ * @size: number of bytes
* @mmu_idx: virtual address context
* @ra: return address into tcg generated code, or 0
*
- * Load @p->size bytes from @p->addr, which is memory-mapped i/o.
+ * Load @size bytes from @addr, which is memory-mapped i/o.
* The bytes are concatenated in big-endian order with @ret_be.
*/
-static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p,
- uint64_t ret_be, int mmu_idx,
- MMUAccessType type, uintptr_t ra)
+static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+ uint64_t ret_be, vaddr addr, int size,
+ int mmu_idx, MMUAccessType type, uintptr_t ra)
{
- CPUTLBEntryFull *full = p->full;
- vaddr addr = p->addr;
- int i, size = p->size;
-
- QEMU_IOTHREAD_LOCK_GUARD();
- for (i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
ret_be = (ret_be << 8) | x;
}
@@ -2232,7 +2229,9 @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
unsigned tmp, half_size;
if (unlikely(p->flags & TLB_MMIO)) {
- return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ return do_ld_mmio_beN(env, p->full, ret_be, p->addr, p->size,
+ mmu_idx, type, ra);
}
/*
@@ -2281,11 +2280,11 @@ static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
MemOp atom;
if (unlikely(p->flags & TLB_MMIO)) {
- p->size = size - 8;
- a = do_ld_mmio_beN(env, p, a, mmu_idx, MMU_DATA_LOAD, ra);
- p->addr += p->size;
- p->size = 8;
- b = do_ld_mmio_beN(env, p, 0, mmu_idx, MMU_DATA_LOAD, ra);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ a = do_ld_mmio_beN(env, p->full, a, p->addr, size - 8,
+ mmu_idx, MMU_DATA_LOAD, ra);
+ b = do_ld_mmio_beN(env, p->full, 0, p->addr + 8, 8,
+ mmu_idx, MMU_DATA_LOAD, ra);
return int128_make128(b, a);
}
@@ -2664,24 +2663,22 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
/**
* do_st_mmio_leN:
* @env: cpu context
- * @p: translation parameters
+ * @full: page parameters
* @val_le: data to store
+ * @addr: virtual address
+ * @size: number of bytes
* @mmu_idx: virtual address context
* @ra: return address into tcg generated code, or 0
*
- * Store @p->size bytes at @p->addr, which is memory-mapped i/o.
+ * Store @size bytes at @addr, which is memory-mapped i/o.
* The bytes to store are extracted in little-endian order from @val_le;
* return the bytes of @val_le beyond @p->size that have not been stored.
*/
-static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
- uint64_t val_le, int mmu_idx, uintptr_t ra)
+static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+ uint64_t val_le, vaddr addr, int size,
+ int mmu_idx, uintptr_t ra)
{
- CPUTLBEntryFull *full = p->full;
- vaddr addr = p->addr;
- int i, size = p->size;
-
- QEMU_IOTHREAD_LOCK_GUARD();
- for (i = 0; i < size; i++, val_le >>= 8) {
+ for (int i = 0; i < size; i++, val_le >>= 8) {
io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
}
return val_le;
@@ -2698,7 +2695,9 @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
unsigned tmp, half_size;
if (unlikely(p->flags & TLB_MMIO)) {
- return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ return do_st_mmio_leN(env, p->full, val_le, p->addr,
+ p->size, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
return val_le >> (p->size * 8);
}
@@ -2751,11 +2750,11 @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
MemOp atom;
if (unlikely(p->flags & TLB_MMIO)) {
- p->size = 8;
- do_st_mmio_leN(env, p, int128_getlo(val_le), mmu_idx, ra);
- p->size = size - 8;
- p->addr += 8;
- return do_st_mmio_leN(env, p, int128_gethi(val_le), mmu_idx, ra);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, p->full, int128_getlo(val_le),
+ p->addr, 8, mmu_idx, ra);
+ return do_st_mmio_leN(env, p->full, int128_gethi(val_le),
+ p->addr + 8, size - 8, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
return int128_gethi(val_le) >> ((size - 8) * 8);
}
--
2.34.1
* [PATCH 2/3] accel/tcg: Issue wider aligned i/o in do_{ld,st}_mmio_*
From: Richard Henderson @ 2023-08-01 18:42 UTC
To: qemu-devel
If the address and size are aligned, send larger chunks
to the memory subsystem. This will be required to make
more use of these helpers.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
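A standalone sketch (not QEMU code) of the chunk selection added below:
at each step, (size | addr) & 7 picks the widest access width that
divides both the current address and the remaining length, so an
aligned access goes out as a single read or write while a misaligned
one is broken into naturally aligned pieces. The names issue_aligned
and mmio_access are invented stand-ins for the helpers and for
io_readx()/io_writex(); the real loop also asserts 1 <= size <= 8 and
passes the matching MO_* memop.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void mmio_access(uint64_t addr, int width)
{
    printf("  %d byte(s) at 0x%" PRIx64 "\n", width, addr);
}

/* Decompose a 1..8 byte access into naturally aligned pieces. */
static void issue_aligned(uint64_t addr, int size)
{
    do {
        switch ((size | (int)addr) & 7) {
        case 1: case 3: case 5: case 7:
            mmio_access(addr, 1);
            size -= 1;
            addr += 1;
            break;
        case 2: case 6:
            mmio_access(addr, 2);
            size -= 2;
            addr += 2;
            break;
        case 4:
            mmio_access(addr, 4);
            size -= 4;
            addr += 4;
            break;
        case 0:
            mmio_access(addr, 8);
            return;
        }
    } while (size);
}

int main(void)
{
    issue_aligned(0x1003, 5);   /* 1 byte at 0x1003, then 4 bytes at 0x1004 */
    issue_aligned(0x1000, 8);   /* a single 8-byte access */
    return 0;
}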
accel/tcg/cputlb.c | 76 +++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 69 insertions(+), 7 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index d28606b93e..c3e1fdbf37 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2080,10 +2080,40 @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t ret_be, vaddr addr, int size,
int mmu_idx, MMUAccessType type, uintptr_t ra)
{
- for (int i = 0; i < size; i++) {
- uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
- ret_be = (ret_be << 8) | x;
- }
+ uint64_t t;
+
+ tcg_debug_assert(size > 0 && size <= 8);
+ do {
+ /* Read aligned pieces up to 8 bytes. */
+ switch ((size | (int)addr) & 7) {
+ case 1:
+ case 3:
+ case 5:
+ case 7:
+ t = io_readx(env, full, mmu_idx, addr, ra, type, MO_UB);
+ ret_be = (ret_be << 8) | t;
+ size -= 1;
+ addr += 1;
+ break;
+ case 2:
+ case 6:
+ t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUW);
+ ret_be = (ret_be << 16) | t;
+ size -= 2;
+ addr += 2;
+ break;
+ case 4:
+ t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUL);
+ ret_be = (ret_be << 32) | t;
+ size -= 4;
+ addr += 4;
+ break;
+ case 0:
+ return io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUQ);
+ default:
+ qemu_build_not_reached();
+ }
+ } while (size);
return ret_be;
}
@@ -2678,9 +2708,41 @@ static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t val_le, vaddr addr, int size,
int mmu_idx, uintptr_t ra)
{
- for (int i = 0; i < size; i++, val_le >>= 8) {
- io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
- }
+ tcg_debug_assert(size > 0 && size <= 8);
+
+ do {
+ /* Store aligned pieces up to 8 bytes. */
+ switch ((size | (int)addr) & 7) {
+ case 1:
+ case 3:
+ case 5:
+ case 7:
+ io_writex(env, full, mmu_idx, val_le, addr, ra, MO_UB);
+ val_le >>= 8;
+ size -= 1;
+ addr += 1;
+ break;
+ case 2:
+ case 6:
+ io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUW);
+ val_le >>= 16;
+ size -= 2;
+ addr += 2;
+ break;
+ case 4:
+ io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUL);
+ val_le >>= 32;
+ size -= 4;
+ addr += 4;
+ break;
+ case 0:
+ io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUQ);
+ return 0;
+ default:
+ qemu_build_not_reached();
+ }
+ } while (size);
+
return val_le;
}
--
2.34.1
* [PATCH 3/3] accel/tcg: Do not issue misaligned i/o
From: Richard Henderson @ 2023-08-01 18:42 UTC
To: qemu-devel
In the single-page case we were issuing misaligned i/o to
the memory subsystem, which does not handle it properly.
Split such accesses via do_{ld,st}_mmio_*.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1800
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
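A standalone sketch (not QEMU code) of the endian handling this moves
to the call sites: the load helper always accumulates big-endian, and
the caller byte-swaps once afterwards when the guest access is
little-endian (the store path mirrors this with a little-endian helper
and a swap before the call). device_regs, read_mmio_byte() and
ld_mmio_be4() are invented stand-ins, and __builtin_bswap32() stands
in for the bswap32() used in the diff.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static const uint8_t device_regs[4] = { 0x11, 0x22, 0x33, 0x44 };

static uint8_t read_mmio_byte(int addr)
{
    return device_regs[addr];
}

/* Accumulate big-endian, like do_ld_mmio_beN(). */
static uint32_t ld_mmio_be4(int addr)
{
    uint32_t ret = 0;
    for (int i = 0; i < 4; i++) {
        ret = (ret << 8) | read_mmio_byte(addr + i);
    }
    return ret;
}

int main(void)
{
    uint32_t be = ld_mmio_be4(0);

    /* Big-endian guest access: use the accumulated value as-is. */
    printf("BE load: 0x%08" PRIx32 "\n", be);                      /* 0x11223344 */
    /* Little-endian guest access: a single swap at the end. */
    printf("LE load: 0x%08" PRIx32 "\n", __builtin_bswap32(be));   /* 0x44332211 */
    return 0;
}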
accel/tcg/cputlb.c | 118 +++++++++++++++++++++++++++------------------
1 file changed, 72 insertions(+), 46 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index c3e1fdbf37..05d272f839 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2369,16 +2369,20 @@ static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
MMUAccessType type, MemOp memop, uintptr_t ra)
{
- uint64_t ret;
+ uint16_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
- return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
- }
-
- /* Perform the load host endian, then swap if necessary. */
- ret = load_atom_2(env, ra, p->haddr, memop);
- if (memop & MO_BSWAP) {
- ret = bswap16(ret);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 2, mmu_idx, type, ra);
+ if ((memop & MO_BSWAP) == MO_LE) {
+ ret = bswap16(ret);
+ }
+ } else {
+ /* Perform the load host endian, then swap if necessary. */
+ ret = load_atom_2(env, ra, p->haddr, memop);
+ if (memop & MO_BSWAP) {
+ ret = bswap16(ret);
+ }
}
return ret;
}
@@ -2389,13 +2393,17 @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint32_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
- return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
- }
-
- /* Perform the load host endian. */
- ret = load_atom_4(env, ra, p->haddr, memop);
- if (memop & MO_BSWAP) {
- ret = bswap32(ret);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 4, mmu_idx, type, ra);
+ if ((memop & MO_BSWAP) == MO_LE) {
+ ret = bswap32(ret);
+ }
+ } else {
+ /* Perform the load host endian. */
+ ret = load_atom_4(env, ra, p->haddr, memop);
+ if (memop & MO_BSWAP) {
+ ret = bswap32(ret);
+ }
}
return ret;
}
@@ -2406,13 +2414,17 @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint64_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
- return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
- }
-
- /* Perform the load host endian. */
- ret = load_atom_8(env, ra, p->haddr, memop);
- if (memop & MO_BSWAP) {
- ret = bswap64(ret);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 8, mmu_idx, type, ra);
+ if ((memop & MO_BSWAP) == MO_LE) {
+ ret = bswap64(ret);
+ }
+ } else {
+ /* Perform the load host endian. */
+ ret = load_atom_8(env, ra, p->haddr, memop);
+ if (memop & MO_BSWAP) {
+ ret = bswap64(ret);
+ }
}
return ret;
}
@@ -2560,20 +2572,22 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr,
cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
if (likely(!crosspage)) {
- /* Perform the load host endian. */
if (unlikely(l.page[0].flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
- a = io_readx(env, l.page[0].full, l.mmu_idx, addr,
- ra, MMU_DATA_LOAD, MO_64);
- b = io_readx(env, l.page[0].full, l.mmu_idx, addr + 8,
- ra, MMU_DATA_LOAD, MO_64);
- ret = int128_make128(HOST_BIG_ENDIAN ? b : a,
- HOST_BIG_ENDIAN ? a : b);
+ a = do_ld_mmio_beN(env, l.page[0].full, 0, addr, 8,
+ l.mmu_idx, MMU_DATA_LOAD, ra);
+ b = do_ld_mmio_beN(env, l.page[0].full, 0, addr + 8, 8,
+ l.mmu_idx, MMU_DATA_LOAD, ra);
+ ret = int128_make128(b, a);
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ ret = bswap128(ret);
+ }
} else {
+ /* Perform the load host endian. */
ret = load_atom_16(env, ra, l.page[0].haddr, l.memop);
- }
- if (l.memop & MO_BSWAP) {
- ret = bswap128(ret);
+ if (l.memop & MO_BSWAP) {
+ ret = bswap128(ret);
+ }
}
return ret;
}
@@ -2872,7 +2886,11 @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
int mmu_idx, MemOp memop, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ if ((memop & MO_BSWAP) != MO_LE) {
+ val = bswap16(val);
+ }
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, p->full, val, p->addr, 2, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
@@ -2888,7 +2906,11 @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
int mmu_idx, MemOp memop, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ if ((memop & MO_BSWAP) != MO_LE) {
+ val = bswap32(val);
+ }
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, p->full, val, p->addr, 4, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
@@ -2904,7 +2926,11 @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
int mmu_idx, MemOp memop, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ if ((memop & MO_BSWAP) != MO_LE) {
+ val = bswap64(val);
+ }
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, p->full, val, p->addr, 8, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
@@ -3027,22 +3053,22 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
if (likely(!crosspage)) {
- /* Swap to host endian if necessary, then store. */
- if (l.memop & MO_BSWAP) {
- val = bswap128(val);
- }
if (unlikely(l.page[0].flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
- if (HOST_BIG_ENDIAN) {
- b = int128_getlo(val), a = int128_gethi(val);
- } else {
- a = int128_getlo(val), b = int128_gethi(val);
+ if ((l.memop & MO_BSWAP) != MO_LE) {
+ val = bswap128(val);
}
- io_writex(env, l.page[0].full, l.mmu_idx, a, addr, ra, MO_64);
- io_writex(env, l.page[0].full, l.mmu_idx, b, addr + 8, ra, MO_64);
+ a = int128_getlo(val);
+ b = int128_gethi(val);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, l.page[0].full, a, addr, 8, l.mmu_idx, ra);
+ do_st_mmio_leN(env, l.page[0].full, b, addr + 8, 8, l.mmu_idx, ra);
} else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
+ /* Swap to host endian if necessary, then store. */
+ if (l.memop & MO_BSWAP) {
+ val = bswap128(val);
+ }
store_atom_16(env, ra, l.page[0].haddr, l.memop, val);
}
return;
--
2.34.1