* [PATCH 1/3] accel/tcg: Adjust parameters and locking with do_{ld,st}_mmio_*
From: Richard Henderson @ 2023-08-01 18:42 UTC
To: qemu-devel
Replace MMULookupPageData* with CPUTLBEntryFull, addr, size.
Move QEMU_IOTHREAD_LOCK_GUARD to the caller.
This simplifies the usage from do_ld16_beN and do_st16_leN, where
we were not locking the entire operation and had to jump through
hoops to pass addr and size.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
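A toy illustration (not QEMU code) of why the guard wants to live in
the caller: with the lock taken once around both 8-byte halves of a
16-byte MMIO access, nothing can interleave between the halves. The
names toy_lock, mmio_reg and ld_beN are invented for this sketch; the
real code uses QEMU_IOTHREAD_LOCK_GUARD() and do_ld_mmio_beN() as in
the diff below.

#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t toy_lock = PTHREAD_MUTEX_INITIALIZER;
static uint8_t mmio_reg[16];

/* The callee no longer locks; it only accumulates bytes big-endian. */
static uint64_t ld_beN(uint64_t ret_be, int addr, int size)
{
    for (int i = 0; i < size; i++) {
        ret_be = (ret_be << 8) | mmio_reg[addr + i];
    }
    return ret_be;
}

int main(void)
{
    uint64_t a, b;

    for (int i = 0; i < 16; i++) {
        mmio_reg[i] = i;
    }

    /* The caller holds the lock across the whole 16-byte operation. */
    pthread_mutex_lock(&toy_lock);
    a = ld_beN(0, 0, 8);
    b = ld_beN(0, 8, 8);
    pthread_mutex_unlock(&toy_lock);

    printf("hi %016" PRIx64 " lo %016" PRIx64 "\n", a, b);
    return 0;
}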
accel/tcg/cputlb.c | 65 +++++++++++++++++++++++-----------------------
1 file changed, 32 insertions(+), 33 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index ba44501a7c..d28606b93e 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2066,24 +2066,21 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
/**
* do_ld_mmio_beN:
* @env: cpu context
- * @p: translation parameters
+ * @full: page parameters
* @ret_be: accumulated data
+ * @addr: virtual address
+ * @size: number of bytes
* @mmu_idx: virtual address context
* @ra: return address into tcg generated code, or 0
*
- * Load @p->size bytes from @p->addr, which is memory-mapped i/o.
+ * Load @size bytes from @addr, which is memory-mapped i/o.
* The bytes are concatenated in big-endian order with @ret_be.
*/
-static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p,
- uint64_t ret_be, int mmu_idx,
- MMUAccessType type, uintptr_t ra)
+static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+ uint64_t ret_be, vaddr addr, int size,
+ int mmu_idx, MMUAccessType type, uintptr_t ra)
{
- CPUTLBEntryFull *full = p->full;
- vaddr addr = p->addr;
- int i, size = p->size;
-
- QEMU_IOTHREAD_LOCK_GUARD();
- for (i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
ret_be = (ret_be << 8) | x;
}
@@ -2232,7 +2229,9 @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
unsigned tmp, half_size;
if (unlikely(p->flags & TLB_MMIO)) {
- return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ return do_ld_mmio_beN(env, p->full, ret_be, p->addr, p->size,
+ mmu_idx, type, ra);
}
/*
@@ -2281,11 +2280,11 @@ static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
MemOp atom;
if (unlikely(p->flags & TLB_MMIO)) {
- p->size = size - 8;
- a = do_ld_mmio_beN(env, p, a, mmu_idx, MMU_DATA_LOAD, ra);
- p->addr += p->size;
- p->size = 8;
- b = do_ld_mmio_beN(env, p, 0, mmu_idx, MMU_DATA_LOAD, ra);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ a = do_ld_mmio_beN(env, p->full, a, p->addr, size - 8,
+ mmu_idx, MMU_DATA_LOAD, ra);
+ b = do_ld_mmio_beN(env, p->full, 0, p->addr + 8, 8,
+ mmu_idx, MMU_DATA_LOAD, ra);
return int128_make128(b, a);
}
@@ -2664,24 +2663,22 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
/**
* do_st_mmio_leN:
* @env: cpu context
- * @p: translation parameters
+ * @full: page parameters
* @val_le: data to store
+ * @addr: virtual address
+ * @size: number of bytes
* @mmu_idx: virtual address context
* @ra: return address into tcg generated code, or 0
*
- * Store @p->size bytes at @p->addr, which is memory-mapped i/o.
+ * Store @size bytes at @addr, which is memory-mapped i/o.
* The bytes to store are extracted in little-endian order from @val_le;
* return the bytes of @val_le beyond @p->size that have not been stored.
*/
-static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
- uint64_t val_le, int mmu_idx, uintptr_t ra)
+static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+ uint64_t val_le, vaddr addr, int size,
+ int mmu_idx, uintptr_t ra)
{
- CPUTLBEntryFull *full = p->full;
- vaddr addr = p->addr;
- int i, size = p->size;
-
- QEMU_IOTHREAD_LOCK_GUARD();
- for (i = 0; i < size; i++, val_le >>= 8) {
+ for (int i = 0; i < size; i++, val_le >>= 8) {
io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
}
return val_le;
@@ -2698,7 +2695,9 @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
unsigned tmp, half_size;
if (unlikely(p->flags & TLB_MMIO)) {
- return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ return do_st_mmio_leN(env, p->full, val_le, p->addr,
+ p->size, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
return val_le >> (p->size * 8);
}
@@ -2751,11 +2750,11 @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
MemOp atom;
if (unlikely(p->flags & TLB_MMIO)) {
- p->size = 8;
- do_st_mmio_leN(env, p, int128_getlo(val_le), mmu_idx, ra);
- p->size = size - 8;
- p->addr += 8;
- return do_st_mmio_leN(env, p, int128_gethi(val_le), mmu_idx, ra);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, p->full, int128_getlo(val_le),
+ p->addr, 8, mmu_idx, ra);
+ return do_st_mmio_leN(env, p->full, int128_gethi(val_le),
+ p->addr + 8, size - 8, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
return int128_gethi(val_le) >> ((size - 8) * 8);
}
--
2.34.1
* [PATCH 2/3] accel/tcg: Issue wider aligned i/o in do_{ld,st}_mmio_*
From: Richard Henderson @ 2023-08-01 18:42 UTC
To: qemu-devel
If the address and size are aligned, send larger chunks
to the memory subsystem. This will be required to make
more use of these helpers.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
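A standalone sketch (not QEMU code) of the chunk selection added below:
at each step, (size | addr) & 7 picks the widest access width that
divides both the current address and the remaining length, so an
aligned access goes out as a single read or write while a misaligned
one is broken into naturally aligned pieces. The names issue_aligned
and mmio_access are invented stand-ins for the helpers and for
io_readx()/io_writex(); the real loop also asserts 1 <= size <= 8 and
passes the matching MO_* memop.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void mmio_access(uint64_t addr, int width)
{
    printf("  %d byte(s) at 0x%" PRIx64 "\n", width, addr);
}

/* Decompose a 1..8 byte access into naturally aligned pieces. */
static void issue_aligned(uint64_t addr, int size)
{
    do {
        switch ((size | (int)addr) & 7) {
        case 1: case 3: case 5: case 7:
            mmio_access(addr, 1);
            size -= 1;
            addr += 1;
            break;
        case 2: case 6:
            mmio_access(addr, 2);
            size -= 2;
            addr += 2;
            break;
        case 4:
            mmio_access(addr, 4);
            size -= 4;
            addr += 4;
            break;
        case 0:
            mmio_access(addr, 8);
            return;
        }
    } while (size);
}

int main(void)
{
    issue_aligned(0x1003, 5);   /* 1 byte at 0x1003, then 4 bytes at 0x1004 */
    issue_aligned(0x1000, 8);   /* a single 8-byte access */
    return 0;
}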
accel/tcg/cputlb.c | 76 +++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 69 insertions(+), 7 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index d28606b93e..c3e1fdbf37 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2080,10 +2080,40 @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t ret_be, vaddr addr, int size,
int mmu_idx, MMUAccessType type, uintptr_t ra)
{
- for (int i = 0; i < size; i++) {
- uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
- ret_be = (ret_be << 8) | x;
- }
+ uint64_t t;
+
+ tcg_debug_assert(size > 0 && size <= 8);
+ do {
+ /* Read aligned pieces up to 8 bytes. */
+ switch ((size | (int)addr) & 7) {
+ case 1:
+ case 3:
+ case 5:
+ case 7:
+ t = io_readx(env, full, mmu_idx, addr, ra, type, MO_UB);
+ ret_be = (ret_be << 8) | t;
+ size -= 1;
+ addr += 1;
+ break;
+ case 2:
+ case 6:
+ t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUW);
+ ret_be = (ret_be << 16) | t;
+ size -= 2;
+ addr += 2;
+ break;
+ case 4:
+ t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUL);
+ ret_be = (ret_be << 32) | t;
+ size -= 4;
+ addr += 4;
+ break;
+ case 0:
+ return io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUQ);
+ default:
+ qemu_build_not_reached();
+ }
+ } while (size);
return ret_be;
}
@@ -2678,9 +2708,41 @@ static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
uint64_t val_le, vaddr addr, int size,
int mmu_idx, uintptr_t ra)
{
- for (int i = 0; i < size; i++, val_le >>= 8) {
- io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
- }
+ tcg_debug_assert(size > 0 && size <= 8);
+
+ do {
+ /* Store aligned pieces up to 8 bytes. */
+ switch ((size | (int)addr) & 7) {
+ case 1:
+ case 3:
+ case 5:
+ case 7:
+ io_writex(env, full, mmu_idx, val_le, addr, ra, MO_UB);
+ val_le >>= 8;
+ size -= 1;
+ addr += 1;
+ break;
+ case 2:
+ case 6:
+ io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUW);
+ val_le >>= 16;
+ size -= 2;
+ addr += 2;
+ break;
+ case 4:
+ io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUL);
+ val_le >>= 32;
+ size -= 4;
+ addr += 4;
+ break;
+ case 0:
+ io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUQ);
+ return 0;
+ default:
+ qemu_build_not_reached();
+ }
+ } while (size);
+
return val_le;
}
--
2.34.1
* [PATCH 3/3] accel/tcg: Do not issue misaligned i/o
From: Richard Henderson @ 2023-08-01 18:42 UTC
To: qemu-devel
In the single-page case we were issuing misaligned i/o to
the memory subsystem, which does not handle it properly.
Split such accesses via do_{ld,st}_mmio_*.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1800
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
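A standalone sketch (not QEMU code) of the endian handling this moves
to the call sites: the load helper always accumulates big-endian, and
the caller byte-swaps once afterwards when the guest access is
little-endian (the store path mirrors this with a little-endian helper
and a swap before the call). device_regs, read_mmio_byte() and
ld_mmio_be4() are invented stand-ins, and __builtin_bswap32() stands
in for the bswap32() used in the diff.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static const uint8_t device_regs[4] = { 0x11, 0x22, 0x33, 0x44 };

static uint8_t read_mmio_byte(int addr)
{
    return device_regs[addr];
}

/* Accumulate big-endian, like do_ld_mmio_beN(). */
static uint32_t ld_mmio_be4(int addr)
{
    uint32_t ret = 0;
    for (int i = 0; i < 4; i++) {
        ret = (ret << 8) | read_mmio_byte(addr + i);
    }
    return ret;
}

int main(void)
{
    uint32_t be = ld_mmio_be4(0);

    /* Big-endian guest access: use the accumulated value as-is. */
    printf("BE load: 0x%08" PRIx32 "\n", be);                      /* 0x11223344 */
    /* Little-endian guest access: a single swap at the end. */
    printf("LE load: 0x%08" PRIx32 "\n", __builtin_bswap32(be));   /* 0x44332211 */
    return 0;
}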
accel/tcg/cputlb.c | 118 +++++++++++++++++++++++++++------------------
1 file changed, 72 insertions(+), 46 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index c3e1fdbf37..05d272f839 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2369,16 +2369,20 @@ static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
MMUAccessType type, MemOp memop, uintptr_t ra)
{
- uint64_t ret;
+ uint16_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
- return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
- }
-
- /* Perform the load host endian, then swap if necessary. */
- ret = load_atom_2(env, ra, p->haddr, memop);
- if (memop & MO_BSWAP) {
- ret = bswap16(ret);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 2, mmu_idx, type, ra);
+ if ((memop & MO_BSWAP) == MO_LE) {
+ ret = bswap16(ret);
+ }
+ } else {
+ /* Perform the load host endian, then swap if necessary. */
+ ret = load_atom_2(env, ra, p->haddr, memop);
+ if (memop & MO_BSWAP) {
+ ret = bswap16(ret);
+ }
}
return ret;
}
@@ -2389,13 +2393,17 @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint32_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
- return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
- }
-
- /* Perform the load host endian. */
- ret = load_atom_4(env, ra, p->haddr, memop);
- if (memop & MO_BSWAP) {
- ret = bswap32(ret);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 4, mmu_idx, type, ra);
+ if ((memop & MO_BSWAP) == MO_LE) {
+ ret = bswap32(ret);
+ }
+ } else {
+ /* Perform the load host endian. */
+ ret = load_atom_4(env, ra, p->haddr, memop);
+ if (memop & MO_BSWAP) {
+ ret = bswap32(ret);
+ }
}
return ret;
}
@@ -2406,13 +2414,17 @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
uint64_t ret;
if (unlikely(p->flags & TLB_MMIO)) {
- return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
- }
-
- /* Perform the load host endian. */
- ret = load_atom_8(env, ra, p->haddr, memop);
- if (memop & MO_BSWAP) {
- ret = bswap64(ret);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 8, mmu_idx, type, ra);
+ if ((memop & MO_BSWAP) == MO_LE) {
+ ret = bswap64(ret);
+ }
+ } else {
+ /* Perform the load host endian. */
+ ret = load_atom_8(env, ra, p->haddr, memop);
+ if (memop & MO_BSWAP) {
+ ret = bswap64(ret);
+ }
}
return ret;
}
@@ -2560,20 +2572,22 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr,
cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
if (likely(!crosspage)) {
- /* Perform the load host endian. */
if (unlikely(l.page[0].flags & TLB_MMIO)) {
QEMU_IOTHREAD_LOCK_GUARD();
- a = io_readx(env, l.page[0].full, l.mmu_idx, addr,
- ra, MMU_DATA_LOAD, MO_64);
- b = io_readx(env, l.page[0].full, l.mmu_idx, addr + 8,
- ra, MMU_DATA_LOAD, MO_64);
- ret = int128_make128(HOST_BIG_ENDIAN ? b : a,
- HOST_BIG_ENDIAN ? a : b);
+ a = do_ld_mmio_beN(env, l.page[0].full, 0, addr, 8,
+ l.mmu_idx, MMU_DATA_LOAD, ra);
+ b = do_ld_mmio_beN(env, l.page[0].full, 0, addr + 8, 8,
+ l.mmu_idx, MMU_DATA_LOAD, ra);
+ ret = int128_make128(b, a);
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ ret = bswap128(ret);
+ }
} else {
+ /* Perform the load host endian. */
ret = load_atom_16(env, ra, l.page[0].haddr, l.memop);
- }
- if (l.memop & MO_BSWAP) {
- ret = bswap128(ret);
+ if (l.memop & MO_BSWAP) {
+ ret = bswap128(ret);
+ }
}
return ret;
}
@@ -2872,7 +2886,11 @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
int mmu_idx, MemOp memop, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ if ((memop & MO_BSWAP) != MO_LE) {
+ val = bswap16(val);
+ }
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, p->full, val, p->addr, 2, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
@@ -2888,7 +2906,11 @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
int mmu_idx, MemOp memop, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ if ((memop & MO_BSWAP) != MO_LE) {
+ val = bswap32(val);
+ }
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, p->full, val, p->addr, 4, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
@@ -2904,7 +2926,11 @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
int mmu_idx, MemOp memop, uintptr_t ra)
{
if (unlikely(p->flags & TLB_MMIO)) {
- io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ if ((memop & MO_BSWAP) != MO_LE) {
+ val = bswap64(val);
+ }
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, p->full, val, p->addr, 8, mmu_idx, ra);
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
@@ -3027,22 +3053,22 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
if (likely(!crosspage)) {
- /* Swap to host endian if necessary, then store. */
- if (l.memop & MO_BSWAP) {
- val = bswap128(val);
- }
if (unlikely(l.page[0].flags & TLB_MMIO)) {
- QEMU_IOTHREAD_LOCK_GUARD();
- if (HOST_BIG_ENDIAN) {
- b = int128_getlo(val), a = int128_gethi(val);
- } else {
- a = int128_getlo(val), b = int128_gethi(val);
+ if ((l.memop & MO_BSWAP) != MO_LE) {
+ val = bswap128(val);
}
- io_writex(env, l.page[0].full, l.mmu_idx, a, addr, ra, MO_64);
- io_writex(env, l.page[0].full, l.mmu_idx, b, addr + 8, ra, MO_64);
+ a = int128_getlo(val);
+ b = int128_gethi(val);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ do_st_mmio_leN(env, l.page[0].full, a, addr, 8, l.mmu_idx, ra);
+ do_st_mmio_leN(env, l.page[0].full, b, addr + 8, 8, l.mmu_idx, ra);
} else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
/* nothing */
} else {
+ /* Swap to host endian if necessary, then store. */
+ if (l.memop & MO_BSWAP) {
+ val = bswap128(val);
+ }
store_atom_16(env, ra, l.page[0].haddr, l.memop, val);
}
return;
--
2.34.1