* [PATCH 0/3] Add support for PowerPC e500 little-endian pages
@ 2025-08-21 11:33 Danila Zhebryakov
  2025-08-21 11:33 ` [PATCH 1/3] accel/tcg: Unify big- and little- endian atomic ops Danila Zhebryakov
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Danila Zhebryakov @ 2025-08-21 11:33 UTC (permalink / raw)
  To: qemu-devel
  Cc: Chinmay Rath, Nicholas Piggin, Riku Voipio, Ilya Leoshkevich,
	qemu-ppc, Paolo Bonzini, David Hildenbrand, qemu-s390x,
	Laurent Vivier, Thomas Huth, Richard Henderson, Danila Zhebryakov

These changes make the PowerPC booke206 MMU pass the TLB_BSWAP flag for pages marked as LE, and fix the issues this causes:
 - added TLB_BSWAP to the non-MMIO flags
 - added a check for LE pages when byte-swapping instructions
 - removed the assert on cross-page TLB_BSWAP accesses
 - added a gdbstub fix to account for TLB_BSWAP on the currently running code

BE and LE atomic operations (used by the load-and-reserve and store-conditional instructions) were unified to support TLB_BSWAP.

Some of the fixes are based on what is done in https://github.com/YetAnotherMod/qemu
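
For illustration, a call site changes roughly like this (a sketch, not taken
from the series; the surrounding variables are hypothetical):

    /* before: byte order was encoded in the helper name */
    ov = cpu_atomic_cmpxchgl_be_mmu(env, addr, cv, nv, oi, ra);

    /* after: one helper per size; the byte order travels in the MemOpIdx
     * via the MO_BSWAP bit implied by MO_BE/MO_LE */
    oi = make_memop_idx(MO_32 | MO_BE, mmu_idx);
    ov = cpu_atomic_cmpxchgl_mmu(env, addr, cv, nv, oi, ra);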

Danila Zhebryakov (3):
  accel/tcg: Unify big- and little- endian atomic ops
  target/ppc: Add support for LE pages on PowerPC booke206 mmu
  target/ppc: fix GDB stub to work correctly with LE pages

 accel/tcg/atomic_common.c.inc       |  27 +--
 accel/tcg/atomic_template.h         | 246 +++++++++-------------------
 accel/tcg/cputlb.c                  |  22 ++-
 accel/tcg/tcg-runtime.h             |  36 +---
 accel/tcg/user-exec.c               |   2 +-
 include/accel/tcg/cpu-ldst-common.h |  39 ++---
 target/m68k/op_helper.c             |   4 +-
 target/ppc/cpu.h                    |   4 +
 target/ppc/gdbstub.c                |  17 +-
 target/ppc/mmu-booke.c              |   5 +
 target/ppc/mmu_helper.c             |  17 +-
 target/ppc/translate.c              |  29 +++-
 target/s390x/tcg/mem_helper.c       |   6 +-
 tcg/tcg-op-ldst.c                   |  31 ++--
 14 files changed, 204 insertions(+), 281 deletions(-)

-- 
2.47.2




* [PATCH 1/3] accel/tcg: Unify big- and little- endian atomic ops
  2025-08-21 11:33 [PATCH 0/3] Add support for PowerPC e500 little-endian pages Danila Zhebryakov
@ 2025-08-21 11:33 ` Danila Zhebryakov
  2025-08-22  9:24   ` Richard Henderson
  2025-08-25  0:58   ` Richard Henderson
  2025-08-21 11:33 ` [PATCH 2/3] target/ppc: Add support for LE pages on PowerPC booke206 mmu Danila Zhebryakov
  2025-08-21 11:33 ` [PATCH 3/3] target/ppc: fix GDB stub to work correctly with LE pages Danila Zhebryakov
  2 siblings, 2 replies; 8+ messages in thread
From: Danila Zhebryakov @ 2025-08-21 11:33 UTC (permalink / raw)
  To: qemu-devel
  Cc: Chinmay Rath, Nicholas Piggin, Riku Voipio, Ilya Leoshkevich,
	qemu-ppc, Paolo Bonzini, David Hildenbrand, qemu-s390x,
	Laurent Vivier, Thomas Huth, Richard Henderson, Danila Zhebryakov

Remove the dedicated LE and BE atomic helpers; use the MO_BSWAP flag of the memop instead.
Adjust atomic_mmu_lookup to respect the TLB_BSWAP flag.

Signed-off-by: Danila Zhebryakov <d.zhebryakov@yandex.ru>
---
 accel/tcg/atomic_common.c.inc       |  27 +--
 accel/tcg/atomic_template.h         | 246 +++++++++-------------------
 accel/tcg/cputlb.c                  |  10 +-
 accel/tcg/tcg-runtime.h             |  36 +---
 accel/tcg/user-exec.c               |   2 +-
 include/accel/tcg/cpu-ldst-common.h |  39 ++---
 target/m68k/op_helper.c             |   4 +-
 target/s390x/tcg/mem_helper.c       |   6 +-
 tcg/tcg-op-ldst.c                   |  31 ++--
 9 files changed, 131 insertions(+), 270 deletions(-)

diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
index 6056598c23..4ce3bc3da1 100644
--- a/accel/tcg/atomic_common.c.inc
+++ b/accel/tcg/atomic_common.c.inc
@@ -42,19 +42,15 @@ static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
     { return cpu_atomic_##OP##_mmu(env, addr, oldv, newv, oi, GETPC()); }
 
 CMPXCHG_HELPER(cmpxchgb, uint32_t)
-CMPXCHG_HELPER(cmpxchgw_be, uint32_t)
-CMPXCHG_HELPER(cmpxchgw_le, uint32_t)
-CMPXCHG_HELPER(cmpxchgl_be, uint32_t)
-CMPXCHG_HELPER(cmpxchgl_le, uint32_t)
+CMPXCHG_HELPER(cmpxchgw, uint32_t)
+CMPXCHG_HELPER(cmpxchgl, uint32_t)
 
 #ifdef CONFIG_ATOMIC64
-CMPXCHG_HELPER(cmpxchgq_be, uint64_t)
-CMPXCHG_HELPER(cmpxchgq_le, uint64_t)
+CMPXCHG_HELPER(cmpxchgq, uint64_t)
 #endif
 
 #if HAVE_CMPXCHG128
-CMPXCHG_HELPER(cmpxchgo_be, Int128)
-CMPXCHG_HELPER(cmpxchgo_le, Int128)
+CMPXCHG_HELPER(cmpxchgo, Int128)
 #endif
 
 #undef CMPXCHG_HELPER
@@ -87,19 +83,14 @@ Int128 HELPER(nonatomic_cmpxchgo)(CPUArchState *env, uint64_t addr,
 #ifdef CONFIG_ATOMIC64
 #define GEN_ATOMIC_HELPERS(OP)              \
     ATOMIC_HELPER(glue(OP,b), uint32_t)     \
-    ATOMIC_HELPER(glue(OP,w_be), uint32_t)  \
-    ATOMIC_HELPER(glue(OP,w_le), uint32_t)  \
-    ATOMIC_HELPER(glue(OP,l_be), uint32_t)  \
-    ATOMIC_HELPER(glue(OP,l_le), uint32_t)  \
-    ATOMIC_HELPER(glue(OP,q_be), uint64_t)  \
-    ATOMIC_HELPER(glue(OP,q_le), uint64_t)
+    ATOMIC_HELPER(glue(OP,w), uint32_t)     \
+    ATOMIC_HELPER(glue(OP,l), uint32_t)     \
+    ATOMIC_HELPER(glue(OP,q), uint64_t)
 #else
 #define GEN_ATOMIC_HELPERS(OP)              \
     ATOMIC_HELPER(glue(OP,b), uint32_t)     \
-    ATOMIC_HELPER(glue(OP,w_be), uint32_t)  \
-    ATOMIC_HELPER(glue(OP,w_le), uint32_t)  \
-    ATOMIC_HELPER(glue(OP,l_be), uint32_t)  \
-    ATOMIC_HELPER(glue(OP,l_le), uint32_t)
+    ATOMIC_HELPER(glue(OP,w), uint32_t)     \
+    ATOMIC_HELPER(glue(OP,l), uint32_t)
 #endif
 
 GEN_ATOMIC_HELPERS(fetch_add)
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index 08a475c10c..e51207c498 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -69,63 +69,84 @@
 
 /* Define host-endian atomic operations.  Note that END is used within
    the ATOMIC_NAME macro, and redefined below.  */
-#if DATA_SIZE == 1
 # define END
-#elif HOST_BIG_ENDIAN
-# define END  _be
-#else
-# define END  _le
-#endif
 
 ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, vaddr addr,
                               ABI_TYPE cmpv, ABI_TYPE newv,
                               MemOpIdx oi, uintptr_t retaddr)
 {
+    bool need_bswap = get_memop(oi) & MO_BSWAP;
     DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
-                                         DATA_SIZE, retaddr);
-    DATA_TYPE ret;
-
+                                         DATA_SIZE, retaddr, &need_bswap);
+    DATA_TYPE ret, ret_e;
+    if (need_bswap) {
+#if DATA_SIZE == 16
+        ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
+#else
+        ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
+#endif
+        ret_e = BSWAP(ret);
+    } else {
 #if DATA_SIZE == 16
-    ret = atomic16_cmpxchg(haddr, cmpv, newv);
+        ret = atomic16_cmpxchg(haddr, cmpv, newv);
 #else
-    ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
+        ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
 #endif
+        ret_e = ret;
+    }
     ATOMIC_MMU_CLEANUP;
     atomic_trace_rmw_post(env, addr,
-                          VALUE_LOW(ret),
-                          VALUE_HIGH(ret),
-                          VALUE_LOW(newv),
-                          VALUE_HIGH(newv),
-                          oi);
-    return ret;
+                        VALUE_LOW(ret),
+                        VALUE_HIGH(ret),
+                        VALUE_LOW(newv),
+                        VALUE_HIGH(newv),
+                        oi);
+    return ret_e;
 }
 
 #if DATA_SIZE < 16
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, vaddr addr, ABI_TYPE val,
                            MemOpIdx oi, uintptr_t retaddr)
 {
-    DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
-                                         DATA_SIZE, retaddr);
-    DATA_TYPE ret;
+    bool need_bswap = get_memop(oi) & MO_BSWAP;
 
-    ret = qatomic_xchg__nocheck(haddr, val);
+    DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
+                                         DATA_SIZE, retaddr, &need_bswap);
+    DATA_TYPE ret, ret_e;
+
+    if (need_bswap) {
+        ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
+        ret_e = BSWAP(ret);
+    } else {
+        ret = qatomic_xchg__nocheck(haddr, val);
+        ret_e = ret;
+    }
     ATOMIC_MMU_CLEANUP;
     atomic_trace_rmw_post(env, addr,
-                          VALUE_LOW(ret),
-                          VALUE_HIGH(ret),
-                          VALUE_LOW(val),
-                          VALUE_HIGH(val),
-                          oi);
-    return ret;
+                        VALUE_LOW(ret),
+                        VALUE_HIGH(ret),
+                        VALUE_LOW(val),
+                        VALUE_HIGH(val),
+                        oi);
+    return ret_e;
 }
 
 #define GEN_ATOMIC_HELPER(X)                                        \
 ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr,              \
                         ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
 {                                                                   \
-    DATA_TYPE *haddr, ret;                                          \
-    haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr);   \
-    ret = qatomic_##X(haddr, val);                                  \
+    DATA_TYPE *haddr, ret, ret_e;                                   \
+    bool need_bswap = get_memop(oi) & MO_BSWAP;                     \
+    haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE,    \
+                                             retaddr, &need_bswap); \
+    if (need_bswap) {                                               \
+        ret = qatomic_##X(haddr, BSWAP(val));                       \
+        ret_e = BSWAP(ret);                                         \
+    } else {                                                        \
+        ret = qatomic_##X(haddr, val);                              \
+        ret_e = ret;                                                \
+    }                                                               \
     ATOMIC_MMU_CLEANUP;                                             \
     atomic_trace_rmw_post(env, addr,                                \
                           VALUE_LOW(ret),                           \
@@ -133,7 +154,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr,              \
                           VALUE_LOW(val),                           \
                           VALUE_HIGH(val),                          \
                           oi);                                      \
-    return ret;                                                     \
+    return ret_e;                                                   \
 }
 
 GEN_ATOMIC_HELPER(fetch_add)
@@ -155,147 +176,38 @@ GEN_ATOMIC_HELPER(xor_fetch)
  * Trace this load + RMW loop as a single RMW op. This way, regardless
  * of CF_PARALLEL's value, we'll trace just a read and a write.
  */
-#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
-ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr,              \
-                        ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
-{                                                                   \
-    XDATA_TYPE *haddr, cmp, old, new, val = xval;                   \
-    haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr);   \
-    smp_mb();                                                       \
-    cmp = qatomic_read__nocheck(haddr);                             \
-    do {                                                            \
-        old = cmp; new = FN(old, val);                              \
-        cmp = qatomic_cmpxchg__nocheck(haddr, old, new);            \
-    } while (cmp != old);                                           \
-    ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr,                                \
-                          VALUE_LOW(old),                           \
-                          VALUE_HIGH(old),                          \
-                          VALUE_LOW(xval),                          \
-                          VALUE_HIGH(xval),                         \
-                          oi);                                      \
-    return RET;                                                     \
-}
-
-GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
-GEN_ATOMIC_HELPER_FN(fetch_umin, MIN,  DATA_TYPE, old)
-GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
-GEN_ATOMIC_HELPER_FN(fetch_umax, MAX,  DATA_TYPE, old)
 
-GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
-GEN_ATOMIC_HELPER_FN(umin_fetch, MIN,  DATA_TYPE, new)
-GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
-GEN_ATOMIC_HELPER_FN(umax_fetch, MAX,  DATA_TYPE, new)
-
-#undef GEN_ATOMIC_HELPER_FN
-#endif /* DATA SIZE < 16 */
-
-#undef END
-
-#if DATA_SIZE > 1
-
-/* Define reverse-host-endian atomic operations.  Note that END is used
-   within the ATOMIC_NAME macro.  */
-#if HOST_BIG_ENDIAN
-# define END  _le
-#else
-# define END  _be
-#endif
-
-ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, vaddr addr,
-                              ABI_TYPE cmpv, ABI_TYPE newv,
-                              MemOpIdx oi, uintptr_t retaddr)
-{
-    DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
-                                         DATA_SIZE, retaddr);
-    DATA_TYPE ret;
-
-#if DATA_SIZE == 16
-    ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
-#else
-    ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
-#endif
-    ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr,
-                          VALUE_LOW(ret),
-                          VALUE_HIGH(ret),
-                          VALUE_LOW(newv),
-                          VALUE_HIGH(newv),
-                          oi);
-    return BSWAP(ret);
-}
-
-#if DATA_SIZE < 16
-ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, vaddr addr, ABI_TYPE val,
-                           MemOpIdx oi, uintptr_t retaddr)
-{
-    DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
-                                         DATA_SIZE, retaddr);
-    ABI_TYPE ret;
-
-    ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
-    ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr,
-                          VALUE_LOW(ret),
-                          VALUE_HIGH(ret),
-                          VALUE_LOW(val),
-                          VALUE_HIGH(val),
-                          oi);
-    return BSWAP(ret);
-}
-
-#define GEN_ATOMIC_HELPER(X)                                        \
-ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr,              \
-                        ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
-{                                                                   \
-    DATA_TYPE *haddr, ret;                                          \
-    haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr);   \
-    ret = qatomic_##X(haddr, BSWAP(val));                           \
-    ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr,                                \
-                          VALUE_LOW(ret),                           \
-                          VALUE_HIGH(ret),                          \
-                          VALUE_LOW(val),                           \
-                          VALUE_HIGH(val),                          \
-                          oi);                                      \
-    return BSWAP(ret);                                              \
-}
-
-GEN_ATOMIC_HELPER(fetch_and)
-GEN_ATOMIC_HELPER(fetch_or)
-GEN_ATOMIC_HELPER(fetch_xor)
-GEN_ATOMIC_HELPER(and_fetch)
-GEN_ATOMIC_HELPER(or_fetch)
-GEN_ATOMIC_HELPER(xor_fetch)
-
-#undef GEN_ATOMIC_HELPER
-
-/* These helpers are, as a whole, full barriers.  Within the helper,
- * the leading barrier is explicit and the trailing barrier is within
- * cmpxchg primitive.
- *
- * Trace this load + RMW loop as a single RMW op. This way, regardless
- * of CF_PARALLEL's value, we'll trace just a read and a write.
- */
 #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
 ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr,              \
                         ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
 {                                                                   \
     XDATA_TYPE *haddr, ldo, ldn, old, new, val = xval;              \
-    haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr);   \
+    bool need_bswap = get_memop(oi) & MO_BSWAP;                     \
+    haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE,    \
+                              retaddr, &need_bswap);                \
     smp_mb();                                                       \
     ldn = qatomic_read__nocheck(haddr);                             \
-    do {                                                            \
-        ldo = ldn; old = BSWAP(ldo); new = FN(old, val);            \
-        ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));     \
-    } while (ldo != ldn);                                           \
+    if (need_bswap) {                                               \
+        do {                                                        \
+            ldo = ldn; old = BSWAP(ldo);                            \
+            new = FN(old, val);                                     \
+            ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \
+        } while (ldo != ldn);                                       \
+    } else {                                                        \
+        do {                                                        \
+            ldo = ldn; old = ldo;                                   \
+            new = FN(old, val);                                     \
+            ldn = qatomic_cmpxchg__nocheck(haddr, ldo, new);        \
+        } while (ldo != ldn);                                       \
+    }                                                               \
     ATOMIC_MMU_CLEANUP;                                             \
     atomic_trace_rmw_post(env, addr,                                \
-                          VALUE_LOW(old),                           \
-                          VALUE_HIGH(old),                          \
-                          VALUE_LOW(xval),                          \
-                          VALUE_HIGH(xval),                         \
-                          oi);                                      \
+                        VALUE_LOW(old),                             \
+                        VALUE_HIGH(old),                            \
+                        VALUE_LOW(xval),                            \
+                        VALUE_HIGH(xval),                           \
+                        oi);                                        \
     return RET;                                                     \
 }
 
@@ -309,18 +221,10 @@ GEN_ATOMIC_HELPER_FN(umin_fetch, MIN,  DATA_TYPE, new)
 GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
 GEN_ATOMIC_HELPER_FN(umax_fetch, MAX,  DATA_TYPE, new)
 
-/* Note that for addition, we need to use a separate cmpxchg loop instead
-   of bswaps for the reverse-host-endian helpers.  */
-#define ADD(X, Y)   (X + Y)
-GEN_ATOMIC_HELPER_FN(fetch_add, ADD, DATA_TYPE, old)
-GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
-#undef ADD
-
 #undef GEN_ATOMIC_HELPER_FN
-#endif /* DATA_SIZE < 16 */
+#endif /* DATA_SIZE < 16 */
 
 #undef END
-#endif /* DATA_SIZE > 1 */
 
 #undef BSWAP
 #undef ABI_TYPE
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 87e14bde4f..841b54e41d 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1808,7 +1808,7 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
  * or io operations to proceed.  Return the host address.
  */
 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
-                               int size, uintptr_t retaddr)
+                               int size, uintptr_t retaddr, bool *need_bswap)
 {
     uintptr_t mmu_idx = get_mmuidx(oi);
     MemOp mop = get_memop(oi);
@@ -1894,6 +1894,14 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
         notdirty_write(cpu, addr, size, full, retaddr);
     }
 
+    if (unlikely(tlb_addr & TLB_BSWAP)) {
+        assert(!( (  full->slow_flags[MMU_DATA_STORE]
+            ^ full->slow_flags[MMU_DATA_LOAD ])
+            & TLB_BSWAP));
+
+        *need_bswap = !*need_bswap;
+    }
+
     if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
         int wp_flags = 0;
 
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index c23b5e66c4..02679b1fdb 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -44,24 +44,16 @@ DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
 
 DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
                    i32, env, i64, i32, i32, i32)
-DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_5(atomic_cmpxchgw, TCG_CALL_NO_WG,
                    i32, env, i64, i32, i32, i32)
-DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
-                   i32, env, i64, i32, i32, i32)
-DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
-                   i32, env, i64, i32, i32, i32)
-DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_5(atomic_cmpxchgl, TCG_CALL_NO_WG,
                    i32, env, i64, i32, i32, i32)
 #ifdef CONFIG_ATOMIC64
-DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
-                   i64, env, i64, i64, i64, i32)
-DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_5(atomic_cmpxchgq, TCG_CALL_NO_WG,
                    i64, env, i64, i64, i64, i32)
 #endif
 #if HAVE_CMPXCHG128
-DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
-                   i128, env, i64, i128, i128, i32)
-DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_5(atomic_cmpxchgo, TCG_CALL_NO_WG,
                    i128, env, i64, i128, i128, i32)
 #endif
 
@@ -72,29 +64,19 @@ DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo, TCG_CALL_NO_WG,
 #define GEN_ATOMIC_HELPERS(NAME)                                  \
     DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b),              \
                        TCG_CALL_NO_WG, i32, env, i64, i32, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le),           \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w),              \
                        TCG_CALL_NO_WG, i32, env, i64, i32, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be),           \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l),              \
                        TCG_CALL_NO_WG, i32, env, i64, i32, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le),           \
-                       TCG_CALL_NO_WG, i32, env, i64, i32, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be),           \
-                       TCG_CALL_NO_WG, i32, env, i64, i32, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le),           \
-                       TCG_CALL_NO_WG, i64, env, i64, i64, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be),           \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q),              \
                        TCG_CALL_NO_WG, i64, env, i64, i64, i32)
 #else
 #define GEN_ATOMIC_HELPERS(NAME)                                  \
     DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b),              \
                        TCG_CALL_NO_WG, i32, env, i64, i32, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le),           \
-                       TCG_CALL_NO_WG, i32, env, i64, i32, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be),           \
-                       TCG_CALL_NO_WG, i32, env, i64, i32, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le),           \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w),              \
                        TCG_CALL_NO_WG, i32, env, i64, i32, i32)   \
-    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be),           \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l),              \
                        TCG_CALL_NO_WG, i32, env, i64, i32, i32)
 #endif /* CONFIG_ATOMIC64 */
 
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index f25d80e2dc..0c7443a16e 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -1268,7 +1268,7 @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
  * Do not allow unaligned operations to proceed.  Return the host address.
  */
 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
-                               int size, uintptr_t retaddr)
+                               int size, uintptr_t retaddr, bool *need_bswap)
 {
     MemOp mop = get_memop(oi);
     int a_bits = memop_alignment_bits(mop);
diff --git a/include/accel/tcg/cpu-ldst-common.h b/include/accel/tcg/cpu-ldst-common.h
index 8bf17c2fab..b07f1827ce 100644
--- a/include/accel/tcg/cpu-ldst-common.h
+++ b/include/accel/tcg/cpu-ldst-common.h
@@ -36,22 +36,13 @@ void cpu_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
 uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, vaddr addr,
                                  uint32_t cmpv, uint32_t newv,
                                  MemOpIdx oi, uintptr_t retaddr);
-uint32_t cpu_atomic_cmpxchgw_le_mmu(CPUArchState *env, vaddr addr,
+uint32_t cpu_atomic_cmpxchgw_mmu(CPUArchState *env, vaddr addr,
                                     uint32_t cmpv, uint32_t newv,
                                     MemOpIdx oi, uintptr_t retaddr);
-uint32_t cpu_atomic_cmpxchgl_le_mmu(CPUArchState *env, vaddr addr,
+uint32_t cpu_atomic_cmpxchgl_mmu(CPUArchState *env, vaddr addr,
                                     uint32_t cmpv, uint32_t newv,
                                     MemOpIdx oi, uintptr_t retaddr);
-uint64_t cpu_atomic_cmpxchgq_le_mmu(CPUArchState *env, vaddr addr,
-                                    uint64_t cmpv, uint64_t newv,
-                                    MemOpIdx oi, uintptr_t retaddr);
-uint32_t cpu_atomic_cmpxchgw_be_mmu(CPUArchState *env, vaddr addr,
-                                    uint32_t cmpv, uint32_t newv,
-                                    MemOpIdx oi, uintptr_t retaddr);
-uint32_t cpu_atomic_cmpxchgl_be_mmu(CPUArchState *env, vaddr addr,
-                                    uint32_t cmpv, uint32_t newv,
-                                    MemOpIdx oi, uintptr_t retaddr);
-uint64_t cpu_atomic_cmpxchgq_be_mmu(CPUArchState *env, vaddr addr,
+uint64_t cpu_atomic_cmpxchgq_mmu(CPUArchState *env, vaddr addr,
                                     uint64_t cmpv, uint64_t newv,
                                     MemOpIdx oi, uintptr_t retaddr);
 
@@ -63,19 +54,14 @@ TYPE cpu_atomic_ ## NAME ## SUFFIX ## _mmu      \
 #ifdef CONFIG_ATOMIC64
 #define GEN_ATOMIC_HELPER_ALL(NAME)          \
     GEN_ATOMIC_HELPER(NAME, uint32_t, b)     \
-    GEN_ATOMIC_HELPER(NAME, uint32_t, w_le)  \
-    GEN_ATOMIC_HELPER(NAME, uint32_t, w_be)  \
-    GEN_ATOMIC_HELPER(NAME, uint32_t, l_le)  \
-    GEN_ATOMIC_HELPER(NAME, uint32_t, l_be)  \
-    GEN_ATOMIC_HELPER(NAME, uint64_t, q_le)  \
-    GEN_ATOMIC_HELPER(NAME, uint64_t, q_be)
+    GEN_ATOMIC_HELPER(NAME, uint32_t, w)     \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, l)     \
+    GEN_ATOMIC_HELPER(NAME, uint64_t, q)
 #else
 #define GEN_ATOMIC_HELPER_ALL(NAME)          \
     GEN_ATOMIC_HELPER(NAME, uint32_t, b)     \
-    GEN_ATOMIC_HELPER(NAME, uint32_t, w_le)  \
-    GEN_ATOMIC_HELPER(NAME, uint32_t, w_be)  \
-    GEN_ATOMIC_HELPER(NAME, uint32_t, l_le)  \
-    GEN_ATOMIC_HELPER(NAME, uint32_t, l_be)
+    GEN_ATOMIC_HELPER(NAME, uint32_t, w)     \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, l)
 #endif
 
 GEN_ATOMIC_HELPER_ALL(fetch_add)
@@ -103,12 +89,9 @@ GEN_ATOMIC_HELPER_ALL(xchg)
 #undef GEN_ATOMIC_HELPER_ALL
 #undef GEN_ATOMIC_HELPER
 
-Int128 cpu_atomic_cmpxchgo_le_mmu(CPUArchState *env, vaddr addr,
-                                  Int128 cmpv, Int128 newv,
-                                  MemOpIdx oi, uintptr_t retaddr);
-Int128 cpu_atomic_cmpxchgo_be_mmu(CPUArchState *env, vaddr addr,
-                                  Int128 cmpv, Int128 newv,
-                                  MemOpIdx oi, uintptr_t retaddr);
+Int128 cpu_atomic_cmpxchgo_mmu(CPUArchState *env, vaddr addr,
+                               Int128 cmpv, Int128 newv,
+                               MemOpIdx oi, uintptr_t retaddr);
 
 uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
                          MemOpIdx oi, uintptr_t ra);
diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c
index f29ae12af8..d138bb6742 100644
--- a/target/m68k/op_helper.c
+++ b/target/m68k/op_helper.c
@@ -806,13 +806,13 @@ static void do_cas2l(CPUM68KState *env, uint32_t regs, uint32_t a1, uint32_t a2,
         if ((a1 & 7) == 0 && a2 == a1 + 4) {
             c = deposit64(c2, 32, 32, c1);
             u = deposit64(u2, 32, 32, u1);
-            l = cpu_atomic_cmpxchgq_be_mmu(env, a1, c, u, oi, ra);
+            l = cpu_atomic_cmpxchgq_mmu(env, a1, c, u, oi, ra);
             l1 = l >> 32;
             l2 = l;
         } else if ((a2 & 7) == 0 && a1 == a2 + 4) {
             c = deposit64(c1, 32, 32, c2);
             u = deposit64(u1, 32, 32, u2);
-            l = cpu_atomic_cmpxchgq_be_mmu(env, a2, c, u, oi, ra);
+            l = cpu_atomic_cmpxchgq_mmu(env, a2, c, u, oi, ra);
             l2 = l >> 32;
             l1 = l;
         } else
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index f1acb1618f..996e10ece3 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -1838,7 +1838,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
             uint32_t ov;
 
             if (parallel) {
-                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
+                ov = cpu_atomic_cmpxchgl_mmu(env, a1, cv, nv, oi4, ra);
             } else {
                 ov = cpu_ldl_mmu(env, a1, oi4, ra);
                 cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
@@ -1856,7 +1856,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
 
             if (parallel) {
 #ifdef CONFIG_ATOMIC64
-                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
+                ov = cpu_atomic_cmpxchgq_mmu(env, a1, cv, nv, oi8, ra);
 #else
                 /* Note that we asserted !parallel above.  */
                 g_assert_not_reached();
@@ -1884,7 +1884,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                 }
                 cpu_st16_mmu(env, a1, nv, oi16, ra);
             } else if (HAVE_CMPXCHG128) {
-                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
+                ov = cpu_atomic_cmpxchgo_mmu(env, a1, cv, nv, oi16, ra);
                 cc = !int128_eq(ov, cv);
             } else {
                 /* Note that we asserted !parallel above.  */
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index 548496002d..8db45d8f0c 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -813,16 +813,12 @@ typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
 # define WITH_ATOMIC128(X)
 #endif
 
-static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
+static void * const table_cmpxchg[MO_SIZE + 1] = {
     [MO_8] = gen_helper_atomic_cmpxchgb,
-    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
-    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
-    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
-    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
-    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
-    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
-    WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
-    WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
+    [MO_16] = gen_helper_atomic_cmpxchgw,
+    [MO_32] = gen_helper_atomic_cmpxchgl,
+    WITH_ATOMIC64([MO_64] = gen_helper_atomic_cmpxchgq)
+    WITH_ATOMIC128([MO_128] = gen_helper_atomic_cmpxchgo)
 };
 
 static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
@@ -871,7 +867,7 @@ static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
     }
 
     memop = tcg_canonicalize_memop(memop, 0, 0);
-    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
+    gen = table_cmpxchg[memop & MO_SIZE];
     tcg_debug_assert(gen != NULL);
 
     oi = make_memop_idx(memop & ~MO_SIGN, idx);
@@ -952,7 +948,7 @@ static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
         gen_atomic_cx_i64 gen;
 
         memop = tcg_canonicalize_memop(memop, 1, 0);
-        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
+        gen = table_cmpxchg[memop & MO_SIZE];
         if (gen) {
             MemOpIdx oi = make_memop_idx(memop, idx);
             TCGv_i64 a64 = maybe_extend_addr64(addr);
@@ -1074,7 +1070,7 @@ static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
         return;
     }
 
-    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
+    gen = table_cmpxchg[memop & MO_SIZE];
     if (gen) {
         MemOpIdx oi = make_memop_idx(memop, idx);
         TCGv_i64 a64 = maybe_extend_addr64(addr);
@@ -1202,14 +1198,11 @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
 }
 
 #define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
-static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
+static void * const table_##NAME[MO_SIZE + 1] = {                       \
     [MO_8] = gen_helper_atomic_##NAME##b,                               \
-    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
-    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
-    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
-    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
-    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
-    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
+    [MO_16] = gen_helper_atomic_##NAME##w,                              \
+    [MO_32] = gen_helper_atomic_##NAME##l,                              \
+    WITH_ATOMIC64([MO_64] = gen_helper_atomic_##NAME##q)                \
 };                                                                      \
 void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr,       \
                                      TCGv_i32 val, TCGArg idx,          \
-- 
2.47.2




* [PATCH 2/3] target/ppc: Add support for LE pages on PowerPC booke206 mmu
  2025-08-21 11:33 [PATCH 0/3] Add support for PowerPC e500 little-endian pages Danila Zhebryakov
  2025-08-21 11:33 ` [PATCH 1/3] accel/tcg: Unify big- and little- endian atomic ops Danila Zhebryakov
@ 2025-08-21 11:33 ` Danila Zhebryakov
  2025-08-25  1:06   ` Richard Henderson
  2025-08-21 11:33 ` [PATCH 3/3] target/ppc: fix GDB stub to work correctly with LE pages Danila Zhebryakov
  2 siblings, 1 reply; 8+ messages in thread
From: Danila Zhebryakov @ 2025-08-21 11:33 UTC (permalink / raw)
  To: qemu-devel
  Cc: Chinmay Rath, Nicholas Piggin, Riku Voipio, Ilya Leoshkevich,
	qemu-ppc, Paolo Bonzini, David Hildenbrand, qemu-s390x,
	Laurent Vivier, Thomas Huth, Richard Henderson, Danila Zhebryakov

Add support for passing the TLB_BSWAP flag from the powerpc booke206 MMU.
Fix instruction fetches from LE pages being treated as MMIO.
This change should not affect SPARC, as its instruction fetches are always BE.

Signed-off-by: Danila Zhebryakov <d.zhebryakov@yandex.ru>
---
 accel/tcg/cputlb.c      | 12 +++++-------
 target/ppc/cpu.h        |  4 ++++
 target/ppc/mmu-booke.c  |  5 +++++
 target/ppc/mmu_helper.c | 17 +++++++++++++++--
 target/ppc/translate.c  | 29 ++++++++++++++++++++++++++++-
 5 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 841b54e41d..396e510f1b 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1401,7 +1401,8 @@ static int probe_access_internal(CPUState *cpu, vaddr addr,
     flags |= full->slow_flags[access_type];
 
     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
-    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY | TLB_CHECK_ALIGNED))
+    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY
+                           | TLB_CHECK_ALIGNED | TLB_BSWAP))
         || (access_type != MMU_INST_FETCH && force_mmio)) {
         *phost = NULL;
         return TLB_MMIO;
@@ -1792,12 +1793,9 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
             mmu_watch_or_dirty(cpu, &l->page[1], type, ra);
         }
 
-        /*
-         * Since target/sparc is the only user of TLB_BSWAP, and all
-         * Sparc accesses are aligned, any treatment across two pages
-         * would be arbitrary.  Refuse it until there's a use.
-         */
-        tcg_debug_assert((flags & TLB_BSWAP) == 0);
+        if (unlikely(flags & TLB_BSWAP)) {
+            l->memop ^= MO_BSWAP;
+        }
     }
 
     return crosspage;
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 6b90543811..127b05c865 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1186,6 +1186,10 @@ struct ppc_radix_page_info {
     uint32_t entries[PPC_PAGE_SIZES_MAX_SZ];
 };
 
+/*****************************************************************************/
+/* PowerPC usage of the PAGE_TARGET_1 bit for TLB little-endian bit */
+#define PAGE_LE  PAGE_TARGET_1
+
 /*****************************************************************************/
 /* Dynamic Execution Control Register */
 
diff --git a/target/ppc/mmu-booke.c b/target/ppc/mmu-booke.c
index 55e5dd7c6b..dc72bbf21f 100644
--- a/target/ppc/mmu-booke.c
+++ b/target/ppc/mmu-booke.c
@@ -357,6 +357,11 @@ found_tlb:
     }
 
     *prot = 0;
+
+    if (tlb->mas2 & MAS2_E) {
+        *prot |= PAGE_LE;
+    }
+
     if (pr) {
         if (tlb->mas7_3 & MAS3_UR) {
             *prot |= PAGE_READ;
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index ac60705402..20282edaaa 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -27,6 +27,7 @@
 #include "exec/cputlb.h"
 #include "exec/page-protection.h"
 #include "exec/target_page.h"
+#include "exec/tlb-flags.h"
 #include "exec/log.h"
 #include "helper_regs.h"
 #include "qemu/error-report.h"
@@ -1368,8 +1369,20 @@ bool ppc_cpu_tlb_fill(CPUState *cs, vaddr eaddr, int size,
 
     if (ppc_xlate(cpu, eaddr, access_type, &raddr,
                   &page_size, &prot, mmu_idx, !probe)) {
-        tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
-                     prot, mmu_idx, 1UL << page_size);
+        if (prot & PAGE_LE) {
+            CPUTLBEntryFull full = {
+                .phys_addr = raddr & TARGET_PAGE_MASK,
+                .attrs = MEMTXATTRS_UNSPECIFIED,
+                .prot = prot,
+                .lg_page_size = ctz64(1UL << page_size),
+                .tlb_fill_flags = TLB_BSWAP
+            };
+            tlb_set_page_full(cs, mmu_idx, eaddr & TARGET_PAGE_MASK, &full);
+
+        } else {
+            tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
+                         prot, mmu_idx, 1UL << page_size);
+        }
         return true;
     }
     if (probe) {
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 27f90c3cc5..8cf50a0221 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -24,7 +24,9 @@
 #include "exec/target_page.h"
 #include "tcg/tcg-op.h"
 #include "tcg/tcg-op-gvec.h"
+#include "accel/tcg/probe.h"
 #include "qemu/host-utils.h"
+#include "exec/tlb-flags.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
@@ -219,6 +221,27 @@ static inline bool need_byteswap(const DisasContext *ctx)
 #endif
 }
 
+#ifndef CONFIG_USER_ONLY
+static bool is_page_little_endian(CPUPPCState *env, vaddr addr)
+{
+    CPUTLBEntryFull *full;
+    void *host;
+    int mmu_idx = ppc_env_mmu_index(env, true);
+    int flags;
+
+    flags = probe_access_full_mmu(env, addr, 0, MMU_INST_FETCH, mmu_idx,
+                                  &host, &full);
+    assert(!(flags & TLB_INVALID_MASK));
+
+    return full->tlb_fill_flags & TLB_BSWAP;
+}
+#else
+static bool is_page_little_endian(CPUPPCState *env, vaddr addr)
+{
+    return false;
+}
+#endif
+
 /* True when active word size < size of target_long.  */
 #ifdef TARGET_PPC64
 # define NARROW_MODE(C)  (!(C)->sf_mode)
@@ -6577,7 +6600,11 @@ static void ppc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
               ctx->base.pc_next, ctx->mem_idx, (int)msr_ir);
 
     ctx->cia = pc = ctx->base.pc_next;
-    insn = translator_ldl_swap(env, dcbase, pc, need_byteswap(ctx));
+    bool tlb_is_le = is_page_little_endian(env, ctx->base.pc_next);
+
+
+    insn = translator_ldl_swap(env, dcbase, pc, need_byteswap(ctx)
+                                                || tlb_is_le);
     ctx->base.pc_next = pc += 4;
 
     if (!is_prefix_insn(ctx, insn)) {
-- 
2.47.2




* [PATCH 3/3] target/ppc: fix GDB stub to work correctly with LE pages
  2025-08-21 11:33 [PATCH 0/3] Add support for PowerPC e500 little-endian pages Danila Zhebryakov
  2025-08-21 11:33 ` [PATCH 1/3] accel/tcg: Unify big- and little- endian atomic ops Danila Zhebryakov
  2025-08-21 11:33 ` [PATCH 2/3] target/ppc: Add support for LE pages on PowerPC booke206 mmu Danila Zhebryakov
@ 2025-08-21 11:33 ` Danila Zhebryakov
  2 siblings, 0 replies; 8+ messages in thread
From: Danila Zhebryakov @ 2025-08-21 11:33 UTC (permalink / raw)
  To: qemu-devel
  Cc: Chinmay Rath, Nicholas Piggin, Riku Voipio, Ilya Leoshkevich,
	qemu-ppc, Paolo Bonzini, David Hildenbrand, qemu-s390x,
	Laurent Vivier, Thomas Huth, Richard Henderson, Danila Zhebryakov

GDB is expected to be set to the endianness of the currently running code, which may live in an LE page. Byte-swap the registers accordingly.

Signed-off-by: Danila Zhebryakov <d.zhebryakov@yandex.ru>
---
 target/ppc/gdbstub.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/target/ppc/gdbstub.c b/target/ppc/gdbstub.c
index 3b28d4e21c..89c783894c 100644
--- a/target/ppc/gdbstub.c
+++ b/target/ppc/gdbstub.c
@@ -19,6 +19,8 @@
  */
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "accel/tcg/probe.h"
+#include "exec/tlb-flags.h"
 #include "exec/gdbstub.h"
 #include "gdbstub/helpers.h"
 #include "internal.h"
@@ -84,7 +86,20 @@ static int ppc_gdb_register_len(int n)
 void ppc_maybe_bswap_register(CPUPPCState *env, uint8_t *mem_buf, int len)
 {
 #ifndef CONFIG_USER_ONLY
-    if (!FIELD_EX64(env->msr, MSR, LE)) {
+    bool le_page = false;
+
+    if (env->mmu_model == POWERPC_MMU_BOOKE206) {
+        CPUTLBEntryFull *full;
+        void *host;
+        int mmu_idx = ppc_env_mmu_index(env, true);
+
+        probe_access_full_mmu(env, env->nip, 0, MMU_INST_FETCH, mmu_idx,
+                              &host, &full);
+
+        le_page = full->tlb_fill_flags & TLB_BSWAP;
+    }
+
+    if (!le_page && !FIELD_EX64(env->msr, MSR, LE)) {
         /* do nothing */
     } else if (len == 4) {
         bswap32s((uint32_t *)mem_buf);
-- 
2.47.2




* Re: [PATCH 1/3] accel/tcg: Unify big- and little- endian atomic ops
  2025-08-21 11:33 ` [PATCH 1/3] accel/tcg: Unify big- and little- endian atomic ops Danila Zhebryakov
@ 2025-08-22  9:24   ` Richard Henderson
  2025-08-22  9:26     ` Richard Henderson
  2025-08-25  0:58   ` Richard Henderson
  1 sibling, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2025-08-22  9:24 UTC (permalink / raw)
  To: Danila Zhebryakov, qemu-devel
  Cc: Chinmay Rath, Nicholas Piggin, Riku Voipio, Ilya Leoshkevich,
	qemu-ppc, Paolo Bonzini, David Hildenbrand, qemu-s390x,
	Laurent Vivier, Thomas Huth

On 8/21/25 21:33, Danila Zhebryakov wrote:
> @@ -1894,6 +1894,14 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
>           notdirty_write(cpu, addr, size, full, retaddr);
>       }
>   
> +    if (unlikely(tlb_addr & TLB_BSWAP)) {
> +        assert(!( (  full->slow_flags[MMU_DATA_STORE]
> +            ^ full->slow_flags[MMU_DATA_LOAD ])
> +            & TLB_BSWAP));
> +
> +        *need_bswap = !*need_bswap;
> +    }
> +

How is this supposed to work?

(1) TLB_BSWAP is a slow-flags bit, so it will never be set in tlb_addr.
(2) Both MMU_DATA_STORE and MMU_DATA_LOAD will have the same value for TLB_BSWAP:
X ^ X == 0, so sure, that assert can't fire.


r~





* Re: [PATCH 1/3] accel/tcg: Unify big- and little- endian atomic ops
  2025-08-22  9:24   ` Richard Henderson
@ 2025-08-22  9:26     ` Richard Henderson
  0 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2025-08-22  9:26 UTC (permalink / raw)
  To: Danila Zhebryakov, qemu-devel
  Cc: Chinmay Rath, Nicholas Piggin, Riku Voipio, Ilya Leoshkevich,
	qemu-ppc, Paolo Bonzini, David Hildenbrand, qemu-s390x,
	Laurent Vivier, Thomas Huth

On 8/22/25 19:24, Richard Henderson wrote:
> On 8/21/25 21:33, Danila Zhebryakov wrote:
>> @@ -1894,6 +1894,14 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, 
>> MemOpIdx oi,
>>           notdirty_write(cpu, addr, size, full, retaddr);
>>       }
>> +    if (unlikely(tlb_addr & TLB_BSWAP)) {
>> +        assert(!( (  full->slow_flags[MMU_DATA_STORE]
>> +            ^ full->slow_flags[MMU_DATA_LOAD ])
>> +            & TLB_BSWAP));
>> +
>> +        *need_bswap = !*need_bswap;
>> +    }
>> +
> 
> How is this supposed to work?
> 
> (!) TLB_BSWAP is a slow-flags bit, so it will never be set in tlb_addr.
Whoops, we've just finished merging the slow flags back in.
I'd best do this again with more coffee tomorrow.


r~



* Re: [PATCH 1/3] accel/tcg: Unify big- and little- endian atomic ops
  2025-08-21 11:33 ` [PATCH 1/3] accel/tcg: Unify big- and little- endian atomic ops Danila Zhebryakov
  2025-08-22  9:24   ` Richard Henderson
@ 2025-08-25  0:58   ` Richard Henderson
  1 sibling, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2025-08-25  0:58 UTC (permalink / raw)
  To: Danila Zhebryakov, qemu-devel
  Cc: Chinmay Rath, Nicholas Piggin, Riku Voipio, Ilya Leoshkevich,
	qemu-ppc, Paolo Bonzini, David Hildenbrand, qemu-s390x,
	Laurent Vivier, Thomas Huth

On 8/21/25 21:33, Danila Zhebryakov wrote:
>   ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, vaddr addr,
>                                 ABI_TYPE cmpv, ABI_TYPE newv,
>                                 MemOpIdx oi, uintptr_t retaddr)
>   {
> +    bool need_bswap = get_memop(oi) & MO_BSWAP;
>       DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
> -                                         DATA_SIZE, retaddr);
> -    DATA_TYPE ret;
> -
> +                                         DATA_SIZE, retaddr, &need_bswap);
> +    DATA_TYPE ret, ret_e;
> +    if (need_bswap) {

I'd rather you do all of the bswap detection in the helper, not put part of it here and 
then modify the partial value.


> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 87e14bde4f..841b54e41d 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -1808,7 +1808,7 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
>    * or io operations to proceed.  Return the host address.
>    */
>   static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
> -                               int size, uintptr_t retaddr)
> +                               int size, uintptr_t retaddr, bool *need_bswap)
>   {
>       uintptr_t mmu_idx = get_mmuidx(oi);
>       MemOp mop = get_memop(oi);
> @@ -1894,6 +1894,14 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
>           notdirty_write(cpu, addr, size, full, retaddr);
>       }
>   
> +    if (unlikely(tlb_addr & TLB_BSWAP)) {
> +        assert(!( (  full->slow_flags[MMU_DATA_STORE]
> +            ^ full->slow_flags[MMU_DATA_LOAD ])
> +            & TLB_BSWAP));

You can skip the assert. It's impossible to set a page with differing values.

> +
> +        *need_bswap = !*need_bswap;
> +    }

This becomes

       mop ^= MO_BSWAP;
     }
     *need_bswap = mop & MO_BSWAP;
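
Spelled out, the tail of atomic_mmu_lookup would read roughly as below
(a sketch, assuming mop still holds the result of get_memop(oi)):

    if (unlikely(tlb_addr & TLB_BSWAP)) {
        mop ^= MO_BSWAP;
    }
    *need_bswap = mop & MO_BSWAP;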


r~



* Re: [PATCH 2/3] target/ppc: Add support for LE pages on PowerPC booke206 mmu
  2025-08-21 11:33 ` [PATCH 2/3] target/ppc: Add support for LE pages on PowerPC booke206 mmu Danila Zhebryakov
@ 2025-08-25  1:06   ` Richard Henderson
  0 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2025-08-25  1:06 UTC (permalink / raw)
  To: Danila Zhebryakov, qemu-devel
  Cc: Chinmay Rath, Nicholas Piggin, Riku Voipio, Ilya Leoshkevich,
	qemu-ppc, Paolo Bonzini, David Hildenbrand, qemu-s390x,
	Laurent Vivier, Thomas Huth

On 8/21/25 21:33, Danila Zhebryakov wrote:
> Add support for passing TLB_BSWAP flag from powerpc booke206 MMU
> Fix instruction fetches from LE pages being treated as MMIO
> This change should not affect SPARC, as its instruction fetches are always BE
> 
> Signed-off-by: Danila Zhebryakov <d.zhebryakov@yandex.ru>
> ---
>   accel/tcg/cputlb.c      | 12 +++++-------
>   target/ppc/cpu.h        |  4 ++++
>   target/ppc/mmu-booke.c  |  5 +++++
>   target/ppc/mmu_helper.c | 17 +++++++++++++++--
>   target/ppc/translate.c  | 29 ++++++++++++++++++++++++++++-
>   5 files changed, 57 insertions(+), 10 deletions(-)
> 
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 841b54e41d..396e510f1b 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -1401,7 +1401,8 @@ static int probe_access_internal(CPUState *cpu, vaddr addr,
>       flags |= full->slow_flags[access_type];
>   
>       /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
> -    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY | TLB_CHECK_ALIGNED))
> +    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY
> +                           | TLB_CHECK_ALIGNED | TLB_BSWAP))
>           || (access_type != MMU_INST_FETCH && force_mmio)) {
>           *phost = NULL;
>           return TLB_MMIO;
> @@ -1792,12 +1793,9 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
>               mmu_watch_or_dirty(cpu, &l->page[1], type, ra);
>           }
>   
> -        /*
> -         * Since target/sparc is the only user of TLB_BSWAP, and all
> -         * Sparc accesses are aligned, any treatment across two pages
> -         * would be arbitrary.  Refuse it until there's a use.
> -         */
> -        tcg_debug_assert((flags & TLB_BSWAP) == 0);
> +        if (unlikely(flags & TLB_BSWAP)) {
> +            l->memop ^= MO_BSWAP;
> +        }

You should replace the comment, noting that E500 detects endianness on the lowest memory 
address.
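
Something like this, perhaps (suggested wording only):

    /*
     * e500 determines the endianness of an access that crosses two
     * pages from the page of the lowest byte address, so apply that
     * page's swap to the whole access.
     */
    if (unlikely(flags & TLB_BSWAP)) {
        l->memop ^= MO_BSWAP;
    }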

> +/*****************************************************************************/
> +/* PowerPC usage of the PAGE_TARGET_1 bit for TLB little-endian bit */
> +#define PAGE_LE  PAGE_TARGET_1

PAGE_TARGET_1 is a user-only thing...


> diff --git a/target/ppc/mmu-booke.c b/target/ppc/mmu-booke.c
> index 55e5dd7c6b..dc72bbf21f 100644
> --- a/target/ppc/mmu-booke.c
> +++ b/target/ppc/mmu-booke.c
> @@ -357,6 +357,11 @@ found_tlb:
>       }
>   
>       *prot = 0;
> +
> +    if (tlb->mas2 & MAS2_E) {
> +        *prot |= PAGE_LE;
> +    }
> +
>       if (pr) {
>           if (tlb->mas7_3 & MAS3_UR) {
>               *prot |= PAGE_READ;
> diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
> index ac60705402..20282edaaa 100644
> --- a/target/ppc/mmu_helper.c
> +++ b/target/ppc/mmu_helper.c
> @@ -27,6 +27,7 @@
>   #include "exec/cputlb.h"
>   #include "exec/page-protection.h"
>   #include "exec/target_page.h"
> +#include "exec/tlb-flags.h"
>   #include "exec/log.h"
>   #include "helper_regs.h"
>   #include "qemu/error-report.h"
> @@ -1368,8 +1369,20 @@ bool ppc_cpu_tlb_fill(CPUState *cs, vaddr eaddr, int size,
>   
>       if (ppc_xlate(cpu, eaddr, access_type, &raddr,
>                     &page_size, &prot, mmu_idx, !probe)) {
> -        tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
> -                     prot, mmu_idx, 1UL << page_size);
> +        if (prot & PAGE_LE) {
> +            CPUTLBEntryFull full = {
> +                .phys_addr = raddr & TARGET_PAGE_MASK,
> +                .attrs = MEMTXATTRS_UNSPECIFIED,
> +                .prot = prot,
> +                .lg_page_size = ctz64(1UL << page_size),
> +                .tlb_fill_flags = TLB_BSWAP
> +            };
> +            tlb_set_page_full(cs, mmu_idx, eaddr & TARGET_PAGE_MASK, &full);
> +
> +        } else {
> +            tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
> +                         prot, mmu_idx, 1UL << page_size);
> +        }

(0) ctz64(1 << x) == x.
(1) Ideally convert ppc to *always* use tlb_set_page_full.
(2) Pass the CPUTLBEntryFull struct down to the helpers to be
     filled in and then you don't need to abuse PAGE_TARGET_1;
     you can set .tlb_fill_flags directly for MAS2_E.
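
Concretely, something like this (untested sketch; it assumes the
CPUTLBEntryFull is plumbed from ppc_cpu_tlb_fill down into the
booke206 walker):

    /* mmu-booke.c: set the fill flag directly from MAS2_E */
    if (tlb->mas2 & MAS2_E) {
        full->tlb_fill_flags |= TLB_BSWAP;
    }

    /* ppc_cpu_tlb_fill: one path for all pages */
    full.phys_addr = raddr & TARGET_PAGE_MASK;
    full.attrs = MEMTXATTRS_UNSPECIFIED;
    full.prot = prot;
    full.lg_page_size = page_size;    /* ctz64(1UL << x) == x */
    tlb_set_page_full(cs, mmu_idx, eaddr & TARGET_PAGE_MASK, &full);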

> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -24,7 +24,9 @@
>   #include "exec/target_page.h"
>   #include "tcg/tcg-op.h"
>   #include "tcg/tcg-op-gvec.h"
> +#include "accel/tcg/probe.h"
>   #include "qemu/host-utils.h"
> +#include "exec/tlb-flags.h"
>   
>   #include "exec/helper-proto.h"
>   #include "exec/helper-gen.h"
> @@ -219,6 +221,27 @@ static inline bool need_byteswap(const DisasContext *ctx)
>   #endif
>   }
>   
> +#ifndef CONFIG_USER_ONLY
> +static bool is_page_little_endian(CPUPPCState *env, vaddr addr)
> +{
> +    CPUTLBEntryFull *full;
> +    void *host;
> +    int mmu_idx = ppc_env_mmu_index(env, true);
> +    int flags;
> +
> +    flags = probe_access_full_mmu(env, addr, 0, MMU_INST_FETCH, mmu_idx,
> +                                  &host, &full);
> +    assert(!(flags & TLB_INVALID_MASK));
> +
> +    return full->tlb_fill_flags & TLB_BSWAP;
> +}
> +#else
> +static bool is_page_little_endian(CPUPPCState *env, vaddr addr)
> +{
> +    return false;
> +}
> +#endif
> +
>   /* True when active word size < size of target_long.  */
>   #ifdef TARGET_PPC64
>   # define NARROW_MODE(C)  (!(C)->sf_mode)
> @@ -6577,7 +6600,11 @@ static void ppc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
>                 ctx->base.pc_next, ctx->mem_idx, (int)msr_ir);
>   
>       ctx->cia = pc = ctx->base.pc_next;
> -    insn = translator_ldl_swap(env, dcbase, pc, need_byteswap(ctx));
> +    bool tlb_is_le = is_page_little_endian(env, ctx->base.pc_next);
> +
> +
> +    insn = translator_ldl_swap(env, dcbase, pc, need_byteswap(ctx)
> +                                                || tlb_is_le);

You should probe the page once during ppc_tr_init_disas_context, not for every insn.  You 
can skip this probe unless E500.
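
Sketch (untested; ctx->le_page would be a new DisasContext field):

    /* in ppc_tr_init_disas_context(), once per TB */
    ctx->le_page = env->mmu_model == POWERPC_MMU_BOOKE206 &&
                   is_page_little_endian(env, ctx->base.pc_first);

    /* in ppc_tr_translate_insn() */
    insn = translator_ldl_swap(env, dcbase, pc,
                               need_byteswap(ctx) || ctx->le_page);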


r~


