qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
@ 2012-03-11 22:24 Blue Swirl
  2012-08-24 15:05 ` Aurelien Jarno
  0 siblings, 1 reply; 19+ messages in thread
From: Blue Swirl @ 2012-03-11 22:24 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 49303 bytes --]

Optionally, make memory access helpers take a parameter for CPUState
instead of relying on global env.

On most targets, perform simple moves to reorder registers. On i386,
switch from regparm(3) calling convention to standard stack-based
version.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 cpu-all.h              |    9 +++++
 exec-all.h             |    2 +
 exec.c                 |    4 ++
 softmmu_defs.h         |   28 ++++++++++++++++
 softmmu_header.h       |   60 ++++++++++++++++++++++++++--------
 softmmu_template.h     |   84 ++++++++++++++++++++++++++++++++---------------
 tcg/arm/tcg-target.c   |   53 ++++++++++++++++++++++++++++++
 tcg/hppa/tcg-target.c  |   44 +++++++++++++++++++++++++
 tcg/i386/tcg-target.c  |   57 ++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.c  |   46 ++++++++++++++++++++++++++
 tcg/mips/tcg-target.c  |   44 +++++++++++++++++++++++++
 tcg/ppc/tcg-target.c   |   45 +++++++++++++++++++++++++
 tcg/ppc64/tcg-target.c |   44 +++++++++++++++++++++++++
 tcg/s390/tcg-target.c  |   44 +++++++++++++++++++++++++
 tcg/sparc/tcg-target.c |   50 +++++++++++++++++++++++++++--
 tcg/tci/tcg-target.c   |    6 +++
 16 files changed, 576 insertions(+), 44 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index 80e6d42..d247555 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -259,12 +259,21 @@ extern unsigned long reserved_va;
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)

+#ifndef CONFIG_TCG_PASS_AREG0
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
 #define ldl_code(p) ldl_raw(p)
 #define ldq_code(p) ldq_raw(p)
+#else
+#define cpu_ldub_code(env1, p) ldub_raw(p)
+#define cpu_ldsb_code(env1, p) ldsb_raw(p)
+#define cpu_lduw_code(env1, p) lduw_raw(p)
+#define cpu_ldsw_code(env1, p) ldsw_raw(p)
+#define cpu_ldl_code(env1, p) ldl_raw(p)
+#define cpu_ldq_code(env1, p) ldq_raw(p)
+#endif

 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
diff --git a/exec-all.h b/exec-all.h
index 51d01f2..88a57f7 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -311,7 +311,9 @@ void tlb_fill(CPUState *env1, target_ulong addr,
int is_write, int mmu_idx,

 #define ACCESS_TYPE (NB_MMU_MODES + 1)
 #define MEMSUFFIX _code
+#ifndef CONFIG_TCG_PASS_AREG0
 #define env cpu_single_env
+#endif

 #define DATA_SIZE 1
 #include "softmmu_header.h"
diff --git a/exec.c b/exec.c
index 1e5bbd6..b302d53 100644
--- a/exec.c
+++ b/exec.c
@@ -4682,7 +4682,11 @@ tb_page_addr_t get_page_addr_code(CPUState
*env1, target_ulong addr)
     mmu_idx = cpu_mmu_index(env1);
     if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                  (addr & TARGET_PAGE_MASK))) {
+#ifdef CONFIG_TCG_PASS_AREG0
+        cpu_ldub_code(env1, addr);
+#else
         ldub_code(addr);
+#endif
     }
     pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
     if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
diff --git a/softmmu_defs.h b/softmmu_defs.h
index d47d30d..6e69b05 100644
--- a/softmmu_defs.h
+++ b/softmmu_defs.h
@@ -9,6 +9,7 @@
 #ifndef SOFTMMU_DEFS_H
 #define SOFTMMU_DEFS_H

+#ifndef CONFIG_TCG_PASS_AREG0
 uint8_t __ldb_mmu(target_ulong addr, int mmu_idx);
 void __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
 uint16_t __ldw_mmu(target_ulong addr, int mmu_idx);
@@ -26,5 +27,32 @@ uint32_t __ldl_cmmu(target_ulong addr, int mmu_idx);
 void __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
 uint64_t __ldq_cmmu(target_ulong addr, int mmu_idx);
 void __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
+#else
+uint8_t helper_ldb_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stb_mmu(CPUState *env, target_ulong addr, uint8_t val,
+                    int mmu_idx);
+uint16_t helper_ldw_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stw_mmu(CPUState *env, target_ulong addr, uint16_t val,
+                    int mmu_idx);
+uint32_t helper_ldl_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stl_mmu(CPUState *env, target_ulong addr, uint32_t val,
+                    int mmu_idx);
+uint64_t helper_ldq_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stq_mmu(CPUState *env, target_ulong addr, uint64_t val,
+                    int mmu_idx);
+
+uint8_t helper_ldb_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stb_cmmu(CPUState *env, target_ulong addr, uint8_t val,
+int mmu_idx);
+uint16_t helper_ldw_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stw_cmmu(CPUState *env, target_ulong addr, uint16_t val,
+                     int mmu_idx);
+uint32_t helper_ldl_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stl_cmmu(CPUState *env, target_ulong addr, uint32_t val,
+                     int mmu_idx);
+uint64_t helper_ldq_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stq_cmmu(CPUState *env, target_ulong addr, uint64_t val,
+                     int mmu_idx);
+#endif

 #endif
diff --git a/softmmu_header.h b/softmmu_header.h
index 818d7b6..3c0b298 100644
--- a/softmmu_header.h
+++ b/softmmu_header.h
@@ -78,9 +78,23 @@
 #define ADDR_READ addr_read
 #endif

+#ifndef CONFIG_TCG_PASS_AREG0
+#define ENV_PARAM
+#define ENV_VAR
+#define CPU_PREFIX
+#define HELPER_PREFIX __
+#else
+#define ENV_PARAM CPUState *env,
+#define ENV_VAR env,
+#define CPU_PREFIX cpu_
+#define HELPER_PREFIX helper_
+#endif
+
 /* generic load/store macros */

-static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline RES_TYPE
+glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
+                                                     target_ulong ptr)
 {
     int page_index;
     RES_TYPE res;
@@ -93,7 +107,9 @@ static inline RES_TYPE glue(glue(ld, USUFFIX),
MEMSUFFIX)(target_ulong ptr)
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
+                                                                     addr,
+                                                                     mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
@@ -102,7 +118,9 @@ static inline RES_TYPE glue(glue(ld, USUFFIX),
MEMSUFFIX)(target_ulong ptr)
 }

 #if DATA_SIZE <= 2
-static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline int
+glue(glue(glue(CPU_PREFIX, lds), SUFFIX), MEMSUFFIX)(ENV_PARAM
+                                                     target_ulong ptr)
 {
     int res, page_index;
     target_ulong addr;
@@ -114,7 +132,8 @@ static inline int glue(glue(lds, SUFFIX),
MEMSUFFIX)(target_ulong ptr)
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = (DATA_STYPE)glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
+                               MMUSUFFIX)(ENV_VAR addr, mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(lds, SUFFIX), _raw)((uint8_t *)physaddr);
@@ -127,7 +146,9 @@ static inline int glue(glue(lds, SUFFIX),
MEMSUFFIX)(target_ulong ptr)

 /* generic store macro */

-static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong
ptr, RES_TYPE v)
+static inline void
+glue(glue(glue(CPU_PREFIX, st), SUFFIX), MEMSUFFIX)(ENV_PARAM target_ulong ptr,
+                                                    RES_TYPE v)
 {
     int page_index;
     target_ulong addr;
@@ -139,7 +160,8 @@ static inline void glue(glue(st, SUFFIX),
MEMSUFFIX)(target_ulong ptr, RES_TYPE
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, mmu_idx);
+        glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
+                                                               mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v);
@@ -151,46 +173,52 @@ static inline void glue(glue(st, SUFFIX),
MEMSUFFIX)(target_ulong ptr, RES_TYPE
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)

 #if DATA_SIZE == 8
-static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
+static inline float64 glue(glue(CPU_PREFIX, ldfq), MEMSUFFIX)(ENV_PARAM
+                                                              target_ulong ptr)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
-    u.i = glue(ldq, MEMSUFFIX)(ptr);
+    u.i = glue(glue(CPU_PREFIX, ldq), MEMSUFFIX)(ENV_VAR ptr);
     return u.d;
 }

-static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
+static inline void glue(glue(CPU_PREFIX, stfq), MEMSUFFIX)(ENV_PARAM
+                                                           target_ulong ptr,
+                                                           float64 v)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
     u.d = v;
-    glue(stq, MEMSUFFIX)(ptr, u.i);
+    glue(glue(CPU_PREFIX, stq), MEMSUFFIX)(ENV_VAR ptr, u.i);
 }
 #endif /* DATA_SIZE == 8 */

 #if DATA_SIZE == 4
-static inline float32 glue(ldfl, MEMSUFFIX)(target_ulong ptr)
+static inline float32 glue(glue(CPU_PREFIX, ldfl), MEMSUFFIX)(ENV_PARAM
+                                                              target_ulong ptr)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(glue(CPU_PREFIX, ldl), MEMSUFFIX)(ENV_VAR ptr);
     return u.f;
 }

-static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
+static inline void glue(glue(CPU_PREFIX, stfl), MEMSUFFIX)(ENV_PARAM
+                                                           target_ulong ptr,
+                                                           float32 v)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
     u.f = v;
-    glue(stl, MEMSUFFIX)(ptr, u.i);
+    glue(glue(CPU_PREFIX, stl), MEMSUFFIX)(ENV_VAR ptr, u.i);
 }
 #endif /* DATA_SIZE == 4 */

@@ -205,3 +233,7 @@ static inline void glue(stfl,
MEMSUFFIX)(target_ulong ptr, float32 v)
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
 #undef ADDR_READ
+#undef ENV_PARAM
+#undef ENV_VAR
+#undef CPU_PREFIX
+#undef HELPER_PREFIX
diff --git a/softmmu_template.h b/softmmu_template.h
index 40fcf58..aa67dc3 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -54,10 +54,24 @@
 #define ADDR_READ addr_read
 #endif

-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+#ifndef CONFIG_TCG_PASS_AREG0
+#define ENV_PARAM
+#define ENV_VAR
+#define CPU_PREFIX
+#define HELPER_PREFIX __
+#else
+#define ENV_PARAM CPUState *env,
+#define ENV_VAR env,
+#define CPU_PREFIX cpu_
+#define HELPER_PREFIX helper_
+#endif
+
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                        target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr);
-static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
+static inline DATA_TYPE glue(io_read, SUFFIX)(ENV_PARAM
+                                              target_phys_addr_t physaddr,
                                               target_ulong addr,
                                               void *retaddr)
 {
@@ -89,7 +103,10 @@ static inline DATA_TYPE glue(io_read,
SUFFIX)(target_phys_addr_t physaddr,
 }

 /* handle all cases except unaligned access which span two pages */
-DATA_TYPE glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr, int mmu_idx)
+DATA_TYPE
+glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                       target_ulong addr,
+                                                       int mmu_idx)
 {
     DATA_TYPE res;
     int index;
@@ -110,22 +127,22 @@ DATA_TYPE glue(glue(__ld, SUFFIX),
MMUSUFFIX)(target_ulong addr, int mmu_idx)
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE,
mmu_idx, retaddr);
 #endif
-            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr,
+            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
                                                          mmu_idx, retaddr);
         } else {
             /* unaligned/aligned access in the same page */
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+                do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE,
mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -136,7 +153,7 @@ DATA_TYPE glue(glue(__ld, SUFFIX),
MMUSUFFIX)(target_ulong addr, int mmu_idx)
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE,
mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
         goto redo;
@@ -145,9 +162,11 @@ DATA_TYPE glue(glue(__ld, SUFFIX),
MMUSUFFIX)(target_ulong addr, int mmu_idx)
 }

 /* handle all unaligned cases */
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                        int mmu_idx,
-                                                        void *retaddr)
+static DATA_TYPE
+glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                       target_ulong addr,
+                                       int mmu_idx,
+                                       void *retaddr)
 {
     DATA_TYPE res, res1, res2;
     int index, shift;
@@ -164,15 +183,15 @@ static DATA_TYPE glue(glue(slow_ld, SUFFIX),
MMUSUFFIX)(target_ulong addr,
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
             addr1 = addr & ~(DATA_SIZE - 1);
             addr2 = addr1 + DATA_SIZE;
-            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr1,
                                                           mmu_idx, retaddr);
-            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -196,12 +215,14 @@ static DATA_TYPE glue(glue(slow_ld, SUFFIX),
MMUSUFFIX)(target_ulong addr,

 #ifndef SOFTMMU_CODE_ACCESS

-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                   target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    void *retaddr);

-static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
+static inline void glue(io_write, SUFFIX)(ENV_PARAM
+                                          target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong addr,
                                           void *retaddr)
@@ -231,8 +252,10 @@ static inline void glue(io_write,
SUFFIX)(target_phys_addr_t physaddr,
 #endif /* SHIFT > 2 */
 }

-void glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE val,
-                                         int mmu_idx)
+void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                            target_ulong addr,
+                                                            DATA_TYPE val,
+                                                            int mmu_idx)
 {
     target_phys_addr_t ioaddr;
     unsigned long addend;
@@ -250,21 +273,21 @@ void glue(glue(__st, SUFFIX),
MMUSUFFIX)(target_ulong addr, DATA_TYPE val,
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
TARGET_PAGE_SIZE) {
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
 #endif
-            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val,
+            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_VAR addr, val,
                                                    mmu_idx, retaddr);
         } else {
             /* aligned/unaligned access in the same page */
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(addr, 1, mmu_idx, retaddr);
+                do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -275,7 +298,7 @@ void glue(glue(__st, SUFFIX),
MMUSUFFIX)(target_ulong addr, DATA_TYPE val,
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, 1, mmu_idx, retaddr);
         goto redo;
@@ -283,7 +306,8 @@ void glue(glue(__st, SUFFIX),
MMUSUFFIX)(target_ulong addr, DATA_TYPE val,
 }

 /* handles all unaligned cases */
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                   target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    void *retaddr)
@@ -302,7 +326,7 @@ static void glue(glue(slow_st, SUFFIX),
MMUSUFFIX)(target_ulong addr,
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
@@ -310,10 +334,12 @@ static void glue(glue(slow_st, SUFFIX),
MMUSUFFIX)(target_ulong addr,
              * previous page from the TLB cache.  */
             for(i = DATA_SIZE - 1; i >= 0; i--) {
 #ifdef TARGET_WORDS_BIGENDIAN
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >>
(((DATA_SIZE - 1) * 8) - (i * 8)),
+                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                                          val >> (((DATA_SIZE - 1) *
8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                                          val >> (i * 8),
                                           mmu_idx, retaddr);
 #endif
             }
@@ -338,3 +364,7 @@ static void glue(glue(slow_st, SUFFIX),
MMUSUFFIX)(target_ulong addr,
 #undef USUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
+#undef ENV_PARAM
+#undef ENV_VAR
+#undef CPU_PREFIX
+#undef HELPER_PREFIX
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 5b233f5..711dc31 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -929,6 +929,27 @@ static inline void tcg_out_goto_label(TCGContext
*s, int cond, int label_index)

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -936,6 +957,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -943,6 +966,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

@@ -1075,6 +1099,19 @@ static inline void tcg_out_qemu_ld(TCGContext
*s, const TCGArg *args, int opc)
                     TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
     tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
 # endif
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);

     switch (opc) {
@@ -1341,6 +1378,22 @@ static inline void tcg_out_qemu_st(TCGContext
*s, const TCGArg *args, int opc)
     }
 # endif

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[3], 0,
+                    tcg_target_call_iarg_regs[2], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
     if (opc == 3)
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 71f4a8a..3fb2a51 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -882,6 +882,27 @@ static void tcg_out_setcond2(TCGContext *s, int
cond, TCGArg ret,
 #if defined(CONFIG_SOFTMMU)
 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -889,12 +910,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif

 /* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
    the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
@@ -1061,6 +1085,15 @@ static void tcg_out_qemu_ld(TCGContext *s,
const TCGArg *args, int opc)
     }
     tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);

     switch (opc) {
@@ -1212,6 +1245,17 @@ static void tcg_out_qemu_st(TCGContext *s,
const TCGArg *args, int opc)
         tcg_abort();
     }

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_st_helpers[opc]);

     /* label2: */
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 9776203..daa85b8 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -178,6 +178,9 @@ static int
target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
+#ifdef CONFIG_TCG_PASS_AREG0
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDX);
+#endif
         } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
@@ -957,6 +960,27 @@ static void tcg_out_jmp(TCGContext *s,
tcg_target_long dest)

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void *qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void *qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -964,12 +988,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif

 /* Perform the TLB load and compare.

@@ -1188,11 +1215,26 @@ static void tcg_out_qemu_ld(TCGContext *s,
const TCGArg *args,
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_push(s, TCG_AREG0);
+    stack_adjust += 4;
+#endif
 #else
     /* The first argument is already loaded with addrlo.  */
     arg_idx = 1;
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                  mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
 #endif

     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
@@ -1386,11 +1428,26 @@ static void tcg_out_qemu_st(TCGContext *s,
const TCGArg *args,
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_push(s, TCG_AREG0);
+    stack_adjust += 4;
+#endif
 #else
     tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                 TCG_REG_RSI, data_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
     stack_adjust = 0;
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
 #endif

     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index e3de79f..213e9d8 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1452,12 +1452,25 @@ static inline void tcg_out_qemu_tlb(TCGContext
*s, TCGArg addr_reg,
                                TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
 }

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
     __ldl_mmu,
     __ldq_mmu,
 };
+#endif

 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1517,6 +1530,15 @@ static inline void tcg_out_qemu_ld(TCGContext
*s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
@@ -1547,12 +1569,25 @@ static inline void tcg_out_qemu_ld(TCGContext
*s, const TCGArg *args, int opc)
     }
 }

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif

 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1622,6 +1657,17 @@ static inline void tcg_out_qemu_st(TCGContext
*s, const TCGArg *args, int opc)
         data_reg = TCG_REG_R2;
     }

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index c5c3282..3681141 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -750,6 +750,27 @@ static void tcg_out_setcond2(TCGContext *s,
TCGCond cond, int ret,

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -757,6 +778,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -764,6 +787,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
@@ -858,6 +882,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const
TCGArg *args,
 # endif
     tcg_out_movi(s, TCG_TYPE_I32, sp_args++, mem_index);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9,
(tcg_target_long)qemu_ld_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);

@@ -1069,6 +1102,17 @@ static void tcg_out_qemu_st(TCGContext *s,
const TCGArg *args,
     }

     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9,
(tcg_target_long)qemu_st_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index f5d9bf3..d0702d6 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -508,6 +508,27 @@ static void tcg_out_call (TCGContext *s,
tcg_target_long arg, int const_arg)

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -515,6 +536,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -522,6 +545,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
 {
@@ -598,6 +622,16 @@ static void tcg_out_qemu_ld (TCGContext *s, const
TCGArg *args, int opc)
     tcg_out_movi (s, TCG_TYPE_I32, 5, mem_index);
 #endif

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
+
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
     switch (opc) {
     case 0|4:
@@ -829,6 +863,17 @@ static void tcg_out_qemu_st (TCGContext *s, const
TCGArg *args, int opc)
     ir++;

     tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
     label2_ptr = s->code_ptr;
     tcg_out32 (s, B);
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 4419378..d9fb409 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -552,6 +552,27 @@ static void tcg_out_ldsta (TCGContext *s, int
ret, int addr,

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -559,12 +580,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif

 static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
                               int addr_reg, int s_bits, int offset)
@@ -648,6 +672,15 @@ static void tcg_out_qemu_ld (TCGContext *s, const
TCGArg *args, int opc)
     tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I64, 4, mem_index);

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);

     switch (opc) {
@@ -796,6 +829,17 @@ static void tcg_out_qemu_st (TCGContext *s, const
TCGArg *args, int opc)
     tcg_out_rld (s, RLDICL, 4, data_reg, 0, 64 - (1 << (3 + opc)));
     tcg_out_movi (s, TCG_TYPE_I64, 5, mem_index);

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);

     label2_ptr = s->code_ptr;
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 9317fe8..3695213 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -301,6 +301,27 @@ static const uint8_t tcg_cond_to_ltr_cond[10] = {

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -308,6 +329,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -315,6 +338,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 static uint8_t *tb_ret_addr;

@@ -1483,9 +1507,29 @@ static void tcg_prepare_qemu_ldst(TCGContext*
s, TCGReg data_reg,
             tcg_abort();
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                    tcg_target_call_iarg_regs[2]);
+        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);

         /* sign extension */
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 4461fb4..3f65126 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,6 +59,12 @@ static const char * const
tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif

+#ifdef CONFIG_TCG_PASS_AREG0
+#define ARG_OFFSET 1
+#else
+#define ARG_OFFSET 0
+#endif
+
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L0,
     TCG_REG_L1,
@@ -86,9 +92,9 @@ static const int tcg_target_call_iarg_regs[6] = {

 static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_O0,
-#if TCG_TARGET_REG_BITS == 32
-    TCG_REG_O1
-#endif
+    TCG_REG_O1,
+    TCG_REG_O2,
+    TCG_REG_O3,
 };

 static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
@@ -155,6 +161,9 @@ static int
target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3);
+#endif
         break;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
@@ -706,6 +715,27 @@ static void tcg_target_qemu_prologue(TCGContext *s)

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -713,6 +743,8 @@ static const void * const qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static const void * const qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -720,6 +752,7 @@ static const void * const qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 #if TARGET_LONG_BITS == 32
 #define TARGET_LD_OP LDUW
@@ -801,6 +834,17 @@ static void tcg_out_qemu_ld(TCGContext *s, const
TCGArg *args,

     /* mov */
     tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif

     /* XXX: move that code at the end of the TB */
     /* qemu_ld_helper[s_bits](arg0, arg1) */
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index fc0880c..72c83c9 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -798,6 +798,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode
opc, const TCGArg *args,
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -808,6 +811,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode
opc, const TCGArg *args,
 #endif
         break;
     case INDEX_op_qemu_st64:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);
-- 
1.7.9

[-- Attachment #2: 0002-softmmu-templates-optionally-pass-CPUState-to-memory.patch --]
[-- Type: text/x-patch, Size: 49908 bytes --]

From 617196343f80e33648bbfb6e522c2e1e230116af Mon Sep 17 00:00:00 2001
Message-Id: <617196343f80e33648bbfb6e522c2e1e230116af.1331504344.git.blauwirbel@gmail.com>
In-Reply-To: <e98a3f58574a8147e73aa278bb3c60b09106a2e2.1331504344.git.blauwirbel@gmail.com>
References: <e98a3f58574a8147e73aa278bb3c60b09106a2e2.1331504344.git.blauwirbel@gmail.com>
From: Blue Swirl <blauwirbel@gmail.com>
Date: Sun, 18 Sep 2011 14:55:46 +0000
Subject: [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions

Optionally, make memory access helpers take a parameter for CPUState
instead of relying on global env.

On most targets, perform simple moves to reorder registers. On i386,
switch from regparm(3) calling convention to standard stack-based
version.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 cpu-all.h              |    9 +++++
 exec-all.h             |    2 +
 exec.c                 |    4 ++
 softmmu_defs.h         |   28 ++++++++++++++++
 softmmu_header.h       |   60 ++++++++++++++++++++++++++--------
 softmmu_template.h     |   84 ++++++++++++++++++++++++++++++++---------------
 tcg/arm/tcg-target.c   |   53 ++++++++++++++++++++++++++++++
 tcg/hppa/tcg-target.c  |   44 +++++++++++++++++++++++++
 tcg/i386/tcg-target.c  |   57 ++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.c  |   46 ++++++++++++++++++++++++++
 tcg/mips/tcg-target.c  |   44 +++++++++++++++++++++++++
 tcg/ppc/tcg-target.c   |   45 +++++++++++++++++++++++++
 tcg/ppc64/tcg-target.c |   44 +++++++++++++++++++++++++
 tcg/s390/tcg-target.c  |   44 +++++++++++++++++++++++++
 tcg/sparc/tcg-target.c |   50 +++++++++++++++++++++++++++--
 tcg/tci/tcg-target.c   |    6 +++
 16 files changed, 576 insertions(+), 44 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index 80e6d42..d247555 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -259,12 +259,21 @@ extern unsigned long reserved_va;
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
 
+#ifndef CONFIG_TCG_PASS_AREG0
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
 #define ldl_code(p) ldl_raw(p)
 #define ldq_code(p) ldq_raw(p)
+#else
+#define cpu_ldub_code(env1, p) ldub_raw(p)
+#define cpu_ldsb_code(env1, p) ldsb_raw(p)
+#define cpu_lduw_code(env1, p) lduw_raw(p)
+#define cpu_ldsw_code(env1, p) ldsw_raw(p)
+#define cpu_ldl_code(env1, p) ldl_raw(p)
+#define cpu_ldq_code(env1, p) ldq_raw(p)
+#endif
 
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
diff --git a/exec-all.h b/exec-all.h
index 51d01f2..88a57f7 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -311,7 +311,9 @@ void tlb_fill(CPUState *env1, target_ulong addr, int is_write, int mmu_idx,
 
 #define ACCESS_TYPE (NB_MMU_MODES + 1)
 #define MEMSUFFIX _code
+#ifndef CONFIG_TCG_PASS_AREG0
 #define env cpu_single_env
+#endif
 
 #define DATA_SIZE 1
 #include "softmmu_header.h"
diff --git a/exec.c b/exec.c
index 1e5bbd6..b302d53 100644
--- a/exec.c
+++ b/exec.c
@@ -4682,7 +4682,11 @@ tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
     mmu_idx = cpu_mmu_index(env1);
     if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                  (addr & TARGET_PAGE_MASK))) {
+#ifdef CONFIG_TCG_PASS_AREG0
+        cpu_ldub_code(env1, addr);
+#else
         ldub_code(addr);
+#endif
     }
     pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
     if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
diff --git a/softmmu_defs.h b/softmmu_defs.h
index d47d30d..6e69b05 100644
--- a/softmmu_defs.h
+++ b/softmmu_defs.h
@@ -9,6 +9,7 @@
 #ifndef SOFTMMU_DEFS_H
 #define SOFTMMU_DEFS_H
 
+#ifndef CONFIG_TCG_PASS_AREG0
 uint8_t __ldb_mmu(target_ulong addr, int mmu_idx);
 void __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
 uint16_t __ldw_mmu(target_ulong addr, int mmu_idx);
@@ -26,5 +27,32 @@ uint32_t __ldl_cmmu(target_ulong addr, int mmu_idx);
 void __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
 uint64_t __ldq_cmmu(target_ulong addr, int mmu_idx);
 void __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
+#else
+uint8_t helper_ldb_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stb_mmu(CPUState *env, target_ulong addr, uint8_t val,
+                    int mmu_idx);
+uint16_t helper_ldw_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stw_mmu(CPUState *env, target_ulong addr, uint16_t val,
+                    int mmu_idx);
+uint32_t helper_ldl_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stl_mmu(CPUState *env, target_ulong addr, uint32_t val,
+                    int mmu_idx);
+uint64_t helper_ldq_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stq_mmu(CPUState *env, target_ulong addr, uint64_t val,
+                    int mmu_idx);
+
+uint8_t helper_ldb_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stb_cmmu(CPUState *env, target_ulong addr, uint8_t val,
+int mmu_idx);
+uint16_t helper_ldw_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stw_cmmu(CPUState *env, target_ulong addr, uint16_t val,
+                     int mmu_idx);
+uint32_t helper_ldl_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stl_cmmu(CPUState *env, target_ulong addr, uint32_t val,
+                     int mmu_idx);
+uint64_t helper_ldq_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void helper_stq_cmmu(CPUState *env, target_ulong addr, uint64_t val,
+                     int mmu_idx);
+#endif
 
 #endif
diff --git a/softmmu_header.h b/softmmu_header.h
index 818d7b6..3c0b298 100644
--- a/softmmu_header.h
+++ b/softmmu_header.h
@@ -78,9 +78,23 @@
 #define ADDR_READ addr_read
 #endif
 
+#ifndef CONFIG_TCG_PASS_AREG0
+#define ENV_PARAM
+#define ENV_VAR
+#define CPU_PREFIX
+#define HELPER_PREFIX __
+#else
+#define ENV_PARAM CPUState *env,
+#define ENV_VAR env,
+#define CPU_PREFIX cpu_
+#define HELPER_PREFIX helper_
+#endif
+
 /* generic load/store macros */
 
-static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline RES_TYPE
+glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
+                                                     target_ulong ptr)
 {
     int page_index;
     RES_TYPE res;
@@ -93,7 +107,9 @@ static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
+                                                                     addr,
+                                                                     mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
@@ -102,7 +118,9 @@ static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
 }
 
 #if DATA_SIZE <= 2
-static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline int
+glue(glue(glue(CPU_PREFIX, lds), SUFFIX), MEMSUFFIX)(ENV_PARAM
+                                                     target_ulong ptr)
 {
     int res, page_index;
     target_ulong addr;
@@ -114,7 +132,8 @@ static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = (DATA_STYPE)glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
+                               MMUSUFFIX)(ENV_VAR addr, mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(lds, SUFFIX), _raw)((uint8_t *)physaddr);
@@ -127,7 +146,9 @@ static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 
 /* generic store macro */
 
-static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+static inline void
+glue(glue(glue(CPU_PREFIX, st), SUFFIX), MEMSUFFIX)(ENV_PARAM target_ulong ptr,
+                                                    RES_TYPE v)
 {
     int page_index;
     target_ulong addr;
@@ -139,7 +160,8 @@ static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, mmu_idx);
+        glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
+                                                               mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v);
@@ -151,46 +173,52 @@ static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
 #if DATA_SIZE == 8
-static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
+static inline float64 glue(glue(CPU_PREFIX, ldfq), MEMSUFFIX)(ENV_PARAM
+                                                              target_ulong ptr)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
-    u.i = glue(ldq, MEMSUFFIX)(ptr);
+    u.i = glue(glue(CPU_PREFIX, ldq), MEMSUFFIX)(ENV_VAR ptr);
     return u.d;
 }
 
-static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
+static inline void glue(glue(CPU_PREFIX, stfq), MEMSUFFIX)(ENV_PARAM
+                                                           target_ulong ptr,
+                                                           float64 v)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
     u.d = v;
-    glue(stq, MEMSUFFIX)(ptr, u.i);
+    glue(glue(CPU_PREFIX, stq), MEMSUFFIX)(ENV_VAR ptr, u.i);
 }
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
-static inline float32 glue(ldfl, MEMSUFFIX)(target_ulong ptr)
+static inline float32 glue(glue(CPU_PREFIX, ldfl), MEMSUFFIX)(ENV_PARAM
+                                                              target_ulong ptr)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(glue(CPU_PREFIX, ldl), MEMSUFFIX)(ENV_VAR ptr);
     return u.f;
 }
 
-static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
+static inline void glue(glue(CPU_PREFIX, stfl), MEMSUFFIX)(ENV_PARAM
+                                                           target_ulong ptr,
+                                                           float32 v)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
     u.f = v;
-    glue(stl, MEMSUFFIX)(ptr, u.i);
+    glue(glue(CPU_PREFIX, stl), MEMSUFFIX)(ENV_VAR ptr, u.i);
 }
 #endif /* DATA_SIZE == 4 */
 
@@ -205,3 +233,7 @@ static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
 #undef ADDR_READ
+#undef ENV_PARAM
+#undef ENV_VAR
+#undef CPU_PREFIX
+#undef HELPER_PREFIX
diff --git a/softmmu_template.h b/softmmu_template.h
index 40fcf58..aa67dc3 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -54,10 +54,24 @@
 #define ADDR_READ addr_read
 #endif
 
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+#ifndef CONFIG_TCG_PASS_AREG0
+#define ENV_PARAM
+#define ENV_VAR
+#define CPU_PREFIX
+#define HELPER_PREFIX __
+#else
+#define ENV_PARAM CPUState *env,
+#define ENV_VAR env,
+#define CPU_PREFIX cpu_
+#define HELPER_PREFIX helper_
+#endif
+
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                        target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr);
-static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
+static inline DATA_TYPE glue(io_read, SUFFIX)(ENV_PARAM
+                                              target_phys_addr_t physaddr,
                                               target_ulong addr,
                                               void *retaddr)
 {
@@ -89,7 +103,10 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
 }
 
 /* handle all cases except unaligned access which span two pages */
-DATA_TYPE glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr, int mmu_idx)
+DATA_TYPE
+glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                       target_ulong addr,
+                                                       int mmu_idx)
 {
     DATA_TYPE res;
     int index;
@@ -110,22 +127,22 @@ DATA_TYPE glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr, int mmu_idx)
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
-            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr,
+            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
                                                          mmu_idx, retaddr);
         } else {
             /* unaligned/aligned access in the same page */
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+                do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -136,7 +153,7 @@ DATA_TYPE glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr, int mmu_idx)
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
         goto redo;
@@ -145,9 +162,11 @@ DATA_TYPE glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr, int mmu_idx)
 }
 
 /* handle all unaligned cases */
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                        int mmu_idx,
-                                                        void *retaddr)
+static DATA_TYPE
+glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                       target_ulong addr,
+                                       int mmu_idx,
+                                       void *retaddr)
 {
     DATA_TYPE res, res1, res2;
     int index, shift;
@@ -164,15 +183,15 @@ static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
             addr1 = addr & ~(DATA_SIZE - 1);
             addr2 = addr1 + DATA_SIZE;
-            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr1,
                                                           mmu_idx, retaddr);
-            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -196,12 +215,14 @@ static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
 
 #ifndef SOFTMMU_CODE_ACCESS
 
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                   target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    void *retaddr);
 
-static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
+static inline void glue(io_write, SUFFIX)(ENV_PARAM
+                                          target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong addr,
                                           void *retaddr)
@@ -231,8 +252,10 @@ static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
 #endif /* SHIFT > 2 */
 }
 
-void glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE val,
-                                         int mmu_idx)
+void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                            target_ulong addr,
+                                                            DATA_TYPE val,
+                                                            int mmu_idx)
 {
     target_phys_addr_t ioaddr;
     unsigned long addend;
@@ -250,21 +273,21 @@ void glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE val,
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
 #endif
-            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val,
+            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_VAR addr, val,
                                                    mmu_idx, retaddr);
         } else {
             /* aligned/unaligned access in the same page */
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(addr, 1, mmu_idx, retaddr);
+                do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -275,7 +298,7 @@ void glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE val,
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, 1, mmu_idx, retaddr);
         goto redo;
@@ -283,7 +306,8 @@ void glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE val,
 }
 
 /* handles all unaligned cases */
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                   target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    void *retaddr)
@@ -302,7 +326,7 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
@@ -310,10 +334,12 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
              * previous page from the TLB cache.  */
             for(i = DATA_SIZE - 1; i >= 0; i--) {
 #ifdef TARGET_WORDS_BIGENDIAN
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
+                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                                          val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                                          val >> (i * 8),
                                           mmu_idx, retaddr);
 #endif
             }
@@ -338,3 +364,7 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
 #undef USUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
+#undef ENV_PARAM
+#undef ENV_VAR
+#undef CPU_PREFIX
+#undef HELPER_PREFIX
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 5b233f5..711dc31 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -929,6 +929,27 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -936,6 +957,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -943,6 +966,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
@@ -1075,6 +1099,19 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                     TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
     tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
 # endif
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
 
     switch (opc) {
@@ -1341,6 +1378,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     }
 # endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[3], 0,
+                    tcg_target_call_iarg_regs[2], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
     if (opc == 3)
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 71f4a8a..3fb2a51 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -882,6 +882,27 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
 #if defined(CONFIG_SOFTMMU)
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -889,12 +910,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 /* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
    the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
@@ -1061,6 +1085,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
     tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);
 
     switch (opc) {
@@ -1212,6 +1245,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         tcg_abort();
     }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_st_helpers[opc]);
 
     /* label2: */
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 9776203..daa85b8 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -178,6 +178,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
+#ifdef CONFIG_TCG_PASS_AREG0
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDX);
+#endif
         } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
@@ -957,6 +960,27 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void *qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void *qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -964,12 +988,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 /* Perform the TLB load and compare.
 
@@ -1188,11 +1215,26 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_push(s, TCG_AREG0);
+    stack_adjust += 4;
+#endif
 #else
     /* The first argument is already loaded with addrlo.  */
     arg_idx = 1;
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                  mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
@@ -1386,11 +1428,26 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_push(s, TCG_AREG0);
+    stack_adjust += 4;
+#endif
 #else
     tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                 TCG_REG_RSI, data_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
     stack_adjust = 0;
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index e3de79f..213e9d8 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1452,12 +1452,25 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
 }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
     __ldl_mmu,
     __ldq_mmu,
 };
+#endif
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1517,6 +1530,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
@@ -1547,12 +1569,25 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
 }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1622,6 +1657,17 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         data_reg = TCG_REG_R2;
     }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index c5c3282..3681141 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -750,6 +750,27 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret,
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -757,6 +778,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -764,6 +787,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
@@ -858,6 +882,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 # endif
     tcg_out_movi(s, TCG_TYPE_I32, sp_args++, mem_index);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_ld_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);
 
@@ -1069,6 +1102,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
 
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_st_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);
 
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index f5d9bf3..d0702d6 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -508,6 +508,27 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -515,6 +536,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -522,6 +545,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
 {
@@ -598,6 +622,16 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_movi (s, TCG_TYPE_I32, 5, mem_index);
 #endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
+
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
     switch (opc) {
     case 0|4:
@@ -829,6 +863,17 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     ir++;
 
     tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
     label2_ptr = s->code_ptr;
     tcg_out32 (s, B);
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 4419378..d9fb409 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -552,6 +552,27 @@ static void tcg_out_ldsta (TCGContext *s, int ret, int addr,
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -559,12 +580,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
                               int addr_reg, int s_bits, int offset)
@@ -648,6 +672,15 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I64, 4, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
 
     switch (opc) {
@@ -796,6 +829,17 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_rld (s, RLDICL, 4, data_reg, 0, 64 - (1 << (3 + opc)));
     tcg_out_movi (s, TCG_TYPE_I64, 5, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
 
     label2_ptr = s->code_ptr;
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 9317fe8..3695213 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -301,6 +301,27 @@ static const uint8_t tcg_cond_to_ltr_cond[10] = {
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -308,6 +329,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -315,6 +338,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 static uint8_t *tb_ret_addr;
 
@@ -1483,9 +1507,29 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
             tcg_abort();
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                    tcg_target_call_iarg_regs[2]);
+        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
 
         /* sign extension */
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 4461fb4..3f65126 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,6 +59,12 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+#define ARG_OFFSET 1
+#else
+#define ARG_OFFSET 0
+#endif
+
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L0,
     TCG_REG_L1,
@@ -86,9 +92,9 @@ static const int tcg_target_call_iarg_regs[6] = {
 
 static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_O0,
-#if TCG_TARGET_REG_BITS == 32
-    TCG_REG_O1
-#endif
+    TCG_REG_O1,
+    TCG_REG_O2,
+    TCG_REG_O3,
 };
 
 static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
@@ -155,6 +161,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3);
+#endif
         break;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
@@ -706,6 +715,27 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -713,6 +743,8 @@ static const void * const qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static const void * const qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -720,6 +752,7 @@ static const void * const qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 #if TARGET_LONG_BITS == 32
 #define TARGET_LD_OP LDUW
@@ -801,6 +834,17 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     /* mov */
     tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
 
     /* XXX: move that code at the end of the TB */
     /* qemu_ld_helper[s_bits](arg0, arg1) */
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index fc0880c..72c83c9 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -798,6 +798,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -808,6 +811,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 #endif
         break;
     case INDEX_op_qemu_st64:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);
-- 
1.7.2.5


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-03-11 22:24 [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions Blue Swirl
@ 2012-08-24 15:05 ` Aurelien Jarno
  2012-08-24 15:33   ` malc
  2012-08-25  9:05   ` Blue Swirl
  0 siblings, 2 replies; 19+ messages in thread
From: Aurelien Jarno @ 2012-08-24 15:05 UTC (permalink / raw)
  To: Blue Swirl; +Cc: qemu-devel

On Sun, Mar 11, 2012 at 10:24:03PM +0000, Blue Swirl wrote:
> Optionally, make memory access helpers take a parameter for CPUState
> instead of relying on global env.
> 
> On most targets, perform simple moves to reorder registers. On i386,
> switch from regparm(3) calling convention to standard stack-based
> version.
> 
> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
> ---
>  cpu-all.h              |    9 +++++
>  exec-all.h             |    2 +
>  exec.c                 |    4 ++
>  softmmu_defs.h         |   28 ++++++++++++++++
>  softmmu_header.h       |   60 ++++++++++++++++++++++++++--------
>  softmmu_template.h     |   84 ++++++++++++++++++++++++++++++++---------------
>  tcg/arm/tcg-target.c   |   53 ++++++++++++++++++++++++++++++
>  tcg/hppa/tcg-target.c  |   44 +++++++++++++++++++++++++
>  tcg/i386/tcg-target.c  |   57 ++++++++++++++++++++++++++++++++
>  tcg/ia64/tcg-target.c  |   46 ++++++++++++++++++++++++++
>  tcg/mips/tcg-target.c  |   44 +++++++++++++++++++++++++
>  tcg/ppc/tcg-target.c   |   45 +++++++++++++++++++++++++
>  tcg/ppc64/tcg-target.c |   44 +++++++++++++++++++++++++
>  tcg/s390/tcg-target.c  |   44 +++++++++++++++++++++++++
>  tcg/sparc/tcg-target.c |   50 +++++++++++++++++++++++++++--
>  tcg/tci/tcg-target.c   |    6 +++
>  16 files changed, 576 insertions(+), 44 deletions(-)
> 

This commit completely broke arm and mips host support, not only for 64
bit targets as written in the comments, but even for 32 bit targets as
shifting arguments one by one doesn't work for qemu_st64 which needs 5
values, while only 4 can be passed in registers.

Moreover even on x86_64, this introduces some performance regressions by
emitting 4 additional moves in the slow path and adding some more
constraints on the registers that can be used for passing arguments to
ld/st ops.

While more and more targets needs AREG0 to be passed, I have started to
work on fixing that. I came to the conclusion that passing AREG0 as the
first argument, even if it is look the nice way to do it in C is
probably not the best option:
- On 32 bit hosts, which usually need register alignments for 64-bit
  values (at least on arm and mips), given AREG0 is a 32-bit value this
  makes the register usage very inefficient when the address or the value
  are 64 bits, in addition to making the code to handle quite complex. It
  would be better to place it close to mem_idx which is also 32 bits.
- On at least ppc, ppc64, sparc64 and x86_64, this adds some more 
  constraints to the ld/st ops arguments.
- On x86_64 This also means the address loading of the first argument done
  in the  TLB function can't be reused easily (it's not a problem right now
  due to registers shifting, but this problem appears when trying to clean
  the code).
- Finally on all hosts, this make the AREG0 / nonAREG0 load/store
  different, and thus the load/store code much more complex (this is
  something that should disappear when all targets are using the AREG0
  case).

That's why I would propose to move the env argument to the last
argument. It's better to place it after mem_idx, as it is usually easier
to store a register on the stack than an immediate value. It also means
we don't need any register shifting, the code change for most hosts 
would be only a few lines to either copy a value from one register to 
another, or to store a register on the stack, that is without additional
constraints (there is a call after that so the argument registers are 
already clobbered).

What do you think of that? If that seems the way to go, I can start
writing patches to do the changes and fix most hosts support.

Aurelien

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 15:05 ` Aurelien Jarno
@ 2012-08-24 15:33   ` malc
  2012-08-24 15:35     ` malc
  2012-08-25  9:05   ` Blue Swirl
  1 sibling, 1 reply; 19+ messages in thread
From: malc @ 2012-08-24 15:33 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: Blue Swirl, qemu-devel

On Fri, 24 Aug 2012, Aurelien Jarno wrote:

> On Sun, Mar 11, 2012 at 10:24:03PM +0000, Blue Swirl wrote:
> > Optionally, make memory access helpers take a parameter for CPUState
> > instead of relying on global env.
> > 
> > On most targets, perform simple moves to reorder registers. On i386,
> > switch from regparm(3) calling convention to standard stack-based
> > version.
> > 
> > Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
> > ---
> >  cpu-all.h              |    9 +++++
> >  exec-all.h             |    2 +
> >  exec.c                 |    4 ++
> >  softmmu_defs.h         |   28 ++++++++++++++++
> >  softmmu_header.h       |   60 ++++++++++++++++++++++++++--------
> >  softmmu_template.h     |   84 ++++++++++++++++++++++++++++++++---------------
> >  tcg/arm/tcg-target.c   |   53 ++++++++++++++++++++++++++++++
> >  tcg/hppa/tcg-target.c  |   44 +++++++++++++++++++++++++
> >  tcg/i386/tcg-target.c  |   57 ++++++++++++++++++++++++++++++++
> >  tcg/ia64/tcg-target.c  |   46 ++++++++++++++++++++++++++
> >  tcg/mips/tcg-target.c  |   44 +++++++++++++++++++++++++
> >  tcg/ppc/tcg-target.c   |   45 +++++++++++++++++++++++++
> >  tcg/ppc64/tcg-target.c |   44 +++++++++++++++++++++++++
> >  tcg/s390/tcg-target.c  |   44 +++++++++++++++++++++++++
> >  tcg/sparc/tcg-target.c |   50 +++++++++++++++++++++++++++--
> >  tcg/tci/tcg-target.c   |    6 +++
> >  16 files changed, 576 insertions(+), 44 deletions(-)
> > 
> 
> This commit completely broke arm and mips host support, not only for 64
> bit targets as written in the comments, but even for 32 bit targets as
> shifting arguments one by one doesn't work for qemu_st64 which needs 5
> values, while only 4 can be passed in registers.
> 
> Moreover even on x86_64, this introduces some performance regressions by
> emitting 4 additional moves in the slow path and adding some more
> constraints on the registers that can be used for passing arguments to
> ld/st ops.
> 
> While more and more targets needs AREG0 to be passed, I have started to
> work on fixing that. I came to the conclusion that passing AREG0 as the
> first argument, even if it is look the nice way to do it in C is
> probably not the best option:
> - On 32 bit hosts, which usually need register alignments for 64-bit
>   values (at least on arm and mips), given AREG0 is a 32-bit value this
            ditto ppc32

>   makes the register usage very inefficient when the address or the value
>   are 64 bits, in addition to making the code to handle quite complex. It
>   would be better to place it close to mem_idx which is also 32 bits.
> - On at least ppc, ppc64, sparc64 and x86_64, this adds some more 
>   constraints to the ld/st ops arguments.
> - On x86_64 This also means the address loading of the first argument done
>   in the  TLB function can't be reused easily (it's not a problem right now
>   due to registers shifting, but this problem appears when trying to clean
>   the code).
> - Finally on all hosts, this make the AREG0 / nonAREG0 load/store
>   different, and thus the load/store code much more complex (this is
>   something that should disappear when all targets are using the AREG0
>   case).
> 
> That's why I would propose to move the env argument to the last
> argument. It's better to place it after mem_idx, as it is usually easier
> to store a register on the stack than an immediate value. It also means
> we don't need any register shifting, the code change for most hosts 
> would be only a few lines to either copy a value from one register to 
> another, or to store a register on the stack, that is without additional
> constraints (there is a call after that so the argument registers are 
> already clobbered).
> 
> What do you think of that? If that seems the way to go, I can start
> writing patches to do the changes and fix most hosts support.
> 
> Aurelien
> 
> 

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 15:33   ` malc
@ 2012-08-24 15:35     ` malc
  2012-08-24 15:52       ` Andreas Färber
  0 siblings, 1 reply; 19+ messages in thread
From: malc @ 2012-08-24 15:35 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: Blue Swirl, qemu-devel

On Fri, 24 Aug 2012, malc wrote:

> On Fri, 24 Aug 2012, Aurelien Jarno wrote:
> 
> > On Sun, Mar 11, 2012 at 10:24:03PM +0000, Blue Swirl wrote:

[..snip..]

> > - On 32 bit hosts, which usually need register alignments for 64-bit
> >   values (at least on arm and mips), given AREG0 is a 32-bit value this
>             ditto ppc32, erm.. with sysv abi that is

[..snip..]

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 15:35     ` malc
@ 2012-08-24 15:52       ` Andreas Färber
  2012-08-24 16:26         ` malc
  2012-08-24 18:05         ` Aurelien Jarno
  0 siblings, 2 replies; 19+ messages in thread
From: Andreas Färber @ 2012-08-24 15:52 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: Blue Swirl, qemu-devel, Alexander Graf

Am 24.08.2012 17:35, schrieb malc:
> On Fri, 24 Aug 2012, malc wrote:
> 
>> On Fri, 24 Aug 2012, Aurelien Jarno wrote:
>>
>>> On Sun, Mar 11, 2012 at 10:24:03PM +0000, Blue Swirl wrote:
> 
> [..snip..]
> 
>>> - On 32 bit hosts, which usually need register alignments for 64-bit
>>>   values (at least on arm and mips), given AREG0 is a 32-bit value this
>>             ditto ppc32, erm.. with sysv abi that is

...which have been fixed in the v1.1 release cycle. You can take a look
at tcg/ppc/ for how we've fixed that with alignment macros and variable.

Not opposed to changing the argument order, but given that we're inches
away from v1.2 (in Hard Freeze), it might be better to first get AREG0
as first argument working for your favorite hosts as a bugfix and then
do any larger optimization for v1.3.

Regards,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 15:52       ` Andreas Färber
@ 2012-08-24 16:26         ` malc
  2012-08-24 18:20           ` Andreas Färber
  2012-08-24 18:05         ` Aurelien Jarno
  1 sibling, 1 reply; 19+ messages in thread
From: malc @ 2012-08-24 16:26 UTC (permalink / raw)
  To: Andreas Färber
  Cc: Blue Swirl, qemu-devel, Aurelien Jarno, Alexander Graf

On Fri, 24 Aug 2012, Andreas F?rber wrote:

> Am 24.08.2012 17:35, schrieb malc:
> > On Fri, 24 Aug 2012, malc wrote:
> > 
> >> On Fri, 24 Aug 2012, Aurelien Jarno wrote:
> >>
> >>> On Sun, Mar 11, 2012 at 10:24:03PM +0000, Blue Swirl wrote:
> > 
> > [..snip..]
> > 
> >>> - On 32 bit hosts, which usually need register alignments for 64-bit
> >>>   values (at least on arm and mips), given AREG0 is a 32-bit value this
> >>             ditto ppc32, erm.. with sysv abi that is
> 
> ...which have been fixed in the v1.1 release cycle. You can take a look
> at tcg/ppc/ for how we've fixed that with alignment macros and variable.

You are replying to the wrong person methinks.

> 
> Not opposed to changing the argument order, but given that we're inches
> away from v1.2 (in Hard Freeze), it might be better to first get AREG0
> as first argument working for your favorite hosts as a bugfix and then
> do any larger optimization for v1.3.
> 
> Regards,
> Andreas
> 
> 

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 15:52       ` Andreas Färber
  2012-08-24 16:26         ` malc
@ 2012-08-24 18:05         ` Aurelien Jarno
  2012-08-24 18:43           ` Andreas Färber
  1 sibling, 1 reply; 19+ messages in thread
From: Aurelien Jarno @ 2012-08-24 18:05 UTC (permalink / raw)
  To: Andreas Färber; +Cc: Blue Swirl, qemu-devel, Alexander Graf

On Fri, Aug 24, 2012 at 05:52:29PM +0200, Andreas Färber wrote:
> Am 24.08.2012 17:35, schrieb malc:
> > On Fri, 24 Aug 2012, malc wrote:
> > 
> >> On Fri, 24 Aug 2012, Aurelien Jarno wrote:
> >>
> >>> On Sun, Mar 11, 2012 at 10:24:03PM +0000, Blue Swirl wrote:
> > 
> > [..snip..]
> > 
> >>> - On 32 bit hosts, which usually need register alignments for 64-bit
> >>>   values (at least on arm and mips), given AREG0 is a 32-bit value this
> >>             ditto ppc32, erm.. with sysv abi that is
> 
> ...which have been fixed in the v1.1 release cycle. You can take a look
> at tcg/ppc/ for how we've fixed that with alignment macros and variable.

The fix still includes one more constraint. Also the method used for PPC
doesn't apply to ARM or MIPS, as they only have 4 registers to pass
arguments, the remaining going on the stack.

> Not opposed to changing the argument order, but given that we're inches
> away from v1.2 (in Hard Freeze), it might be better to first get AREG0
> as first argument working for your favorite hosts as a bugfix and then
> do any larger optimization for v1.3.

It's what I tried to do first, but I don't think it is realistic to use
such a code for v1.2, it is complex to support all cases, and thus
likely full of bugs. Maybe we should simply disable ARM and MIPS support
for this release.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 16:26         ` malc
@ 2012-08-24 18:20           ` Andreas Färber
  0 siblings, 0 replies; 19+ messages in thread
From: Andreas Färber @ 2012-08-24 18:20 UTC (permalink / raw)
  To: malc; +Cc: Blue Swirl, qemu-devel, Aurelien Jarno, Alexander Graf

Am 24.08.2012 18:26, schrieb malc:
> On Fri, 24 Aug 2012, Andreas F?rber wrote:
> 
>> Am 24.08.2012 17:35, schrieb malc:
>>> On Fri, 24 Aug 2012, malc wrote:
>>>
>>>> On Fri, 24 Aug 2012, Aurelien Jarno wrote:
>>>>
>>>>> On Sun, Mar 11, 2012 at 10:24:03PM +0000, Blue Swirl wrote:
>>>
>>> [..snip..]
>>>
>>>>> - On 32 bit hosts, which usually need register alignments for 64-bit
>>>>>   values (at least on arm and mips), given AREG0 is a 32-bit value this
>>>>             ditto ppc32, erm.. with sysv abi that is
>>
>> ...which have been fixed in the v1.1 release cycle. You can take a look
>> at tcg/ppc/ for how we've fixed that with alignment macros and variable.
> 
> You are replying to the wrong person methinks.

No, I replied to Aurélien just like you did. You don't need to look at
the code you already reviewed. :)

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 18:05         ` Aurelien Jarno
@ 2012-08-24 18:43           ` Andreas Färber
  2012-08-24 18:53             ` Aurelien Jarno
                               ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Andreas Färber @ 2012-08-24 18:43 UTC (permalink / raw)
  To: Aurelien Jarno, Peter Maydell; +Cc: Blue Swirl, qemu-devel, Alexander Graf

Am 24.08.2012 20:05, schrieb Aurelien Jarno:
> On Fri, Aug 24, 2012 at 05:52:29PM +0200, Andreas Färber wrote:
>> Not opposed to changing the argument order, but given that we're inches
>> away from v1.2 (in Hard Freeze), it might be better to first get AREG0
>> as first argument working for your favorite hosts as a bugfix and then
>> do any larger optimization for v1.3.
> 
> It's what I tried to do first, but I don't think it is realistic to use
> such a code for v1.2, it is complex to support all cases, and thus
> likely full of bugs. Maybe we should simply disable ARM and MIPS support
> for this release.

Depends on what you mean with "disable"? Adding an #error would hurt our
arm build just like earlier the ppc build, and I would hope from my last
testing that the problems would only affect the AREG0 targets,
especially not ARM on ARM (or MIPS on MIPS).

Aborting at runtime, only when really unsupported, would seem better.

I had taken a look at tcg/arm/ shortly after having fixed ppc (seeing
that there was a similar TODO or FIXME) but got distracted by other
projects. And your remarks wrt stack sound a bit frightening now. ;)
@Peter, have you looked into tcg/arm/ AREG0 support?

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 18:43           ` Andreas Färber
@ 2012-08-24 18:53             ` Aurelien Jarno
  2012-08-25  9:18               ` Blue Swirl
  2012-08-24 23:01             ` Peter Maydell
  2012-08-25 23:28             ` Peter Maydell
  2 siblings, 1 reply; 19+ messages in thread
From: Aurelien Jarno @ 2012-08-24 18:53 UTC (permalink / raw)
  To: Andreas Färber; +Cc: Blue Swirl, Peter Maydell, qemu-devel, Alexander Graf

On Fri, Aug 24, 2012 at 08:43:32PM +0200, Andreas Färber wrote:
> Am 24.08.2012 20:05, schrieb Aurelien Jarno:
> > On Fri, Aug 24, 2012 at 05:52:29PM +0200, Andreas Färber wrote:
> >> Not opposed to changing the argument order, but given that we're inches
> >> away from v1.2 (in Hard Freeze), it might be better to first get AREG0
> >> as first argument working for your favorite hosts as a bugfix and then
> >> do any larger optimization for v1.3.
> > 
> > It's what I tried to do first, but I don't think it is realistic to use
> > such a code for v1.2, it is complex to support all cases, and thus
> > likely full of bugs. Maybe we should simply disable ARM and MIPS support
> > for this release.
> 
> Depends on what you mean with "disable"? Adding an #error would hurt our
> arm build just like earlier the ppc build, and I would hope from my last
> testing that the problems would only affect the AREG0 targets,
> especially not ARM on ARM (or MIPS on MIPS).
> 

I mean basically not building qemu-system-{alpha,i386,x86_64,or32,sparc,
sparc64,xtensa,ppc,ppc64} on arm and mips hosts. 

> Aborting at runtime, only when really unsupported, would seem better.

What's the point of providing non working binaries, beside getting bug
reports?

> 
> I had taken a look at tcg/arm/ shortly after having fixed ppc (seeing
> that there was a similar TODO or FIXME) but got distracted by other
> projects. And your remarks wrt stack sound a bit frightening now. ;)
> @Peter, have you looked into tcg/arm/ AREG0 support?
> 


-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 18:43           ` Andreas Färber
  2012-08-24 18:53             ` Aurelien Jarno
@ 2012-08-24 23:01             ` Peter Maydell
  2012-08-25  9:52               ` Blue Swirl
  2012-08-25 12:53               ` Aurelien Jarno
  2012-08-25 23:28             ` Peter Maydell
  2 siblings, 2 replies; 19+ messages in thread
From: Peter Maydell @ 2012-08-24 23:01 UTC (permalink / raw)
  To: Andreas Färber
  Cc: Blue Swirl, qemu-devel, Aurelien Jarno, Alexander Graf

On 24 August 2012 19:43, Andreas Färber <afaerber@suse.de> wrote:
> Depends on what you mean with "disable"? Adding an #error would hurt our
> arm build just like earlier the ppc build, and I would hope from my last
> testing that the problems would only affect the AREG0 targets,
> especially not ARM on ARM (or MIPS on MIPS).
>
> Aborting at runtime, only when really unsupported, would seem better.
>
> I had taken a look at tcg/arm/ shortly after having fixed ppc (seeing
> that there was a similar TODO or FIXME) but got distracted by other
> projects. And your remarks wrt stack sound a bit frightening now. ;)
> @Peter, have you looked into tcg/arm/ AREG0 support?

Not yet. Why did we commit something that broke half our TCG
targets and why don't we just back it out for 1.2 ?

-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 15:05 ` Aurelien Jarno
  2012-08-24 15:33   ` malc
@ 2012-08-25  9:05   ` Blue Swirl
  1 sibling, 0 replies; 19+ messages in thread
From: Blue Swirl @ 2012-08-25  9:05 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On Fri, Aug 24, 2012 at 3:05 PM, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Sun, Mar 11, 2012 at 10:24:03PM +0000, Blue Swirl wrote:
>> Optionally, make memory access helpers take a parameter for CPUState
>> instead of relying on global env.
>>
>> On most targets, perform simple moves to reorder registers. On i386,
>> switch from regparm(3) calling convention to standard stack-based
>> version.
>>
>> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
>> ---
>>  cpu-all.h              |    9 +++++
>>  exec-all.h             |    2 +
>>  exec.c                 |    4 ++
>>  softmmu_defs.h         |   28 ++++++++++++++++
>>  softmmu_header.h       |   60 ++++++++++++++++++++++++++--------
>>  softmmu_template.h     |   84 ++++++++++++++++++++++++++++++++---------------
>>  tcg/arm/tcg-target.c   |   53 ++++++++++++++++++++++++++++++
>>  tcg/hppa/tcg-target.c  |   44 +++++++++++++++++++++++++
>>  tcg/i386/tcg-target.c  |   57 ++++++++++++++++++++++++++++++++
>>  tcg/ia64/tcg-target.c  |   46 ++++++++++++++++++++++++++
>>  tcg/mips/tcg-target.c  |   44 +++++++++++++++++++++++++
>>  tcg/ppc/tcg-target.c   |   45 +++++++++++++++++++++++++
>>  tcg/ppc64/tcg-target.c |   44 +++++++++++++++++++++++++
>>  tcg/s390/tcg-target.c  |   44 +++++++++++++++++++++++++
>>  tcg/sparc/tcg-target.c |   50 +++++++++++++++++++++++++++--
>>  tcg/tci/tcg-target.c   |    6 +++
>>  16 files changed, 576 insertions(+), 44 deletions(-)
>>
>
> This commit completely broke arm and mips host support, not only for 64
> bit targets as written in the comments, but even for 32 bit targets as
> shifting arguments one by one doesn't work for qemu_st64 which needs 5
> values, while only 4 can be passed in registers.

IIRC this was an earlier version, at least regparm() stuff was separated.

>
> Moreover even on x86_64, this introduces some performance regressions by
> emitting 4 additional moves in the slow path and adding some more
> constraints on the registers that can be used for passing arguments to
> ld/st ops.
>
> While more and more targets needs AREG0 to be passed, I have started to
> work on fixing that. I came to the conclusion that passing AREG0 as the
> first argument, even if it is look the nice way to do it in C is
> probably not the best option:
> - On 32 bit hosts, which usually need register alignments for 64-bit
>   values (at least on arm and mips), given AREG0 is a 32-bit value this
>   makes the register usage very inefficient when the address or the value
>   are 64 bits, in addition to making the code to handle quite complex. It
>   would be better to place it close to mem_idx which is also 32 bits.
> - On at least ppc, ppc64, sparc64 and x86_64, this adds some more
>   constraints to the ld/st ops arguments.
> - On x86_64 This also means the address loading of the first argument done
>   in the  TLB function can't be reused easily (it's not a problem right now
>   due to registers shifting, but this problem appears when trying to clean
>   the code).
> - Finally on all hosts, this make the AREG0 / nonAREG0 load/store
>   different, and thus the load/store code much more complex (this is
>   something that should disappear when all targets are using the AREG0
>   case).
>
> That's why I would propose to move the env argument to the last
> argument. It's better to place it after mem_idx, as it is usually easier
> to store a register on the stack than an immediate value. It also means
> we don't need any register shifting, the code change for most hosts
> would be only a few lines to either copy a value from one register to
> another, or to store a register on the stack, that is without additional
> constraints (there is a call after that so the argument registers are
> already clobbered).
>
> What do you think of that? If that seems the way to go, I can start
> writing patches to do the changes and fix most hosts support.

For 1.2, fixing host support would be most important. It's a good idea
to change the order, but I'd postpone it to 1.3.

>
> Aurelien
>
> --
> Aurelien Jarno                          GPG: 1024D/F1BCDB73
> aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 18:53             ` Aurelien Jarno
@ 2012-08-25  9:18               ` Blue Swirl
  2012-08-25 12:56                 ` Aurelien Jarno
  0 siblings, 1 reply; 19+ messages in thread
From: Blue Swirl @ 2012-08-25  9:18 UTC (permalink / raw)
  To: Aurelien Jarno
  Cc: Peter Maydell, Alexander Graf, Andreas Färber, qemu-devel

On Fri, Aug 24, 2012 at 6:53 PM, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Fri, Aug 24, 2012 at 08:43:32PM +0200, Andreas Färber wrote:
>> Am 24.08.2012 20:05, schrieb Aurelien Jarno:
>> > On Fri, Aug 24, 2012 at 05:52:29PM +0200, Andreas Färber wrote:
>> >> Not opposed to changing the argument order, but given that we're inches
>> >> away from v1.2 (in Hard Freeze), it might be better to first get AREG0
>> >> as first argument working for your favorite hosts as a bugfix and then
>> >> do any larger optimization for v1.3.
>> >
>> > It's what I tried to do first, but I don't think it is realistic to use
>> > such a code for v1.2, it is complex to support all cases, and thus
>> > likely full of bugs. Maybe we should simply disable ARM and MIPS support
>> > for this release.
>>
>> Depends on what you mean with "disable"? Adding an #error would hurt our
>> arm build just like earlier the ppc build, and I would hope from my last
>> testing that the problems would only affect the AREG0 targets,
>> especially not ARM on ARM (or MIPS on MIPS).
>>
>
> I mean basically not building qemu-system-{alpha,i386,x86_64,or32,sparc,
> sparc64,xtensa,ppc,ppc64} on arm and mips hosts.

It should be easy to fix the call register problems by those who know
the ABIs and the fixes could be applied for stable series even after
release. Disabling the targets and enabling them later would be equal
to introducing new features which is not OK for stable branch. So I'd
just declare in release errata that there are known problem with these
less commonly used combinations and a fix may arrive later.

>
>> Aborting at runtime, only when really unsupported, would seem better.
>
> What's the point of providing non working binaries, beside getting bug
> reports?

I think the deeper problem is that most TCG targets are not actively
maintained. Does for example IA64 work at all? If we removed the
unmaintained targets, would anyone complain? Development should not be
stalled because a maintainer is absent for several months.

>
>>
>> I had taken a look at tcg/arm/ shortly after having fixed ppc (seeing
>> that there was a similar TODO or FIXME) but got distracted by other
>> projects. And your remarks wrt stack sound a bit frightening now. ;)
>> @Peter, have you looked into tcg/arm/ AREG0 support?
>>
>
>
> --
> Aurelien Jarno                          GPG: 1024D/F1BCDB73
> aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 23:01             ` Peter Maydell
@ 2012-08-25  9:52               ` Blue Swirl
  2012-09-20 11:46                 ` Alexander Graf
  2012-08-25 12:53               ` Aurelien Jarno
  1 sibling, 1 reply; 19+ messages in thread
From: Blue Swirl @ 2012-08-25  9:52 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Alexander Graf, Andreas Färber, Aurelien Jarno, qemu-devel

On Fri, Aug 24, 2012 at 11:01 PM, Peter Maydell
<peter.maydell@linaro.org> wrote:
> On 24 August 2012 19:43, Andreas Färber <afaerber@suse.de> wrote:
>> Depends on what you mean with "disable"? Adding an #error would hurt our
>> arm build just like earlier the ppc build, and I would hope from my last
>> testing that the problems would only affect the AREG0 targets,
>> especially not ARM on ARM (or MIPS on MIPS).
>>
>> Aborting at runtime, only when really unsupported, would seem better.
>>
>> I had taken a look at tcg/arm/ shortly after having fixed ppc (seeing
>> that there was a similar TODO or FIXME) but got distracted by other
>> projects. And your remarks wrt stack sound a bit frightening now. ;)
>> @Peter, have you looked into tcg/arm/ AREG0 support?
>
> Not yet. Why did we commit something that broke half our TCG
> targets and why don't we just back it out for 1.2 ?

Why didn't you all complain earlier? There was enough time to review
the patches and plenty of time after the commits to test QEMU and
report problems. Backing out now is impossible and also useless if
someone who cares enough for the hosts affected steps forward and
fixes the broken targets in the coming weeks.

It should be also possible to disable non-working TCG targets for 1.2
or as I proposed in other message, simply declare the situation in
release errata and see if anyone ever cares.

I'd seriously also question how beneficial it is for QEMU project to
drag along TCG target support for poorly maintained targets or any
less maintained feature. As a comparison, there's ever growing
pressure to break non-Linux target support (not so much on purpose),
but since we have active people who detect problems early (even before
committing with the help of build bots), report immediately and fix
the problems, for example Win32 and OpenBSD builds are fine today even
if they need considerable support from core code. Compared to that,
the TCG target situation is pretty bad, bordering hopeless, even
though there's much less pressure for change and they are nicely
isolated. Do we even know which targets work and which don't?

>
> -- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 23:01             ` Peter Maydell
  2012-08-25  9:52               ` Blue Swirl
@ 2012-08-25 12:53               ` Aurelien Jarno
  1 sibling, 0 replies; 19+ messages in thread
From: Aurelien Jarno @ 2012-08-25 12:53 UTC (permalink / raw)
  To: Peter Maydell; +Cc: Blue Swirl, Alexander Graf, Andreas Färber, qemu-devel

On Sat, Aug 25, 2012 at 12:01:26AM +0100, Peter Maydell wrote:
> On 24 August 2012 19:43, Andreas Färber <afaerber@suse.de> wrote:
> > Depends on what you mean with "disable"? Adding an #error would hurt our
> > arm build just like earlier the ppc build, and I would hope from my last
> > testing that the problems would only affect the AREG0 targets,
> > especially not ARM on ARM (or MIPS on MIPS).
> >
> > Aborting at runtime, only when really unsupported, would seem better.
> >
> > I had taken a look at tcg/arm/ shortly after having fixed ppc (seeing
> > that there was a similar TODO or FIXME) but got distracted by other
> > projects. And your remarks wrt stack sound a bit frightening now. ;)
> > @Peter, have you looked into tcg/arm/ AREG0 support?
> 
> Not yet. Why did we commit something that broke half our TCG
> targets and why don't we just back it out for 1.2 ?
> 

I haven't tried, but for what I can see in the commit log, it was already
broken in 1.1.


-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-25  9:18               ` Blue Swirl
@ 2012-08-25 12:56                 ` Aurelien Jarno
  0 siblings, 0 replies; 19+ messages in thread
From: Aurelien Jarno @ 2012-08-25 12:56 UTC (permalink / raw)
  To: Blue Swirl; +Cc: Peter Maydell, Alexander Graf, Andreas Färber, qemu-devel

On Sat, Aug 25, 2012 at 09:18:17AM +0000, Blue Swirl wrote:
> On Fri, Aug 24, 2012 at 6:53 PM, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > On Fri, Aug 24, 2012 at 08:43:32PM +0200, Andreas Färber wrote:
> >> Am 24.08.2012 20:05, schrieb Aurelien Jarno:
> >> > On Fri, Aug 24, 2012 at 05:52:29PM +0200, Andreas Färber wrote:
> >> >> Not opposed to changing the argument order, but given that we're inches
> >> >> away from v1.2 (in Hard Freeze), it might be better to first get AREG0
> >> >> as first argument working for your favorite hosts as a bugfix and then
> >> >> do any larger optimization for v1.3.
> >> >
> >> > It's what I tried to do first, but I don't think it is realistic to use
> >> > such a code for v1.2, it is complex to support all cases, and thus
> >> > likely full of bugs. Maybe we should simply disable ARM and MIPS support
> >> > for this release.
> >>
> >> Depends on what you mean with "disable"? Adding an #error would hurt our
> >> arm build just like earlier the ppc build, and I would hope from my last
> >> testing that the problems would only affect the AREG0 targets,
> >> especially not ARM on ARM (or MIPS on MIPS).
> >>
> >
> > I mean basically not building qemu-system-{alpha,i386,x86_64,or32,sparc,
> > sparc64,xtensa,ppc,ppc64} on arm and mips hosts.
> 
> It should be easy to fix the call register problems by those who know
> the ABIs and the fixes could be applied for stable series even after
> release. Disabling the targets and enabling them later would be equal
> to introducing new features which is not OK for stable branch. So I'd
> just declare in release errata that there are known problem with these
> less commonly used combinations and a fix may arrive later.

The problem is that it is not that easy as said in my previous mails.
Having to support both AREG0 and non-AREG0 cases make this even worse.

My thought was to disable the support completely and add it back to 1.3
not to stable 1.2.x.

> >
> >> Aborting at runtime, only when really unsupported, would seem better.
> >
> > What's the point of providing non working binaries, beside getting bug
> > reports?
> 
> I think the deeper problem is that most TCG targets are not actively
> maintained. Does for example IA64 work at all? If we removed the
> unmaintained targets, would anyone complain? Development should not be
> stalled because a maintainer is absent for several months.

It is broken by recent TCG changes, but I am currently looking at that.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-24 18:43           ` Andreas Färber
  2012-08-24 18:53             ` Aurelien Jarno
  2012-08-24 23:01             ` Peter Maydell
@ 2012-08-25 23:28             ` Peter Maydell
  2012-08-26  1:03               ` Peter Maydell
  2 siblings, 1 reply; 19+ messages in thread
From: Peter Maydell @ 2012-08-25 23:28 UTC (permalink / raw)
  To: Andreas Färber
  Cc: Blue Swirl, qemu-devel, Aurelien Jarno, Alexander Graf

On 24 August 2012 19:43, Andreas Färber <afaerber@suse.de> wrote:
> @Peter, have you looked into tcg/arm/ AREG0 support?

Currently working on a patch to fix things. Sneak preview,
setting up the helper arguments looks much nicer now:

    argreg = TCG_REG_R0;
#if CONFIG_TCG_PASS_AREG0
    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
#endif
#if TARGET_LONG_BITS == 64
    argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
#else
    argreg = tcg_out_arg_reg32(s, argreg, addr_reg);
#endif

    switch (opc) {
    case 0:
        argreg = tcg_out_arg_reg8(s, argreg, data_reg);
        break;
    case 1:
        argreg = tcg_out_arg_reg16(s, argreg, data_reg);
        break;
    case 2:
        argreg = tcg_out_arg_reg32(s, argreg, data_reg);
        break;
    case 3:
        argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2);
        break;
    }

    argreg = tcg_out_imm32(s, argreg, mem_index);
    tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
    tcg_out_arg_stacktidy(s, argreg);

(the tcg_out_arg* hide aligning to regpair and whether
we need to put the argument on the stack, etc).

-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-25 23:28             ` Peter Maydell
@ 2012-08-26  1:03               ` Peter Maydell
  0 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2012-08-26  1:03 UTC (permalink / raw)
  To: Andreas Färber
  Cc: Blue Swirl, qemu-devel, Aurelien Jarno, Alexander Graf

On 26 August 2012 00:28, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 24 August 2012 19:43, Andreas Färber <afaerber@suse.de> wrote:
>> @Peter, have you looked into tcg/arm/ AREG0 support?
>
> Currently working on a patch to fix things.

...does anybody have a 64 bit guest test image? The amd64
debian one on Aurelien's website is no good, because it has a guest
bug where it does a division by zero very early in the boot
process for slow hosts where the timestamp counter doesn't
advance fast enough.

thanks
-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions
  2012-08-25  9:52               ` Blue Swirl
@ 2012-09-20 11:46                 ` Alexander Graf
  0 siblings, 0 replies; 19+ messages in thread
From: Alexander Graf @ 2012-09-20 11:46 UTC (permalink / raw)
  To: Blue Swirl; +Cc: Peter Maydell, Andreas Färber, Aurelien Jarno, qemu-devel


On 25.08.2012, at 11:52, Blue Swirl wrote:

> On Fri, Aug 24, 2012 at 11:01 PM, Peter Maydell
> <peter.maydell@linaro.org> wrote:
>> On 24 August 2012 19:43, Andreas Färber <afaerber@suse.de> wrote:
>>> Depends on what you mean with "disable"? Adding an #error would hurt our
>>> arm build just like earlier the ppc build, and I would hope from my last
>>> testing that the problems would only affect the AREG0 targets,
>>> especially not ARM on ARM (or MIPS on MIPS).
>>> 
>>> Aborting at runtime, only when really unsupported, would seem better.
>>> 
>>> I had taken a look at tcg/arm/ shortly after having fixed ppc (seeing
>>> that there was a similar TODO or FIXME) but got distracted by other
>>> projects. And your remarks wrt stack sound a bit frightening now. ;)
>>> @Peter, have you looked into tcg/arm/ AREG0 support?
>> 
>> Not yet. Why did we commit something that broke half our TCG
>> targets and why don't we just back it out for 1.2 ?
> 
> Why didn't you all complain earlier? There was enough time to review
> the patches and plenty of time after the commits to test QEMU and
> report problems. Backing out now is impossible and also useless if
> someone who cares enough for the hosts affected steps forward and
> fixes the broken targets in the coming weeks.
> 
> It should be also possible to disable non-working TCG targets for 1.2
> or as I proposed in other message, simply declare the situation in
> release errata and see if anyone ever cares.
> 
> I'd seriously also question how beneficial it is for QEMU project to
> drag along TCG target support for poorly maintained targets or any
> less maintained feature. As a comparison, there's ever growing
> pressure to break non-Linux target support (not so much on purpose),
> but since we have active people who detect problems early (even before
> committing with the help of build bots), report immediately and fix
> the problems, for example Win32 and OpenBSD builds are fine today even
> if they need considerable support from core code. Compared to that,
> the TCG target situation is pretty bad, bordering hopeless, even
> though there's much less pressure for change and they are nicely
> isolated. Do we even know which targets work and which don't?

I would think the same approach we have with the built bots should work quite well here. If as part of the build bot builds, we also had smoke execution tests, we would catch a lot of problems early on.

In fact, I can even donate an IA64 box as build bot as soon as we have such a system in place.


Alex

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2012-09-20 11:47 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-03-11 22:24 [Qemu-devel] [PATCH 2/5] softmmu templates: optionally pass CPUState to memory access functions Blue Swirl
2012-08-24 15:05 ` Aurelien Jarno
2012-08-24 15:33   ` malc
2012-08-24 15:35     ` malc
2012-08-24 15:52       ` Andreas Färber
2012-08-24 16:26         ` malc
2012-08-24 18:20           ` Andreas Färber
2012-08-24 18:05         ` Aurelien Jarno
2012-08-24 18:43           ` Andreas Färber
2012-08-24 18:53             ` Aurelien Jarno
2012-08-25  9:18               ` Blue Swirl
2012-08-25 12:56                 ` Aurelien Jarno
2012-08-24 23:01             ` Peter Maydell
2012-08-25  9:52               ` Blue Swirl
2012-09-20 11:46                 ` Alexander Graf
2012-08-25 12:53               ` Aurelien Jarno
2012-08-25 23:28             ` Peter Maydell
2012-08-26  1:03               ` Peter Maydell
2012-08-25  9:05   ` Blue Swirl

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).