From: Richard Henderson Date: Sat, 29 Sep 2012 11:10:39 -0700 Message-Id: <1348942239-3081-4-git-send-email-rth@twiddle.net> In-Reply-To: <1348942239-3081-1-git-send-email-rth@twiddle.net> References: <1348942239-3081-1-git-send-email-rth@twiddle.net> Subject: [Qemu-devel] [PATCH 3/3] tcg-alpha: New TCG target. To: qemu-devel@nongnu.org Cc: Aurelien Jarno This began with a patch from Dong Weiyu , and was modified to fix problems and to adapt to changes in TCG. Signed-off-by: Richard Henderson --- configure | 17 +- exec-all.h | 7 +- qemu-common.h | 4 +- tcg/alpha/tcg-target.c | 1860 ++++++++++++++++++++++++++++++++++++++++++++++++ tcg/alpha/tcg-target.h | 142 ++++ 5 files changed, 2018 insertions(+), 12 deletions(-) create mode 100644 tcg/alpha/tcg-target.c create mode 100644 tcg/alpha/tcg-target.h diff --git a/configure b/configure index 8f99b7b..85e5efa 100755 --- a/configure +++ b/configure @@ -352,6 +352,8 @@ elif check_define __arm__ ; then cpu="arm" elif check_define __hppa__ ; then cpu="hppa" +elif check_define __alpha__ ; then + cpu="alpha" else cpu=`uname -m` fi @@ -381,6 +383,9 @@ case "$cpu" in sparc|sun4[cdmuv]) cpu="sparc" ;; + alpha*) + cpu="alpha" + ;; *) # This will result in either an error or falling back to TCI later ARCH=unknown @@ -895,6 +900,11 @@ case "$cpu" in cc_i386='$(CC) -m32' host_guest_base="yes" ;; + alpha) + QEMU_CFLAGS="-msmall-data $QEMU_CFLAGS" + LDFLAGS="-Wl,--warn-multiple-gp $LDFLAGS" + host_guest_base="yes" + ;; arm*) host_guest_base="yes" ;; @@ -4048,13 +4058,6 @@ if test "$tcg_interpreter" = "yes" ; then echo "CONFIG_TCI_DIS=y" >> $libdis_config_mak fi -case "$ARCH" in -alpha) - # Ensure there's only a single GP - cflags="-msmall-data $cflags" -;; -esac - if test "$target_softmmu" = "yes" ; then case "$TARGET_BASE_ARCH" in arm) diff --git a/exec-all.h b/exec-all.h index 6516da0..4e2f2e8 100644 --- a/exec-all.h +++ b/exec-all.h @@ -132,9 +132,8 @@ static inline void tlb_flush(CPUArchState *env, int flush_global) #define CODE_GEN_AVG_BLOCK_SIZE 64 #endif -#if defined(__arm__) || defined(_ARCH_PPC) \ - || defined(__x86_64__) || defined(__i386__) \ - || defined(__sparc__) \ +#if defined(__alpha__) || defined(__arm__) || defined(_ARCH_PPC) \ + || defined(__x86_64__) || defined(__i386__) || defined(__sparc__) \ || defined(CONFIG_TCG_INTERPRETER) #define USE_DIRECT_JUMP #endif @@ -245,7 +244,7 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); #endif } -#elif defined(__sparc__) +#elif defined(__alpha__) || defined(__sparc__) void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr); #else #error tb_set_jmp_target1 is missing diff --git a/qemu-common.h
b/qemu-common.h index 15d9e4e..b46a9b0 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -6,7 +6,9 @@ #include "compiler.h" #include "config-host.h" -#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__ia64__) +#if defined(__alpha__) || defined(__arm__) \ + || defined(__sparc__) || defined(__mips__) \ + || defined(__hppa__) || defined(__ia64__) #define WORDS_ALIGNED #endif diff --git a/tcg/alpha/tcg-target.c b/tcg/alpha/tcg-target.c new file mode 100644 index 0000000..3a9a354 --- /dev/null +++ b/tcg/alpha/tcg-target.c @@ -0,0 +1,1860 @@ +/* + * Tiny Code Generator for QEMU on ALPHA platform. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + [TCG_REG_V0] = "v0", + [TCG_REG_T0] = "t0", + [TCG_REG_T1] = "t1", + [TCG_REG_T2] = "t2", + [TCG_REG_T3] = "t3", + [TCG_REG_T4] = "t4", + [TCG_REG_T5] = "t5", + [TCG_REG_T6] = "t6", + [TCG_REG_T7] = "t7", + [TCG_REG_T8] = "t8", + [TCG_REG_T9] = "t9", + [TCG_REG_T10] = "t10", + [TCG_REG_T11] = "t11", + [TCG_REG_S0] = "s0", + [TCG_REG_S1] = "s1", + [TCG_REG_S2] = "s2", + [TCG_REG_S3] = "s3", + [TCG_REG_S4] = "s4", + [TCG_REG_S5] = "s5", + [TCG_REG_S6] = "s6", + [TCG_REG_A0] = "a0", + [TCG_REG_A1] = "a1", + [TCG_REG_A2] = "a2", + [TCG_REG_A3] = "a3", + [TCG_REG_A4] = "a4", + [TCG_REG_A5] = "a5", + [TCG_REG_RA] = "ra", + [TCG_REG_PV] = "pv", + [TCG_REG_AT] = "at", + [TCG_REG_GP] = "gp", + [TCG_REG_SP] = "sp", + [TCG_REG_ZERO] = "zero", +}; +#endif + +/* + * $29 is the global pointer, + * $30 is the stack pointer, + * $31 is the zero register, + */ +static const int tcg_target_reg_alloc_order[] = { + /* Call-saved registers. */ + TCG_REG_S0, + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + /* Call-clobbered temporaries. */ + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + TCG_REG_T7, + TCG_REG_T8, + TCG_REG_T9, + TCG_REG_T10, + TCG_REG_T11, + TCG_REG_RA, + TCG_REG_PV, + TCG_REG_AT, + /* Call-clobbered argument and return registers. */ + TCG_REG_V0, + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, +}; + +/* + * According to alpha calling convention, these 6 registers are used for + * function parameter passing. if function has more than 6 parameters, + * remaining arguments are stored on the stack. 
+ */ +static const int tcg_target_call_iarg_regs[6] = { + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, +}; + +/* + * According to alpha calling convention, $0 is used for returning function + * result. + */ +static const int tcg_target_call_oarg_regs[1] = { + TCG_REG_V0 +}; + +/* + * Temporary registers used within this translator. Note that T9 is + * selected because it is the division return address register. + */ +#define TMP_REG1 TCG_REG_AT +#define TMP_REG2 TCG_REG_T9 + +/* + * Save the address of TB's epilogue. + */ +#define TB_RET_OFS \ + (TCG_STATIC_CALL_ARGS_SIZE + CPU_TEMP_BUF_NLONGS * sizeof(long)) + +/* + * If the guest base gets placed in high memory, it's more efficient + * to use a register to hold the address. + */ +#ifndef CONFIG_USE_GUEST_BASE +#define GUEST_BASE 0 +#endif +#define USE_GUEST_BASE_REG (GUEST_BASE > 0x7fff0000) +#define TCG_GUEST_BASE_REG TCG_REG_S5 + +/* + * Constant constraint mask values. + */ +#define TCG_CT_CONST_U8 0x100 +#define TCG_CT_CONST_ZERO 0x200 +#define TCG_CT_CONST_ANDI 0x400 +#define TCG_CT_CONST_PN255 0x800 + +#define INSN_OP(x) (((x) & 0x3f) << 26) +#define INSN_FUNC1(x) (((x) & 0x3) << 14) +#define INSN_FUNC2(x) (((x) & 0x7f) << 5) +#define INSN_RA(x) (TCG_TO_HW_REGNO(x) << 21) +#define INSN_RB(x) (TCG_TO_HW_REGNO(x) << 16) +#define INSN_RC(x) (TCG_TO_HW_REGNO(x)) +#define INSN_LIT(x) (((x) & 0xff) << 13) +#define INSN_DISP16(x) ((x) & 0xffff) +#define INSN_DISP21(x) ((x) & 0x1fffff) +#define INSN_RSVED(x) ((x) & 0x3fff) + +typedef enum AlphaOpcode { + INSN_ADDL = INSN_OP(0x10) | INSN_FUNC2(0x00), + INSN_ADDQ = INSN_OP(0x10) | INSN_FUNC2(0x20), + INSN_AND = INSN_OP(0x11) | INSN_FUNC2(0x00), + INSN_BEQ = INSN_OP(0x39), + INSN_BGE = INSN_OP(0x3e), + INSN_BGT = INSN_OP(0x3f), + INSN_BIC = INSN_OP(0x11) | INSN_FUNC2(0x08), + INSN_BIS = INSN_OP(0x11) | INSN_FUNC2(0x20), + INSN_BLE = INSN_OP(0x3b), + INSN_BLT = INSN_OP(0x3a), + INSN_BNE = INSN_OP(0x3d), + INSN_BR = INSN_OP(0x30), + INSN_BSR = INSN_OP(0x34), + INSN_CMOVEQ = INSN_OP(0x11) | INSN_FUNC2(0x24), + INSN_CMOVGE = INSN_OP(0x11) | INSN_FUNC2(0x46), + INSN_CMOVGT = INSN_OP(0x11) | INSN_FUNC2(0x66), + INSN_CMOVLE = INSN_OP(0x11) | INSN_FUNC2(0x64), + INSN_CMOVLT = INSN_OP(0x11) | INSN_FUNC2(0x44), + INSN_CMOVNE = INSN_OP(0x11) | INSN_FUNC2(0x26), + INSN_CMPEQ = INSN_OP(0x10) | INSN_FUNC2(0x2d), + INSN_CMPLE = INSN_OP(0x10) | INSN_FUNC2(0x6d), + INSN_CMPLT = INSN_OP(0x10) | INSN_FUNC2(0x4d), + INSN_CMPULE = INSN_OP(0x10) | INSN_FUNC2(0x3d), + INSN_CMPULT = INSN_OP(0x10) | INSN_FUNC2(0x1d), + INSN_EQV = INSN_OP(0x11) | INSN_FUNC2(0x48), + INSN_EXTBL = INSN_OP(0x12) | INSN_FUNC2(0x06), + INSN_EXTWH = INSN_OP(0x12) | INSN_FUNC2(0x5a), + INSN_EXTWL = INSN_OP(0x12) | INSN_FUNC2(0x16), + INSN_INSBL = INSN_OP(0x12) | INSN_FUNC2(0x0b), + INSN_INSLH = INSN_OP(0x12) | INSN_FUNC2(0x67), + INSN_INSLL = INSN_OP(0x12) | INSN_FUNC2(0x2b), + INSN_INSWL = INSN_OP(0x12) | INSN_FUNC2(0x1b), + INSN_JMP = INSN_OP(0x1a) | INSN_FUNC1(0), + INSN_JSR = INSN_OP(0x1a) | INSN_FUNC1(1), + INSN_LDA = INSN_OP(0x08), + INSN_LDAH = INSN_OP(0x09), + INSN_LDBU = INSN_OP(0x0a), + INSN_LDL = INSN_OP(0x28), + INSN_LDQ = INSN_OP(0x29), + INSN_LDWU = INSN_OP(0x0c), + INSN_MSKBL = INSN_OP(0x12) | INSN_FUNC2(0x02), + INSN_MSKLL = INSN_OP(0x12) | INSN_FUNC2(0x22), + INSN_MSKWL = INSN_OP(0x12) | INSN_FUNC2(0x12), + INSN_MULL = INSN_OP(0x13) | INSN_FUNC2(0x00), + INSN_MULQ = INSN_OP(0x13) | INSN_FUNC2(0x20), + INSN_ORNOT = INSN_OP(0x11) | INSN_FUNC2(0x28), + INSN_RET = INSN_OP(0x1a) | 
INSN_FUNC1(2), + INSN_S4ADDL = INSN_OP(0x10) | INSN_FUNC2(0x02), + INSN_S8ADDL = INSN_OP(0x10) | INSN_FUNC2(0x12), + INSN_SEXTB = INSN_OP(0x1c) | INSN_FUNC2(0x00), + INSN_SEXTW = INSN_OP(0x1c) | INSN_FUNC2(0x01), + INSN_SLL = INSN_OP(0x12) | INSN_FUNC2(0x39), + INSN_SRA = INSN_OP(0x12) | INSN_FUNC2(0x3c), + INSN_SRL = INSN_OP(0x12) | INSN_FUNC2(0x34), + INSN_STB = INSN_OP(0x0e), + INSN_STL = INSN_OP(0x2c), + INSN_STQ = INSN_OP(0x2d), + INSN_STW = INSN_OP(0x0d), + INSN_SUBL = INSN_OP(0x10) | INSN_FUNC2(0x09), + INSN_SUBQ = INSN_OP(0x10) | INSN_FUNC2(0x29), + INSN_XOR = INSN_OP(0x11) | INSN_FUNC2(0x40), + INSN_ZAPNOT = INSN_OP(0x12) | INSN_FUNC2(0x31), + + INSN_BUGCHK = INSN_OP(0x00) | INSN_DISP16(0x81), + + INSN_NOP = INSN_BIS + | INSN_RA(TCG_REG_ZERO) + | INSN_RB(TCG_REG_ZERO) + | INSN_RC(TCG_REG_ZERO), +} AlphaOpcode; + +/* + * Given a constraint, fill in the available register set or constant range. + */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str = *pct_str; + + switch (ct_str[0]) { + case 'r': + /* Constaint 'r' means any register is okay. */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffffu); + break; + + case 'a': + /* Constraint 'a' means $24, one of the division inputs. */ + ct->ct |= TCG_CT_REG; + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_T10); + break; + + case 'b': + /* Constraint 'b' means $25, one of the division inputs. */ + ct->ct |= TCG_CT_REG; + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_T11); + break; + + case 'c': + /* Constraint 'c' means $27, the call procedure vector, + as well as the division output. */ + ct->ct |= TCG_CT_REG; + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_PV); + break; + + case 'L': + /* Constraint for qemu_ld/st. The extra reserved registers are + used for passing the parameters to the helper function. */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffffu); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); + break; + + case 'I': + /* Constraint 'I' means an immediate 0 ... 255. */ + ct->ct |= TCG_CT_CONST_U8; + break; + + case 'J': + /* Constraint 'J' means the immediate 0. */ + ct->ct |= TCG_CT_CONST_ZERO; + break; + + case 'K': + /* Constraint 'K' means an immediate -255..255. */ + ct->ct |= TCG_CT_CONST_PN255; + break; + + case 'M': + /* Constraint 'M' means constants used with AND/BIC/ZAPNOT. */ + ct->ct |= TCG_CT_CONST_ANDI; + break; + + default: + return -1; + } + + ct_str++; + *pct_str = ct_str; + return 0; +} + +static int tcg_match_zapnot(tcg_target_long val) +{ + tcg_target_long mask0, maskff; + + /* Since we know this is an alpha host, speed the check by using + cmpbge to compare 8 bytes at once, and incidentally also + produce the zapnot mask. */ + /* ??? This builtin was implemented sometime in 2002, + perhaps in the GCC 3.1 timeframe. */ + mask0 = __builtin_alpha_cmpbge(0, val); + maskff = __builtin_alpha_cmpbge(val, -1); + + /* Here, mask0 contains the bytes that are 0, maskff contains + the bytes that are 0xff; that should cover the entire word. 
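+ For example, val == 0x00ffffffffffff00 gives mask0 == 0x81 and + maskff == 0x7e; the two masks cover all eight bytes, and 0x7e is the + zapnot immediate equivalent to an AND with val.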
*/ + if ((mask0 | maskff) == 0xff) { + return maskff; + } + return 0; +} + +static int tcg_match_andi(tcg_target_long val) +{ + if (val == (val & 0xff)) { + return 1; /* and */ + } else if (~val == (~val & 0xff)) { + return 1; /* bic */ + } else { + return tcg_match_zapnot(val) != 0; + } +} + +static inline int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } else if (ct & TCG_CT_CONST_U8) { + return val == (uint8_t)val; + } else if (ct & TCG_CT_CONST_ZERO) { + return val == 0; + } else if (ct & TCG_CT_CONST_ANDI) { + return tcg_match_andi(val); + } else if (ct & TCG_CT_CONST_PN255) { + return val >= -255 && val <= 255; + } else { + return 0; + } +} + +static inline void tcg_out_fmt_br(TCGContext *s, AlphaOpcode opc, + TCGReg ra, int disp) +{ + tcg_out32(s, opc | INSN_RA(ra) | INSN_DISP21(disp)); +} + +static inline void tcg_out_fmt_mem(TCGContext *s, AlphaOpcode opc, + TCGReg ra, TCGReg rb, int disp) +{ + assert(disp == (int16_t)disp); + tcg_out32(s, opc | INSN_RA(ra) | INSN_RB(rb) | INSN_DISP16(disp)); +} + +static inline void tcg_out_fmt_jmp(TCGContext *s, AlphaOpcode opc, + TCGReg ra, TCGReg rb, int rsved) +{ + tcg_out32(s, opc | INSN_RA(ra) | INSN_RB(rb) | INSN_RSVED(rsved)); +} + +static inline void tcg_out_fmt_opr(TCGContext *s, AlphaOpcode opc, + TCGReg ra, TCGReg rb, TCGReg rc) +{ + tcg_out32(s, opc | INSN_RA(ra) | INSN_RB(rb) | INSN_RC(rc)); +} + +static inline void tcg_out_fmt_opi(TCGContext *s, AlphaOpcode opc, + TCGReg ra, tcg_target_ulong lit, TCGReg rc) +{ + assert(lit <= 0xff); + tcg_out32(s, opc | INSN_RA(ra) | INSN_LIT(lit) | INSN_RC(rc) | (1<<12)); +} + +/* + * Move from one reg to another. This is called from tcg.c. + */ +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg rc, TCGReg rb) +{ + if (type == TCG_TYPE_I32) { + /* Also used for 64->32 bit truncation, so don't elide copies. */ + tcg_out_fmt_opr(s, INSN_ADDL, TCG_REG_ZERO, rb, rc); + } else if (rb != rc) { + tcg_out_fmt_opr(s, INSN_BIS, TCG_REG_ZERO, rb, rc); + } +} + +/* + * Helper function to emit a memory format operation with a displacement + * that may be larger than the 16 bits accepted by the real instruction. + */ +static void tcg_out_mem_long(TCGContext *s, AlphaOpcode opc, TCGReg ra, + TCGReg rb, tcg_target_long orig) +{ + tcg_target_long l0, l1, extra = 0, val = orig; + TCGReg rs; + + /* Pick a scratch register. Use the output register, if possible. */ + switch (opc) { + default: + if (ra != rb) { + rs = ra; + break; + } + /* FALLTHRU */ + + case INSN_STB: + case INSN_STW: + case INSN_STL: + case INSN_STQ: + assert(ra != TMP_REG1); + rs = TMP_REG1; + break; + } + + /* See if we can turn a large absolute address into an offset from $gp. + Note that we assert via -msmall-data and --warn-multiple-gp that + the $gp value is constant everywhere. Which means that the translated + code shares the same value as we have loaded right now.
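+ Thus a 64-bit constant within 2GB of that shared $gp value can be + formed with a short ldah/lda sequence off $gp rather than the full + 64-bit constant sequence below.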
*/ + if (rb == TCG_REG_ZERO && orig != (int32_t)orig) { + register tcg_target_long gp __asm__("$29"); + tcg_target_long gprel = orig - gp; + + if (gprel == (int32_t)gprel) { + orig = val = gprel; + rb = TCG_REG_GP; + } + } + + l0 = (int16_t)val; + val = (val - l0) >> 16; + l1 = (int16_t)val; + + if (orig == (int32_t)orig) { + if (l1 < 0 && orig >= 0) { + extra = 0x4000; + l1 = (int16_t)(val - 0x4000); + } + } else { + tcg_target_long l2, l3; + int rh = TCG_REG_ZERO; + + val = (val - l1) >> 16; + l2 = (int16_t)val; + val = (val - l2) >> 16; + l3 = (int16_t)val; + + if (l3) { + tcg_out_fmt_mem(s, INSN_LDAH, rs, rh, l3); + rh = rs; + } + if (l2) { + tcg_out_fmt_mem(s, INSN_LDA, rs, rh, l2); + rh = rs; + } + tcg_out_fmt_opi(s, INSN_SLL, rh, 32, rs); + + if (rb != TCG_REG_ZERO) { + tcg_out_fmt_opr(s, INSN_ADDQ, rs, rb, rs); + } + rb = rs; + } + + if (l1) { + tcg_out_fmt_mem(s, INSN_LDAH, rs, rb, l1); + rb = rs; + } + if (extra) { + tcg_out_fmt_mem(s, INSN_LDAH, rs, rb, extra); + rb = rs; + } + + if (opc != INSN_LDA || rb != ra || l0 != 0) { + tcg_out_fmt_mem(s, opc, ra, rb, l0); + } +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ra, + tcg_target_long val) +{ + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + tcg_out_mem_long(s, INSN_LDA, ra, TCG_REG_ZERO, val); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ra, + TCGReg rb, tcg_target_long disp) +{ + tcg_out_mem_long(s, type == TCG_TYPE_I32 ? INSN_LDL : INSN_LDQ, + ra, rb, disp); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg ra, + TCGReg rb, tcg_target_long disp) +{ + tcg_out_mem_long(s, type == TCG_TYPE_I32 ? INSN_STL : INSN_STQ, + ra, rb, disp); +} + +static void tgen_andi(TCGContext *s, TCGReg ra, tcg_target_long val, TCGReg rc) +{ + if (val == (val & 0xff)) { + tcg_out_fmt_opi(s, INSN_AND, ra, val, rc); + } else if (~val == (~val & 0xff)) { + tcg_out_fmt_opi(s, INSN_BIC, ra, ~val, rc); + } else { + int mask = tcg_match_zapnot(val); + assert(mask != 0); + tcg_out_fmt_opi(s, INSN_ZAPNOT, ra, mask, rc); + } +} + +static inline void tgen_ext8u(TCGContext *s, TCGReg ra, TCGReg rc) +{ + tcg_out_fmt_opi(s, INSN_AND, ra, 0xff, rc); +} + +static inline void tgen_ext8s(TCGContext *s, TCGReg ra, TCGReg rc) +{ + tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_ZERO, ra, rc); +} + +static inline void tgen_ext16u(TCGContext *s, TCGReg ra, TCGReg rc) +{ + tcg_out_fmt_opi(s, INSN_ZAPNOT, ra, 0x03, rc); +} + +static inline void tgen_ext16s(TCGContext *s, TCGReg ra, TCGReg rc) +{ + tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_ZERO, ra, rc); +} + +static inline void tgen_ext32u(TCGContext *s, TCGReg ra, TCGReg rc) +{ + tcg_out_fmt_opi(s, INSN_ZAPNOT, ra, 0x0f, rc); +} + +static inline void tgen_ext32s(TCGContext *s, TCGReg ra, TCGReg rc) +{ + tcg_out_fmt_opr(s, INSN_ADDL, TCG_REG_ZERO, ra, rc); +} + +static void tgen_extend(TCGContext *s, int sizeop, TCGReg ra, TCGReg rc) +{ + switch (sizeop) { + case 0: + tgen_ext8u(s, ra, rc); + break; + case 0 | 4: + tgen_ext8s(s, ra, rc); + break; + case 1: + tgen_ext16u(s, ra, rc); + break; + case 1 | 4: + tgen_ext16s(s, ra, rc); + break; + case 2: + tgen_ext32u(s, ra, rc); + break; + case 2 | 4: + tgen_ext32s(s, ra, rc); + break; + case 3: + tcg_out_mov(s, TCG_TYPE_I64, ra, rc); + break; + default: + tcg_abort(); + } +} + +static void tgen_bswap(TCGContext *s, int sizeop, TCGReg ra, TCGReg rc) +{ + const TCGReg t0 = TMP_REG1, t1 = TMP_REG2; + + switch (sizeop) { + case 1: /* 16-bit swap, unsigned result */ + case 1 | 4: /* 16-bit swap, signed result */ 
+ /* input value = xxxx xxAB */ + tcg_out_fmt_opi(s, INSN_EXTWH, ra, 7, t0); /* .... ..B. */ + tcg_out_fmt_opi(s, INSN_EXTBL, ra, 1, rc); /* .... ...A */ + tcg_out_fmt_opr(s, INSN_BIS, rc, t0, rc); /* .... ..BA */ + if (sizeop & 4) { + tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_ZERO, rc, rc); + } + break; + + case 2: /* 32-bit swap, unsigned result */ + case 2 | 4: /* 32-bit swap, signed result */ + /* input value = xxxx ABCD */ + tcg_out_fmt_opi(s, INSN_INSLH, ra, 7, t0); /* .... .ABC */ + tcg_out_fmt_opi(s, INSN_INSWL, ra, 3, rc); /* ...C D... */ + tcg_out_fmt_opr(s, INSN_BIS, t0, rc, rc); /* ...C DABC */ + tcg_out_fmt_opi(s, INSN_SRL, rc, 16, t0); /* .... .CDA */ + tcg_out_fmt_opi(s, INSN_ZAPNOT, rc, 0x0A, rc); /* .... D.B. */ + tcg_out_fmt_opi(s, INSN_ZAPNOT, t0, 0x05, t0); /* .... .C.A */ + tcg_out_fmt_opr(s, (sizeop & 4 ? INSN_ADDL : INSN_BIS), t0, rc, rc); + break; + + case 3: /* 64-bit swap */ + /* input value = ABCD EFGH */ + tcg_out_fmt_opi(s, INSN_SRL, ra, 24, t0); /* ...A BCDE */ + tcg_out_fmt_opi(s, INSN_SLL, ra, 24, t1); /* DEFG H... */ + tcg_out_fmt_opi(s, INSN_ZAPNOT, t0, 0x11, t0); /* ...A ...E */ + tcg_out_fmt_opi(s, INSN_ZAPNOT, t1, 0x88, t1); /* D... H... */ + tcg_out_fmt_opr(s, INSN_BIS, t0, t1, t1); /* D..A H..E */ + tcg_out_fmt_opi(s, INSN_SRL, ra, 8, t0); /* .ABC DEFG */ + tcg_out_fmt_opi(s, INSN_ZAPNOT, t0, 0x22, t0); /* ..B. ..F. */ + tcg_out_fmt_opr(s, INSN_BIS, t0, t1, t1); /* D.BA H.FE */ + tcg_out_fmt_opi(s, INSN_SLL, ra, 8, t0); /* BCDE FGH. */ + tcg_out_fmt_opi(s, INSN_ZAPNOT, t0, 0x44, t0); /* .C.. .G.. */ + tcg_out_fmt_opr(s, INSN_BIS, t0, t1, t1); /* DCBA HGFE */ + tcg_out_fmt_opi(s, INSN_SRL, t1, 32, t0); /* .... DCBA */ + tcg_out_fmt_opi(s, INSN_SLL, t1, 32, t1); /* HGFE .... */ + tcg_out_fmt_opr(s, INSN_BIS, t0, t1, rc); /* HGFE DCBA */ + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_ld_sz(TCGContext *s, int sizeop, TCGReg ra, TCGReg rb, + tcg_target_long disp) +{ + static const AlphaOpcode ld_opc[4] = { + INSN_LDBU, INSN_LDWU, INSN_LDL, INSN_LDQ + }; + + tcg_out_mem_long(s, ld_opc[sizeop & 3], ra, rb, disp); + + switch (sizeop) { + case 0 | 4 | 8: + case 0 | 4: + case 1 | 4: + case 2: + tgen_extend(s, sizeop & 7, ra, ra); + break; + + case 0: + case 0 | 8: + case 1: + case 2 | 4: + case 3: + break; + + case 1 | 8: + case 1 | 4 | 8: + case 2 | 8: + case 2 | 4 | 8: + case 3 | 8: + tgen_bswap(s, sizeop & 7, ra, ra); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_st_sz(TCGContext *s, int sizeop, TCGReg ra, TCGReg rb, + tcg_target_long disp) +{ + static const AlphaOpcode st_opc[4] = { + INSN_STB, INSN_STW, INSN_STL, INSN_STQ + }; + + tcg_out_mem_long(s, st_opc[sizeop & 3], ra, rb, disp); +} + +static void patch_reloc(uint8_t *x_ptr, int type, + tcg_target_long value, tcg_target_long addend) +{ + uint32_t *code_ptr = (uint32_t *)x_ptr; + uint32_t insn = *code_ptr; + + value += addend; + switch (type) { + case R_ALPHA_BRADDR: + value -= (tcg_target_long)x_ptr + 4; + if ((value & 3) || value < -0x400000 || value >= 0x400000) { + tcg_abort(); + } + *code_ptr = (insn & ~0x1fffff) | INSN_DISP21(value >> 2); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_br(TCGContext *s, int opc, TCGReg ra, int label_index) +{ + TCGLabel *l = &s->labels[label_index]; + tcg_target_long value; + + if (l->has_value) { + value = l->u.value; + value -= (tcg_target_long)s->code_ptr + 4; + if ((value & 3) || value < -0x400000 || value >= 0x400000) { + tcg_abort(); + } + value >>= 2; + } else { + tcg_out_reloc(s, s->code_ptr,
R_ALPHA_BRADDR, label_index, 0); + /* We need to keep the offset unchanged for retranslation. + The field loaded here will be masked in tcg_out_fmt_br. */ + value = *(uint32_t *) s->code_ptr; + } + tcg_out_fmt_br(s, opc, ra, value); +} + +static void tcg_out_const_call(TCGContext *s, tcg_target_long dest) +{ + const uint16_t *check = (const uint16_t *) dest; + tcg_target_long disp; + + /* ??? Ideally we'd have access to Elf64_Sym.st_other, which + would tell us definitively whether the target function uses + the incoming PV value. Make a simplifying assumption here + that all of the compiler-generated code that we're calling + either computes the GP from the PV in the first two insns + or it doesn't use the PV at all. This assumption holds in + general for just about anything except some hand-written + assembly, which we're not calling into. */ + + /* Note we access the insn stream as 16-bit units to avoid having + to mask out the offsets of the ldah and lda insns. */ + if (check[1] == 0x27bb && check[3] == 0x23bd) { + /* Skip the GP computation. We can do this even if the + direct branch is out of range. */ + dest += 8; + } + + disp = dest - ((tcg_target_long)s->code_ptr + 4); + if (disp >= -0x400000 && disp < 0x400000) { + tcg_out_fmt_br(s, INSN_BSR, TCG_REG_RA, disp >> 2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_PV, dest); + tcg_out_fmt_jmp(s, INSN_JSR, TCG_REG_RA, TCG_REG_PV, dest); + } +} + +static void tcg_out_deposit(TCGContext *s, TCGReg dest, TCGReg arg1, + TCGReg arg2, int ofs, int len, int is_64) +{ + AlphaOpcode ins_opc, msk_opc; + + switch (len) { + case 8: + ins_opc = INSN_INSBL; + msk_opc = INSN_MSKBL; + break; + case 16: + ins_opc = INSN_INSWL; + msk_opc = INSN_MSKWL; + break; + case 32: + ins_opc = INSN_INSLL; + msk_opc = INSN_MSKLL; + break; + default: + tcg_abort(); + } + + /* Convert the bit offset to a byte offset. */ + ofs >>= 3; + + if (arg1 == TCG_REG_ZERO) { + tcg_out_fmt_opi(s, ins_opc, arg2, ofs, dest); + if (!is_64 && len + ofs * 8 == 32) { + tgen_ext32s(s, dest, dest); + } + } else if (arg2 == TCG_REG_ZERO) { + tcg_out_fmt_opi(s, msk_opc, arg1, ofs, dest); + } else { + tcg_out_fmt_opi(s, ins_opc, arg2, ofs, TMP_REG1); + tcg_out_fmt_opi(s, msk_opc, arg1, ofs, dest); + tcg_out_fmt_opr(s, is_64 ? INSN_BIS : INSN_ADDL, dest, TMP_REG1, dest); + } +} + +/* The low bit of these entries indicates that the result of + the comparison must be inverted. This bit should not be + output with the rest of the instruction. */ +static const int cmp_opc[] = { + [TCG_COND_EQ] = INSN_CMPEQ, + [TCG_COND_NE] = INSN_CMPEQ | 1, + [TCG_COND_LT] = INSN_CMPLT, + [TCG_COND_GE] = INSN_CMPLT | 1, + [TCG_COND_LE] = INSN_CMPLE, + [TCG_COND_GT] = INSN_CMPLE | 1, + [TCG_COND_LTU] = INSN_CMPULT, + [TCG_COND_GEU] = INSN_CMPULT | 1, + [TCG_COND_LEU] = INSN_CMPULE, + [TCG_COND_GTU] = INSN_CMPULE | 1 +}; + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg dest, + TCGReg c1, TCGArg c2, int c2const) +{ + AlphaOpcode opc = cmp_opc[cond] & ~1; + + if (c2const) { + tcg_out_fmt_opi(s, opc, c1, c2, dest); + } else { + tcg_out_fmt_opr(s, opc, c1, c2, dest); + } + + if (cmp_opc[cond] & 1) { + tcg_out_fmt_opi(s, INSN_XOR, dest, 1, dest); + } +} + +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg dest, + TCGReg c1, TCGArg c2, int c2const, + TCGArg v1, int v1const) +{ + /* Note that unsigned comparisons are not present here, which means + that their entries will contain zeros.
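+ An unsigned comparison against zero therefore reads a zero entry + and falls through to the explicit compare into TMP_REG1 below.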
*/ + static const AlphaOpcode cmov_opc[] = { + [TCG_COND_EQ] = INSN_CMOVEQ, + [TCG_COND_NE] = INSN_CMOVNE, + [TCG_COND_LT] = INSN_CMOVLT, + [TCG_COND_GE] = INSN_CMOVGE, + [TCG_COND_LE] = INSN_CMOVLE, + [TCG_COND_GT] = INSN_CMOVGT + }; + + AlphaOpcode opc = 0; + + /* Notice signed comparisons vs zero. These are handled by the + cmov instructions directly. */ + if (c2 == 0) { + opc = cmov_opc[cond]; + } + + /* Otherwise, generate a comparison into a temporary. */ + if (opc == 0) { + opc = cmp_opc[cond] & ~1; + if (c2const) { + tcg_out_fmt_opi(s, opc, c1, c2, TMP_REG1); + } else { + tcg_out_fmt_opr(s, opc, c1, c2, TMP_REG1); + } + + opc = (cmp_opc[cond] & 1 ? INSN_CMOVEQ : INSN_CMOVNE); + c1 = TMP_REG1; + } + + if (v1const) { + tcg_out_fmt_opi(s, opc, c1, v1, dest); + } else { + tcg_out_fmt_opr(s, opc, c1, v1, dest); + } +} + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGArg arg2, int const_arg2, int label_index) +{ + /* Note that unsigned comparisons are not present here, which means + that their entries will contain zeros. */ + static const AlphaOpcode br_opc[] = { + [TCG_COND_EQ] = INSN_BEQ, + [TCG_COND_NE] = INSN_BNE, + [TCG_COND_LT] = INSN_BLT, + [TCG_COND_GE] = INSN_BGE, + [TCG_COND_LE] = INSN_BLE, + [TCG_COND_GT] = INSN_BGT + }; + + AlphaOpcode opc = 0; + + /* Notice signed comparisons vs zero. These are handled by the + branch instructions directly. */ + if (arg2 == 0) { + opc = br_opc[cond]; + } + + /* Otherwise, generate a comparison into a temporary. */ + if (opc == 0) { + opc = cmp_opc[cond] & ~1; + if (const_arg2) { + tcg_out_fmt_opi(s, opc, arg1, arg2, TMP_REG1); + } else { + tcg_out_fmt_opr(s, opc, arg1, arg2, TMP_REG1); + } + + opc = (cmp_opc[cond] & 1 ? INSN_BEQ : INSN_BNE); + arg1 = TMP_REG1; + } + + tcg_out_br(s, opc, arg1, label_index); +} + +/* Note that these functions don't have normal C calling conventions. */ +typedef long divfn(long, long); +extern divfn __divl, __divlu, __reml, __remlu; +extern divfn __divq, __divqu, __remq, __remqu; + +static void tcg_out_div(TCGContext *s, int sizeop) +{ + static divfn * const libc_div[16] = { + [2] = __divlu, + [2 | 8] = __remlu, + [2 | 4] = __divl, + [2 | 4 | 8] = __reml, + + [3] = __divqu, + [3 | 8] = __remqu, + [3 | 4] = __divq, + [3 | 4 | 8] = __remq, + }; + + tcg_target_long val, disp; + + val = (tcg_target_long) libc_div[sizeop]; + assert(val != 0); + + disp = val - ((tcg_target_long)s->code_ptr + 4); + if (disp >= -0x400000 && disp < 0x400000) { + tcg_out_fmt_br(s, INSN_BSR, TCG_REG_T9, disp >> 2); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TMP_REG1, val); + tcg_out_fmt_jmp(s, INSN_JSR, TCG_REG_T9, TMP_REG1, val); + } +} + +#if defined(CONFIG_SOFTMMU) + +#include "../../softmmu_defs.h" + +static void *qemu_ld_helpers[4] = { + helper_ldb_mmu, + helper_ldw_mmu, + helper_ldl_mmu, + helper_ldq_mmu, +}; + +static void *qemu_st_helpers[4] = { + helper_stb_mmu, + helper_stw_mmu, + helper_stl_mmu, + helper_stq_mmu, +}; + +static void tgen_andi_tmp(TCGContext *s, TCGReg ra, uint64_t val, TCGReg rc) +{ + if (!tcg_match_andi(val)) { + tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, val); + tcg_out_fmt_opr(s, INSN_AND, ra, TMP_REG1, rc); + } else { + tgen_andi(s, ra, val, rc); + } +} + +static void tcg_out_tlb_cmp(TCGContext *s, int sizeop, TCGReg r0, TCGReg r1, + TCGReg addr_reg, int label1, long tlb_offset) +{ + int addrsizeop = TARGET_LONG_BITS == 32 ? 2 : 3; + unsigned long val; + + /* Mask the page, plus the low bits of the access, into TMP3. 
Note + that the low bits are added in order to catch unaligned accesses, + as those bits won't be set in the TLB entry. For 32-bit targets, + force the high bits of the mask to be zero, as the high bits of + the input register are garbage. */ + val = TARGET_PAGE_MASK | ((1 << (sizeop & 3)) - 1); + if (TARGET_LONG_BITS == 32) { + val &= 0xfffffffful; + } + tgen_andi_tmp(s, addr_reg, val, TMP_REG1); + + /* Compute the index into the TLB into R1. Again, note that the + high bits of a 32-bit address must be cleared. */ + tcg_out_fmt_opi(s, INSN_SRL, addr_reg, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, r1); + + val = (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS; + if (TARGET_LONG_BITS == 32) { + val &= 0xfffffffful >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + } + tgen_andi_tmp(s, r1, val, r1); + + /* Load the word at (R1 + CPU_ENV + TLB_OFFSET). Note that we + arrange for a 32-bit load to be zero-extended. */ + tcg_out_fmt_opr(s, INSN_ADDQ, r1, TCG_AREG0, r1); + tcg_out_ld_sz(s, addrsizeop, TMP_REG2, r1, tlb_offset); + + /* Copy the original address into R0. This is needed on the + slow path through the helper function. */ + tgen_extend(s, addrsizeop, addr_reg, r0); + + /* Compare TMP1 with the value loaded from the TLB. */ + tcg_out_brcond(s, TCG_COND_NE, TMP_REG2, TMP_REG1, 0, label1); +} +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop) +{ + TCGReg addr_reg, data_reg, r0; + long ofs; + int bswap; +#if defined(CONFIG_SOFTMMU) + TCGReg r1; + int label1, label2, mem_index; +#endif + + data_reg = *args++; + addr_reg = *args++; + +#if defined(CONFIG_SOFTMMU) + mem_index = *args; + r0 = TCG_REG_A1; + r1 = TCG_REG_A0; + + label1 = gen_new_label(); + label2 = gen_new_label(); + + tcg_out_tlb_cmp(s, sizeop, r0, r1, addr_reg, label1, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)); + + /* TLB Hit. Note that Alpha statically predicts forward branch as + not taken, so arrange the fallthru as the common case. + + R0 contains the guest address, and R1 contains the pointer + to CPU_ENV plus the TLB entry offset. */ + + tcg_out_ld(s, TCG_TYPE_I64, r1, r1, + offsetof(CPUArchState, tlb_table[mem_index][0].addend)); + tcg_out_fmt_opr(s, INSN_ADDQ, r0, r1, r0); + ofs = 0; +#else + if (TARGET_LONG_BITS == 32) { + r0 = TCG_REG_A1; + tgen_ext32u(s, addr_reg, r0); + } else { + r0 = addr_reg; + } + if (USE_GUEST_BASE_REG) { + tcg_out_fmt_opr(s, INSN_ADDQ, r0, TCG_GUEST_BASE_REG, TCG_REG_A1); + r0 = TCG_REG_A1; + ofs = 0; + } else { + ofs = GUEST_BASE; + } +#endif + +#if defined(TARGET_WORDS_BIGENDIAN) + /* Signal byte swap necessary. */ + bswap = 8; +#else + bswap = 0; +#endif + + /* Perform the actual load. */ + tcg_out_ld_sz(s, sizeop | bswap, data_reg, r0, ofs); + +#if defined(CONFIG_SOFTMMU) + tcg_out_br(s, INSN_BR, TCG_REG_ZERO, label2); + + /* TLB miss. Call the helper function. */ + tcg_out_label(s, label1, s->code_ptr); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_A2, mem_index); + + tcg_out_const_call(s, (tcg_target_long)qemu_ld_helpers[sizeop & 3]); + + /* The helper routines have no defined data extension. + Properly extend the result to whatever data type we need. 
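+ E.g. for qemu_ld32s the 32-bit value returned in v0 must be + sign-extended as it is copied into data_reg.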
*/ + tgen_extend(s, sizeop, TCG_REG_V0, data_reg); + + tcg_out_label(s, label2, s->code_ptr); +#endif +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop) +{ + TCGReg addr_reg, data_reg, r0; + long ofs; +#if defined(CONFIG_SOFTMMU) + TCGReg r1; + int label1, label2, mem_index; +#endif + + data_reg = *args++; + addr_reg = *args++; + +#if defined(CONFIG_SOFTMMU) + mem_index = *args; + r0 = TCG_REG_A1; + r1 = TCG_REG_A0; + + label1 = gen_new_label(); + label2 = gen_new_label(); + + tcg_out_tlb_cmp(s, sizeop, r0, r1, addr_reg, label1, + offsetof(CPUArchState, + tlb_table[mem_index][0].addr_write)); + + /* TLB Hit. Note that Alpha statically predicts forward branch as + not taken, so arrange the fallthru as the common case. + + R0 contains the guest address, and R1 contains the pointer + to CPU_ENV plus the TLB entry offset. */ + + tcg_out_ld(s, TCG_TYPE_I64, r1, r1, + offsetof(CPUArchState, tlb_table[mem_index][0].addend)); + tcg_out_fmt_opr(s, INSN_ADDQ, r0, r1, r0); + ofs = 0; +#else + if (TARGET_LONG_BITS == 32) { + r0 = TCG_REG_A1; + tgen_ext32u(s, addr_reg, r0); + } else { + r0 = addr_reg; + } + if (USE_GUEST_BASE_REG) { + tcg_out_fmt_opr(s, INSN_ADDQ, r0, TCG_GUEST_BASE_REG, TCG_REG_A1); + r0 = TCG_REG_A1; + ofs = 0; + } else { + ofs = GUEST_BASE; + } +#endif + +#if defined(TARGET_WORDS_BIGENDIAN) + /* Byte swap if necessary. */ + if ((sizeop & 3) > 0) { + tgen_bswap(s, sizeop & 3, data_reg, TCG_REG_A0); + data_reg = TCG_REG_A0; + } +#endif + + /* Perform the actual store. */ + tcg_out_st_sz(s, sizeop, data_reg, r0, ofs); + +#if defined(CONFIG_SOFTMMU) + tcg_out_br(s, INSN_BR, TCG_REG_ZERO, label2); + + /* TLB miss. Call the helper function. */ + tcg_out_label(s, label1, s->code_ptr); + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_A0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_A2, data_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_A3, mem_index); + + tcg_out_const_call(s, (tcg_target_long)qemu_st_helpers[sizeop & 3]); + + tcg_out_label(s, label2, s->code_ptr); +#endif +} + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + TCGArg arg0, arg1, arg2; + AlphaOpcode insn; + int c; + + arg0 = args[0]; + arg1 = args[1]; + arg2 = args[2]; + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out_ld(s, TCG_TYPE_PTR, TMP_REG1, TCG_REG_SP, TB_RET_OFS); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_V0, arg0); + tcg_out_fmt_jmp(s, INSN_RET, TCG_REG_ZERO, TMP_REG1, 0); + break; + + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* Direct jump method. In the general case we output: + br $at,.+4 + ldah $at,hi($at) + lda $at,lo($at) + jmp $31,($at),0 + We need to modify two instructions to set the link. + We want that modification to be atomic, so we arrange + for the ldah+lda pair to be 8-byte aligned. Which + means that the first branch should be 4 mod 8. */ + if (((uintptr_t)s->code_ptr & 7) == 0) { + tcg_out32(s, INSN_NOP); + } + tcg_out_fmt_br(s, INSN_BR, TMP_REG1, 0); + s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + s->code_ptr += 8; + } else { + /* Indirect jump method. 
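+ Load the target address from tb_next[arg0] into TMP_REG1, then + jump through it via the indirect jmp emitted below.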
*/ + tcg_out_ld(s, TCG_TYPE_PTR, TMP_REG1, TCG_REG_ZERO, + (tcg_target_long)(s->tb_next + arg0)); + } + tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_ZERO, TMP_REG1, 0); + s->tb_next_offset[arg0] = s->code_ptr - s->code_buf; + break; + + case INDEX_op_call: + if (const_args[0]) { + tcg_out_const_call(s, arg0); + } else { + tcg_out_fmt_jmp(s, INSN_JSR, TCG_REG_RA, TCG_REG_PV, 0); + } + break; + + case INDEX_op_jmp: + tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_ZERO, arg0, 0); + break; + + case INDEX_op_br: + tcg_out_br(s, INSN_BR, TCG_REG_ZERO, arg0); + break; + + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + c = 0; + goto do_load; + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + c = 0 | 4; + goto do_load; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + c = 1; + goto do_load; + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + c = 1 | 4; + goto do_load; + case INDEX_op_ld32u_i64: + c = 2; + goto do_load; + case INDEX_op_ld_i32: + case INDEX_op_ld32s_i64: + c = 2 | 4; + goto do_load; + case INDEX_op_ld_i64: + c = 3; + do_load: + tcg_out_ld_sz(s, c, arg0, arg1, arg2); + break; + + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + c = 0; + goto do_store; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + c = 1; + goto do_store; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + c = 2; + goto do_store; + case INDEX_op_st_i64: + c = 3; + do_store: + tcg_out_st_sz(s, c, arg0, arg1, arg2); + break; + + case INDEX_op_sub_i32: + if (const_args[2]) { + arg2 = -arg2; + } else { + insn = INSN_SUBL; + goto do_arith; + } + /* FALLTHRU */ + + case INDEX_op_add_i32: + if (const_args[2]) { + if ((int32_t)arg2 >= 0) { + tcg_out_fmt_opi(s, INSN_ADDL, arg1, (int32_t)arg2, arg0); + } else { + tcg_out_fmt_opi(s, INSN_SUBL, arg1, -(int32_t)arg2, arg0); + } + } else { + insn = INSN_ADDL; + goto do_arith; + } + break; + + case INDEX_op_sub_i64: + if (const_args[2]) { + arg2 = -arg2; + } else { + insn = INSN_SUBQ; + goto do_arith; + } + /* FALLTHRU */ + + case INDEX_op_add_i64: + if (const_args[2]) { + tcg_out_mem_long(s, INSN_LDA, arg0, arg1, arg2); + } else { + insn = INSN_ADDQ; + goto do_arith; + } + break; + + case INDEX_op_mul_i32: + insn = INSN_MULL; + goto do_arith; + + case INDEX_op_mul_i64: + insn = INSN_MULQ; + goto do_arith; + + case INDEX_op_and_i32: + case INDEX_op_and_i64: + if (const_args[2]) { + if (opc == INDEX_op_and_i32) { + arg2 = (int32_t)arg2; + } + tgen_andi(s, arg1, arg2, arg0); + break; + } + insn = INSN_AND; + goto do_arith; + + case INDEX_op_andc_i32: + case INDEX_op_andc_i64: + if (const_args[2]) { + if (opc == INDEX_op_andc_i32) { + arg2 = (int32_t)arg2; + } + tgen_andi(s, arg1, ~arg2, arg0); + break; + } + insn = INSN_BIC; + goto do_arith; + + case INDEX_op_or_i32: + case INDEX_op_or_i64: + insn = INSN_BIS; + goto do_arith; + + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + insn = INSN_ORNOT; + goto do_arith; + + case INDEX_op_xor_i32: + case INDEX_op_xor_i64: + insn = INSN_XOR; + goto do_arith; + + case INDEX_op_eqv_i32: + case INDEX_op_eqv_i64: + insn = INSN_EQV; + goto do_arith; + + case INDEX_op_shl_i32: + /* Make sure to preserve the sign-extension in the result. + Thus the special casing of shifts by 1, 2 and 3. 
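+ (addl x,x computes the shift by 1, and s4addl/s8addl of x with $31 + the shifts by 2 and 3, each with a properly sign-extended result.)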
*/ + if (const_args[2]) { + arg2 &= 31; + switch (arg2) { + case 0: + tcg_out_mov(s, TCG_TYPE_I32, arg0, arg1); + break; + case 1: + tcg_out_fmt_opr(s, INSN_ADDL, arg1, arg1, arg0); + break; + case 2: + tcg_out_fmt_opr(s, INSN_S4ADDL, arg1, TCG_REG_ZERO, arg0); + break; + case 3: + tcg_out_fmt_opr(s, INSN_S8ADDL, arg1, TCG_REG_ZERO, arg0); + break; + default: + tcg_out_fmt_opi(s, INSN_SLL, arg1, arg2, arg0); + tgen_ext32s(s, arg0, arg0); + break; + } + } else { + /* ??? TCG has no requirement to truncate the shift yet. */ + tcg_out_fmt_opr(s, INSN_SLL, arg1, arg2, arg0); + tgen_ext32s(s, arg0, arg0); + } + break; + + case INDEX_op_shl_i64: + insn = INSN_SLL; + goto do_arith; + + case INDEX_op_shr_i32: + /* Recall that the input is sign-extended, which means that we + need to mask the high bits that we'll be shifting in. There + are three common cases that can perform the shift+mask in + one instruction. Otherwise, we'll need a separate mask. */ + if (const_args[2]) { + arg2 &= 31; + switch (arg2) { + case 0: + tcg_out_mov(s, TCG_TYPE_I32, arg0, arg1); + break; + case 8: + tcg_out_fmt_opi(s, INSN_INSLH, arg1, 7, arg0); + break; + case 16: + tcg_out_fmt_opi(s, INSN_EXTWL, arg1, 2, arg0); + break; + case 24: + tcg_out_fmt_opi(s, INSN_EXTBL, arg1, 3, arg0); + break; + case 25 ... 31: + tcg_out_fmt_opi(s, INSN_SRL, arg1, arg2, arg0); + tcg_out_fmt_opi(s, INSN_AND, arg0, + (1 << (32 - arg2)) - 1, arg0); + break; + default: + tgen_ext32u(s, arg1, arg0); + tcg_out_fmt_opi(s, INSN_SRL, arg0, arg2, arg0); + break; + } + } else { + /* Here we need to be careful about a shift of zero, + for which we'd need to re-sign-extend the output. */ + tgen_ext32u(s, arg1, TMP_REG1); + tcg_out_fmt_opr(s, INSN_SRL, TMP_REG1, arg2, arg0); + tgen_ext32s(s, arg0, arg0); + } + break; + + case INDEX_op_shr_i64: + insn = INSN_SRL; + goto do_arith; + + case INDEX_op_sar_i32: + /* Note that since the input is already sign-extended, + we need not do anything special here. 
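+ An arithmetic right shift of a sign-extended value yields another + sign-extended value for any shift count.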
*/ + case INDEX_op_sar_i64: + insn = INSN_SRA; + goto do_arith; + + do_arith: + if (const_args[2]) { + tcg_out_fmt_opi(s, insn, arg1, arg2, arg0); + } else { + tcg_out_fmt_opr(s, insn, arg1, arg2, arg0); + } + break; + + case INDEX_op_not_i32: + case INDEX_op_not_i64: + if (const_args[1]) { + tcg_out_fmt_opi(s, INSN_ORNOT, TCG_REG_ZERO, arg1, arg0); + } else { + tcg_out_fmt_opr(s, INSN_ORNOT, TCG_REG_ZERO, arg1, arg0); + } + break; + + case INDEX_op_deposit_i32: + tcg_out_deposit(s, arg0, arg1, arg2, args[3], args[4], 0); + break; + case INDEX_op_deposit_i64: + tcg_out_deposit(s, arg0, arg1, arg2, args[3], args[4], 1); + break; + + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + tcg_out_brcond(s, arg2, arg0, arg1, const_args[1], args[3]); + break; + + case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: + tcg_out_setcond(s, args[3], arg0, arg1, arg2, const_args[2]); + break; + + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + tcg_out_movcond(s, args[5], arg0, arg1, arg2, const_args[2], + args[3], const_args[3]); + break; + + case INDEX_op_ext8s_i32: + case INDEX_op_ext8s_i64: + c = 0 | 4; + goto do_sign_extend; + case INDEX_op_ext16s_i32: + case INDEX_op_ext16s_i64: + c = 1 | 4; + goto do_sign_extend; + case INDEX_op_ext32s_i64: + c = 2 | 4; + do_sign_extend: + tgen_extend(s, c, arg1, arg0); + break; + + case INDEX_op_div_i32: + c = 2 | 4; + goto do_div; + case INDEX_op_rem_i32: + c = 2 | 4 | 8; + goto do_div; + case INDEX_op_divu_i32: + c = 2; + goto do_div; + case INDEX_op_remu_i32: + c = 2 | 8; + goto do_div; + case INDEX_op_div_i64: + c = 3 | 4; + goto do_div; + case INDEX_op_rem_i64: + c = 3 | 4 | 8; + goto do_div; + case INDEX_op_divu_i64: + c = 3; + goto do_div; + case INDEX_op_remu_i64: + c = 3 | 8; + do_div: + tcg_out_div(s, c); + break; + + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + c = 1; + goto do_bswap; + case INDEX_op_bswap32_i32: + c = 2 | 4; + goto do_bswap; + case INDEX_op_bswap32_i64: + c = 2; + goto do_bswap; + case INDEX_op_bswap64_i64: + c = 3; + do_bswap: + tgen_bswap(s, c, arg1, arg0); + break; + + case INDEX_op_qemu_ld8u: + c = 0; + goto do_qemu_load; + case INDEX_op_qemu_ld8s: + c = 0 | 4; + goto do_qemu_load; + case INDEX_op_qemu_ld16u: + c = 1; + goto do_qemu_load; + case INDEX_op_qemu_ld16s: + c = 1 | 4; + goto do_qemu_load; + case INDEX_op_qemu_ld32: + case INDEX_op_qemu_ld32s: + c = 2 | 4; + goto do_qemu_load; + case INDEX_op_qemu_ld32u: + c = 2; + goto do_qemu_load; + case INDEX_op_qemu_ld64: + c = 3; + do_qemu_load: + tcg_out_qemu_ld(s, args, c); + break; + + case INDEX_op_qemu_st8: + c = 0; + goto do_qemu_store; + case INDEX_op_qemu_st16: + c = 1; + goto do_qemu_store; + case INDEX_op_qemu_st32: + c = 2; + goto do_qemu_store; + case INDEX_op_qemu_st64: + c = 3; + do_qemu_store: + tcg_out_qemu_st(s, args, c); + break; + + case INDEX_op_mov_i32: + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: + case INDEX_op_movi_i64: + /* These four are handled by tcg.c directly. 
*/ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef alpha_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_call, { "ci" } }, + { INDEX_op_jmp, { "r" } }, + { INDEX_op_br, { } }, + + { INDEX_op_mov_i32, { "r", "r" } }, + { INDEX_op_movi_i32, { "r" } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "rJ", "r" } }, + { INDEX_op_st16_i32, { "rJ", "r" } }, + { INDEX_op_st_i32, { "rJ", "r" } }, + + { INDEX_op_add_i32, { "r", "rJ", "rK" } }, + { INDEX_op_mul_i32, { "r", "rJ", "rI" } }, + { INDEX_op_sub_i32, { "r", "rJ", "rK" } }, + { INDEX_op_and_i32, { "r", "rJ", "rM" } }, + { INDEX_op_or_i32, { "r", "rJ", "rI" } }, + { INDEX_op_xor_i32, { "r", "rJ", "rI" } }, + { INDEX_op_andc_i32, { "r", "rJ", "rM" } }, + { INDEX_op_orc_i32, { "r", "rJ", "rI" } }, + { INDEX_op_eqv_i32, { "r", "rJ", "rI" } }, + { INDEX_op_not_i32, { "r", "rI" } }, + + { INDEX_op_shl_i32, { "r", "rJ", "rI" } }, + { INDEX_op_shr_i32, { "r", "rJ", "rI" } }, + { INDEX_op_sar_i32, { "r", "rJ", "rI" } }, + + { INDEX_op_deposit_i32, { "r", "rJ", "rJ" } }, + + { INDEX_op_div_i32, { "c", "a", "b" } }, + { INDEX_op_rem_i32, { "c", "a", "b" } }, + { INDEX_op_divu_i32, { "c", "a", "b" } }, + { INDEX_op_remu_i32, { "c", "a", "b" } }, + + { INDEX_op_brcond_i32, { "rJ", "rI" } }, + { INDEX_op_setcond_i32, { "r", "rJ", "rI" } }, + { INDEX_op_movcond_i32, { "r", "rJ", "rI", "rI", "0" } }, + + { INDEX_op_mov_i64, { "r", "r" } }, + { INDEX_op_movi_i64, { "r" } }, + + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + { INDEX_op_st8_i64, { "rJ", "r" } }, + { INDEX_op_st16_i64, { "rJ", "r" } }, + { INDEX_op_st32_i64, { "rJ", "r" } }, + { INDEX_op_st_i64, { "rJ", "r" } }, + + { INDEX_op_add_i64, { "r", "rJ", "ri" } }, + { INDEX_op_mul_i64, { "r", "rJ", "rI" } }, + { INDEX_op_sub_i64, { "r", "rJ", "ri" } }, + { INDEX_op_and_i64, { "r", "rJ", "rM" } }, + { INDEX_op_or_i64, { "r", "rJ", "rI" } }, + { INDEX_op_xor_i64, { "r", "rJ", "rI" } }, + { INDEX_op_andc_i64, { "r", "rJ", "rM" } }, + { INDEX_op_orc_i64, { "r", "rJ", "rI" } }, + { INDEX_op_eqv_i64, { "r", "rJ", "rI" } }, + { INDEX_op_not_i64, { "r", "rI" } }, + + { INDEX_op_shl_i64, { "r", "rJ", "rI" } }, + { INDEX_op_shr_i64, { "r", "rJ", "rI" } }, + { INDEX_op_sar_i64, { "r", "rJ", "rI" } }, + + { INDEX_op_deposit_i64, { "r", "rJ", "rJ" } }, + + { INDEX_op_div_i64, { "c", "a", "b" } }, + { INDEX_op_rem_i64, { "c", "a", "b" } }, + { INDEX_op_divu_i64, { "c", "a", "b" } }, + { INDEX_op_remu_i64, { "c", "a", "b" } }, + + { INDEX_op_brcond_i64, { "rJ", "rI" } }, + { INDEX_op_setcond_i64, { "r", "rJ", "rI" } }, + { INDEX_op_movcond_i64, { "r", "rJ", "rI", "rI", "0" } }, + + { INDEX_op_ext8s_i32, { "r", "rJ" } }, + { INDEX_op_ext16s_i32, { "r", "rJ" } }, + { INDEX_op_ext8s_i64, { "r", "rJ" } }, + { INDEX_op_ext16s_i64, { "r", "rJ" } }, + { INDEX_op_ext32s_i64, { "r", "rJ" } }, + + { INDEX_op_bswap16_i32, { "r", "rJ" } }, + { INDEX_op_bswap32_i32, { "r", "rJ" } }, + { INDEX_op_bswap16_i64, { "r", "rJ" } }, + { INDEX_op_bswap32_i64, { "r", "rJ" } }, + { INDEX_op_bswap64_i64, { "r", "rJ" } }, + + { INDEX_op_qemu_ld8u, { "r", "L" } }, + { INDEX_op_qemu_ld8s, { "r", 
"L" } }, + { INDEX_op_qemu_ld16u, { "r", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, + { INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32s, { "r", "L" } }, + { INDEX_op_qemu_ld64, { "r", "L" } }, + + { INDEX_op_qemu_st8, { "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L" } }, + { -1 }, +}; + + +/* + * Generate global QEMU prologue and epilogue code + */ +void tcg_target_qemu_prologue(TCGContext *s) +{ + static const TCGReg save_regs[] = { + TCG_REG_RA, + TCG_REG_S0, + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + /* TCG_REG_S5 -- currently used for the global env. */ + TCG_REG_S6, + }; + + long i, frame_size, save_ofs; + uint8_t *ret_loc, *ent_loc; + + /* The shape of the stack frame is: + input sp + [ Register save area ] + [ TB return address ] + [ CPU_TEMP_BUF_NLONGS ] + [ TCG_STATIC_CALL_ARGS_SIZE ] + sp + */ + + save_ofs = TB_RET_OFS + 8; + frame_size = save_ofs + ARRAY_SIZE(save_regs) * 8; + frame_size += TCG_TARGET_STACK_ALIGN - 1; + frame_size &= -TCG_TARGET_STACK_ALIGN; + + tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + /* TB Prologue. */ + ent_loc = s->code_ptr; + + /* Allocate the stack frame. */ + tcg_out_fmt_mem(s, INSN_LDA, TCG_REG_SP, TCG_REG_SP, -frame_size); + + /* Save all callee saved registers. */ + for (i = 0; i < ARRAY_SIZE(save_regs); i++) { + tcg_out_fmt_mem(s, INSN_STQ, save_regs[i], TCG_REG_SP, save_ofs + i*8); + } + + /* Store the return address of the TB. */ + ret_loc = s->code_ptr; + tcg_out_fmt_mem(s, INSN_LDA, TMP_REG1, TCG_REG_PV, 0); + tcg_out_fmt_mem(s, INSN_STQ, TMP_REG1, TCG_REG_SP, TB_RET_OFS); + + /* Copy the ENV pointer into place. */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_A0); + + /* Setup TCG_GUEST_BASE_REG if desired. */ + if (USE_GUEST_BASE_REG) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } + + /* Invoke the TB. */ + tcg_out_fmt_jmp(s, INSN_JSR, TCG_REG_ZERO, TCG_REG_A1, 0); + + /* Fill in the offset for the TB return address, as described above. */ + i = s->code_ptr - ent_loc; + assert(i == (int16_t)i); + *(int16_t *)ret_loc = i; + + /* TB epilogue. */ + + /* Restore all callee saved registers. */ + for (i = 0; i < ARRAY_SIZE(save_regs); i++) { + tcg_out_fmt_mem(s, INSN_LDQ, save_regs[i], TCG_REG_SP, save_ofs + i*8); + } + + /* Deallocate the stack frame. 
*/ + tcg_out_fmt_mem(s, INSN_LDA, TCG_REG_SP, TCG_REG_SP, frame_size); + + tcg_out_fmt_jmp(s, INSN_RET, TCG_REG_ZERO, TCG_REG_RA, 0); +} + + +void tcg_target_init(TCGContext *s) +{ +#if !defined(CONFIG_USER_ONLY) + /* fail safe */ + assert((1 << CPU_TLB_ENTRY_BITS) == sizeof(CPUTLBEntry)); +#endif + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); + + tcg_regset_set32(tcg_target_call_clobber_regs, 0, 0xffffffff); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_GP); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_SP); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); + tcg_regset_set_reg(s->reserved_regs, TMP_REG1); + tcg_regset_set_reg(s->reserved_regs, TMP_REG2); + + tcg_add_target_add_op_defs(alpha_op_defs); +} + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + long disp, hi, lo, insn1, insn2; + + /* Try a direct branch first. */ + disp = addr - (jmp_addr + 4); + if (disp >= -0x400000 && disp < 0x400000) { + insn1 = INSN_BR | INSN_RA(TCG_REG_ZERO) | INSN_DISP21(disp >> 2); + /* The second insn is dead code, but don't leave the memory totally + uninitialized. If the garbage is an illegal insn the prefetch + unit can flush the pipeline in order to prove the illegal insn + isn't executed. */ + insn2 = INSN_NOP; + } else { + /* Failing that, do an ldah+lda pair to make the distance. + Given that the code buffer is limited to 2G, this should + always reach. */ + disp = addr - jmp_addr; + lo = (int16_t)disp; + hi = (int16_t)((disp - lo) >> 16); + assert((hi << 16) + lo == disp); + insn1 = INSN_LDAH | INSN_RA(TMP_REG1) + | INSN_RB(TMP_REG1) | INSN_DISP16(hi); + insn2 = INSN_LDA | INSN_RA(TMP_REG1) + | INSN_RB(TMP_REG1) | INSN_DISP16(lo); + } + *(uint64_t *)jmp_addr = insn1 + (insn2 << 32); + + flush_icache_range(jmp_addr, jmp_addr + 8); +} diff --git a/tcg/alpha/tcg-target.h b/tcg/alpha/tcg-target.h new file mode 100644 index 0000000..3611687 --- /dev/null +++ b/tcg/alpha/tcg-target.h @@ -0,0 +1,142 @@ +/* + * Tiny Code Generator for QEMU + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#define TCG_TARGET_ALPHA 1 + +#define TCG_TARGET_NB_REGS 32 + +/* Having the zero register ($31) == 0 within TCG simplifies a few things. + Thus we have a mapping between TCG regno and hardware regno. */ +#define HW_TO_TCG_REGNO(x) ((x) ^ 0x1f) +#define TCG_TO_HW_REGNO(x) ((x) ^ 0x1f) + +typedef enum TCGReg { + TCG_REG_V0 = HW_TO_TCG_REGNO(0), + + TCG_REG_T0 = HW_TO_TCG_REGNO(1), + TCG_REG_T1 = HW_TO_TCG_REGNO(2), + TCG_REG_T2 = HW_TO_TCG_REGNO(3), + TCG_REG_T3 = HW_TO_TCG_REGNO(4), + TCG_REG_T4 = HW_TO_TCG_REGNO(5), + TCG_REG_T5 = HW_TO_TCG_REGNO(6), + TCG_REG_T6 = HW_TO_TCG_REGNO(7), + TCG_REG_T7 = HW_TO_TCG_REGNO(8), + + TCG_REG_S0 = HW_TO_TCG_REGNO(9), + TCG_REG_S1 = HW_TO_TCG_REGNO(10), + TCG_REG_S2 = HW_TO_TCG_REGNO(11), + TCG_REG_S3 = HW_TO_TCG_REGNO(12), + TCG_REG_S4 = HW_TO_TCG_REGNO(13), + TCG_REG_S5 = HW_TO_TCG_REGNO(14), + TCG_REG_S6 = HW_TO_TCG_REGNO(15), + + TCG_REG_A0 = HW_TO_TCG_REGNO(16), + TCG_REG_A1 = HW_TO_TCG_REGNO(17), + TCG_REG_A2 = HW_TO_TCG_REGNO(18), + TCG_REG_A3 = HW_TO_TCG_REGNO(19), + TCG_REG_A4 = HW_TO_TCG_REGNO(20), + TCG_REG_A5 = HW_TO_TCG_REGNO(21), + + TCG_REG_T8 = HW_TO_TCG_REGNO(22), + TCG_REG_T9 = HW_TO_TCG_REGNO(23), + TCG_REG_T10 = HW_TO_TCG_REGNO(24), + TCG_REG_T11 = HW_TO_TCG_REGNO(25), + + TCG_REG_RA = HW_TO_TCG_REGNO(26), + TCG_REG_PV = HW_TO_TCG_REGNO(27), + TCG_REG_AT = HW_TO_TCG_REGNO(28), + TCG_REG_GP = HW_TO_TCG_REGNO(29), + TCG_REG_SP = HW_TO_TCG_REGNO(30), + + TCG_REG_ZERO = HW_TO_TCG_REGNO(31) +} TCGReg; + +/* Used for function call generation. */ +#define TCG_REG_CALL_STACK TCG_REG_SP +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_CALL_STACK_OFFSET 0 + +/* We have signed extension instructions. */ +#define TCG_TARGET_HAS_ext8s_i32 1 +#define TCG_TARGET_HAS_ext16s_i32 1 +#define TCG_TARGET_HAS_ext8s_i64 1 +#define TCG_TARGET_HAS_ext16s_i64 1 +#define TCG_TARGET_HAS_ext32s_i64 1 + +/* We have single-output division routines. */ +#define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_div_i64 1 + +/* We have conditional move. */ +#define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_movcond_i64 1 + +/* We have optimized bswap routines. */ +#define TCG_TARGET_HAS_bswap16_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_bswap16_i64 1 +#define TCG_TARGET_HAS_bswap32_i64 1 +#define TCG_TARGET_HAS_bswap64_i64 1 + +/* We have NOT via ORNOT. */ +#define TCG_TARGET_HAS_not_i32 1 +#define TCG_TARGET_HAS_not_i64 1 + +/* We have some compound logical instructions. */ +#define TCG_TARGET_HAS_andc_i32 1 +#define TCG_TARGET_HAS_andc_i64 1 +#define TCG_TARGET_HAS_orc_i32 1 +#define TCG_TARGET_HAS_orc_i64 1 +#define TCG_TARGET_HAS_eqv_i32 1 +#define TCG_TARGET_HAS_eqv_i64 1 +#define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_nor_i64 0 + +/* We can do better for specific cases of deposit. */ +#define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_deposit_i64 1 + +#define TCG_TARGET_deposit_i32_valid(ofs, len) \ + (((ofs) & 7) == 0 && ((len) == 8 || (len) == 16 || (len) == 32)) + +/* The default implementations of these are fine. 
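+ TCG expands them from the operations provided above, e.g. the + unsigned extensions become an and/zapnot and neg a subtract from + the zero register.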
*/ +#define TCG_TARGET_HAS_neg_i32 0 +#define TCG_TARGET_HAS_neg_i64 0 +#define TCG_TARGET_HAS_ext8u_i32 0 +#define TCG_TARGET_HAS_ext16u_i32 0 +#define TCG_TARGET_HAS_ext8u_i64 0 +#define TCG_TARGET_HAS_ext16u_i64 0 +#define TCG_TARGET_HAS_ext32u_i64 0 +#define TCG_TARGET_HAS_rot_i32 0 +#define TCG_TARGET_HAS_rot_i64 0 + +#define TCG_TARGET_HAS_GUEST_BASE + +#define TCG_AREG0 TCG_REG_S6 + +static inline void flush_icache_range(unsigned long start, unsigned long stop) +{ + __asm__ __volatile__ ("call_pal 0x86"); +} -- 1.7.11.4