* [Qemu-devel] [PATCH] Porting TCG to alpha platform
@ 2010-01-19  8:47 identifier scorpio
  2010-01-19 20:18 ` Richard Henderson
  2010-01-19 21:42 ` Stefan Weil
  0 siblings, 2 replies; 21+ messages in thread
From: identifier scorpio @ 2010-01-19  8:47 UTC (permalink / raw)
  To: qemu-devel

Hello.

I have ported TCG to the Alpha platform. The patch is currently based on the stable-0.10 branch, and it can now run the linux-0.2.img test image on my Alpha XP1000 workstation. It still can't run MS Windows, though, and I hope someone, especially those working on target-alpha, can help me find the bugs.

From 0ee33ea1e43298e6045e16dfcf07cb7a530dfd56 Mon Sep 17 00:00:00 2001
From: Dong Weiyu <cidentifier@yahoo.com.cn>
Date: Tue, 19 Jan 2010 16:22:54 +0800
Subject: [PATCH] porting TCG to alpha platform.

---
 cpu-all.h              |    2 +-
 tcg/alpha/tcg-target.c | 1335 ++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/alpha/tcg-target.h |   70 +++
 3 files changed, 1406 insertions(+), 1 deletions(-)
 create mode 100644 tcg/alpha/tcg-target.c
 create mode 100644 tcg/alpha/tcg-target.h

diff --git a/cpu-all.h b/cpu-all.h
index e0c3efd..bdf6fb2 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -22,7 +22,7 @@
 
 #include "qemu-common.h"
 
-#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__)
+#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__alpha__)
 #define WORDS_ALIGNED
 #endif
 
diff --git a/tcg/alpha/tcg-target.c b/tcg/alpha/tcg-target.c
new file mode 100644
index 0000000..6bbd69f
--- /dev/null
+++ b/tcg/alpha/tcg-target.c
@@ -0,0 +1,1335 @@
+/*
+ * Tiny Code Generator for QEMU on ALPHA platform
+ */
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+    "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
+    "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15",
+    "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
+    "$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31",
+};
+#endif
+
+/*
+ * $26 ~ $31 are special and reserved.
+ * $25 is deliberately reserved for jcc operations,
+ * $0 usually holds a function's return result, so it is better allocated late,
+ * and $15 holds the cpu_env pointer, so it is allocated last.
+ */
+static const int tcg_target_reg_alloc_order[] = {
+    TCG_REG_1, TCG_REG_2, TCG_REG_3, TCG_REG_4, TCG_REG_5, TCG_REG_6,
+    TCG_REG_7, TCG_REG_8, TCG_REG_22,
+    TCG_REG_9, TCG_REG_10, TCG_REG_11, TCG_REG_12, TCG_REG_13, TCG_REG_14,
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19, TCG_REG_20, TCG_REG_21
+};
+
+/*
+ * according to the alpha calling convention, these 6 registers are used for
+ * function parameter passing. if a function has more than 6 parameters, the
+ * remaining ones are passed on the stack.
+ */
+static const int tcg_target_call_iarg_regs[6] = {
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19, TCG_REG_20, TCG_REG_21
+};
+
+/*
+ * according to the alpha calling convention, $0 is used to return the function result.
+ */
+static const int tcg_target_call_oarg_regs[1] = { TCG_REG_0 };
+
+/*
+ * saves the address of the TB's epilogue.
+ */
+static uint8_t *tb_ret_addr;
+
+/*
+ * op-codes and func-codes for jump insns
+ */
+#define OP_CALL        0x01A
+#define OP_RET         0x01A
+#define OP_JMP         0x01A
+
+#define FUNC_JMP       0x00
+#define FUNC_CALL      0x01
+#define FUNC_RET       0x02
+
+#define OP_BR          0x30
+#define OP_BEQ         0x39
+#define OP_BNE         0x3D
+#define OP_BLBC        0x38
+#define OP_BLBS        0x3C
+
+#define OP_ADDSUBCMP   0x10
+
+#define FUNC_ADDL      0x00
+#define FUNC_SUBL      0x09
+#define FUNC_ADDQ      0x20
+#define FUNC_SUBQ      0x29
+#define FUNC_CMPEQ     0x2D
+#define FUNC_CMPLT     0x4D
+#define FUNC_CMPLE     0x6D
+#define FUNC_CMPULT    0x1D
+#define FUNC_CMPULE    0x3D
+
+#define OP_MUL         0x13
+
+#define FUNC_MULL      0x00
+#define FUNC_MULQ      0x20
+
+#define OP_LOGIC       0x11
+
+#define FUNC_AND       0x00
+#define FUNC_BIS       0x20
+#define FUNC_XOR       0x40
+
+#define OP_SHIFT       0x12
+
+#define FUNC_SLL       0x39
+#define FUNC_SRL       0x34
+#define FUNC_SRA       0x3C
+
+#define OP_SEXT        0x1C
+
+#define FUNC_SEXTB     0x00
+#define FUNC_SEXTW     0x01
+
+#define OP_LDA         0x08
+#define OP_LDAH        0x09
+#define OP_LDBU        0x0A
+#define OP_LDWU        0x0C
+#define OP_LDL         0x28
+#define OP_LDQ         0x29
+#define OP_STB         0x0E
+#define OP_STW         0x0D
+#define OP_STL         0x2C
+#define OP_STQ         0x2D
+
+/*
+ * return the number of regs used for parameter passing on a procedure call.
+ * note that alpha uses $16~$21 to pass the first 6 parameters of a procedure.
+ */
+static inline int tcg_target_get_call_iarg_regs_count(int flags)
+{
+    return 6;
+}
+
+/*
+ * given a constraint, return the set of available registers. this function is
+ * called once for each op at qemu's initialization stage.
+ */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+    const char *ct_str = *pct_str;
+
+    switch(ct_str[0])
+    {
+    case 'r':
+        /* constraint 'r' means any register is okay */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        break;
+
+    case 'L':
+        /*
+         * constraint 'L' is used for qemu_ld/st and has 2 meanings:
+         * 1st, the argument needs to be allocated a register.
+         * 2nd, we reserve some registers from the call-clobbered list for
+         * qemu_ld/st local usage, so these registers must not be allocated
+         * to the argument that the 'L' constraint is describing.
+         *
+         * note that op qemu_ld/st has the TCG_OPF_CALL_CLOBBER flag, and
+         * tcg will free all call-clobbered registers before generating target
+         * insns for qemu_ld/st, so we can use these registers directly without
+         * worrying about destroying their content.
+         */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_16);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_17);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_18);
+        break;
+
+    default:
+        return -1;
+    }
+
+    ct_str++;
+    *pct_str = ct_str;
+    return 0;
+}
+
+/*
+ * whether an op's input argument may use a constant
+ */
+static inline int tcg_target_const_match( \
+    tcg_target_long val, const TCGArgConstraint *arg_ct)
+{
+    int ct = arg_ct->ct;
+    return (ct & TCG_CT_CONST) ? 1 : 0;
+}
+
+static inline void tcg_out_inst2(TCGContext *s, int Opcode, int Ra, int Disp)
+{
+    uint32_t v = ((Opcode & 0x3f) << 26)
+        | ((Ra & 0x1f) << 21)
+        | (Disp & 0x1fffff);
+    tcg_out32(s, v);
+}
+
+static inline void tcg_out_inst3_disp(TCGContext *s, int Opcode, int Ra, int Rb, int Disp)
+{
+    uint32_t v = ((Opcode & 0x3f) << 26)
+        | ((Ra & 0x1f) << 21)
+        | ((Rb & 0x1f) << 16)
+        | (Disp & 0xffff);
+    tcg_out32(s, v);
+}
+
+static inline void tcg_out_inst3_func(TCGContext *s, int Opcode, int Ra, int Rb, int Func, int Disp)
+{
+    uint32_t v = ((Opcode & 0x3f) << 26)
+        | ((Ra & 0x1f) << 21)
+        | ((Rb & 0x1f) << 16)
+        | ((Func & 0x3) << 14)
+        | (Disp & 0x3fff);
+    tcg_out32(s, v);
+}
+
+static inline void tcg_out_inst4(TCGContext *s, int Opcode, int Ra, int Rb, int Func, int Rc)
+{
+    uint32_t v = ((Opcode & 0x3f) << 26)
+        | ((Ra & 0x1f) << 21)
+        | ((Rb & 0x1f) << 16)
+        | ((Func & 0x7f) << 5)
+        | (Rc & 0x1f);
+    tcg_out32(s, v);
+}
+
+static inline void tcg_out_inst4i(TCGContext *s, int Opcode, int Ra, int Lit, int Func, int Rc)
+{
+    uint32_t v = ((Opcode & 0x3f) << 26)
+        | ((Ra & 0x1f) << 21)
+        | ((Lit & 0xff) << 13)
+        | ((Func & 0x7f) << 5)
+        | (1 << 12)
+        | (Rc & 0x1f);
+    tcg_out32(s, v);
+}
+
+/*
+ * mov from a reg to another
+ */
+static inline void tcg_out_mov(TCGContext *s, int Rc, int Rb)
+{
+    if (Rb != Rc) {
+        tcg_out_inst4(s, OP_LOGIC, TCG_REG_31, Rb, FUNC_BIS, Rc);
+    }
+}
+
+/*
+ * mov a 64-bit immediate 'arg' to register 'Ra'. this function generates a
+ * fixed-length (5 insns, 20 bytes) target insn sequence.
+ */
+static void tcg_out_movi_fixl( \
+    TCGContext *s, TCGType type, int Ra, tcg_target_long arg)
+{
+    tcg_target_long l0, l1, l2, l3;
+    tcg_target_long l1_tmp, l2_tmp, l3_tmp;
+
+    l0 = arg & 0xffffu;
+    l1_tmp = l1 = (arg >> 16) & 0xffffu;
+    l2_tmp = l2 = (arg >> 32) & 0xffffu;
+    l3_tmp = l3 = (arg >> 48) & 0xffffu;
+
+    if (l0 & 0x8000u)
+        l1_tmp = (l1 + 1) & 0xffffu;
+    if ((l1_tmp & 0x8000u) || ((l1_tmp == 0) && (l1_tmp != l1)))
+        l2_tmp = (l2 + 1) & 0xffffu;
+    if ((l2_tmp & 0x8000u) || ((l2_tmp == 0) && (l2_tmp != l2)))
+        l3_tmp = (l3 + 1) & 0xffffu;
+
+    tcg_out_inst3_disp(s, OP_LDAH, Ra, TCG_REG_31, l3_tmp);
+    tcg_out_inst3_disp(s, OP_LDA, Ra, Ra, l2_tmp);
+    tcg_out_inst4i(s, OP_SHIFT, Ra, 32, FUNC_SLL, Ra);
+    tcg_out_inst3_disp(s, OP_LDAH, Ra, Ra, l1_tmp);
+    tcg_out_inst3_disp(s, OP_LDA, Ra, Ra, l0);
+}
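+/*
+ * worked example (illustrative only): arg = 0x123456789abcdef0 splits into
+ * l0 = 0xdef0, l1 = 0x9abc, l2 = 0x5678, l3 = 0x1234. l0 and the adjusted
+ * l1 both have bit 15 set, so l1 and l2 get carry fixups, and the emitted
+ * sequence is: ldah Ra, 0x1234($31); lda Ra, 0x5679(Ra); sll Ra, 32, Ra;
+ * ldah Ra, 0x9abd(Ra); lda Ra, 0xdef0(Ra).
+ */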
+
+/*
+ * mov a 64-bit immediate 'arg' to register 'Ra'. this function generates a
+ * variable-length target insn sequence.
+ */
+static inline void tcg_out_movi( \
+    TCGContext *s, TCGType type, int Ra, tcg_target_long arg)
+{
+    if (type == TCG_TYPE_I32) {
+        if (arg != (int32_t)arg)
+            tcg_abort();
+    }
+
+    if (arg == 0) {
+        tcg_out_inst4(s, OP_LOGIC, Ra, Ra, FUNC_XOR, Ra);
+    } else if (arg == (int16_t)arg) {
+        tcg_out_inst3_disp(s, OP_LDA, Ra, TCG_REG_31, arg);
+    } else if (arg == (int32_t)arg) {
+        tcg_out_inst3_disp(s, OP_LDAH, Ra, TCG_REG_31, (arg >> 16));
+        if (arg & ((tcg_target_ulong)0x8000)) {
+            tcg_out_inst3_disp(s, OP_LDAH, Ra, Ra, 1);
+        }
+        tcg_out_inst3_disp(s, OP_LDA, Ra, Ra, arg);
+    } else {
+        tcg_out_movi_fixl(s, type, Ra, arg);
+    }
+}
+
+static inline int _is_tmp_reg( int r)
+{
+    if ( r == TMP_REG1 || r == TMP_REG2 || r == TMP_REG3)
+        return 1;
+    else
+        return 0;
+}
+
+/*
+ * load the value at disp(Rb) into Ra.
+ */
+static inline void tcg_out_ld( \
+    TCGContext *s, TCGType type, int Ra, int Rb, tcg_target_long disp)
+{
+    int Opcode;
+
+    if (_is_tmp_reg(Ra) || _is_tmp_reg(Rb))
+        tcg_abort();
+
+    Opcode = ((type == TCG_TYPE_I32) ? OP_LDL : OP_LDQ);
+
+    if (disp != (int16_t)disp) {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_inst4(s, OP_ADDSUBCMP, Rb, TMP_REG1, FUNC_ADDQ, TMP_REG1);
+        tcg_out_inst3_disp(s, Opcode, Ra, TMP_REG1, 0);
+    } else {
+        tcg_out_inst3_disp(s, Opcode, Ra, Rb, disp);
+    }
+}
+
+/*
+ * store the value in Ra to disp(Rb).
+ */
+static inline void tcg_out_st( \
+    TCGContext *s, TCGType type, int Ra, int Rb, tcg_target_long disp)
+{
+    int Opcode;
+
+    if (_is_tmp_reg(Ra) || _is_tmp_reg(Rb))
+        tcg_abort();
+
+    Opcode = ((type == TCG_TYPE_I32) ? OP_STL : OP_STQ);
+
+    if (disp != (int16_t)disp) {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_inst4(s, OP_ADDSUBCMP, Rb, TMP_REG1, FUNC_ADDQ, TMP_REG1);
+        tcg_out_inst3_disp(s, Opcode, Ra, TMP_REG1, 0);
+    } else {
+        tcg_out_inst3_disp(s, Opcode, Ra, Rb, disp);
+    }
+}
+
+/*
+ * generate an arithmetic instruction with an immediate. Ra is used as both
+ * input and output, and val is used as the other input.
+ */
+static inline void tgen_arithi( \
+    TCGContext *s, int Opcode, int Func, int Ra, tcg_target_long val)
+{
+    if (_is_tmp_reg(Ra))
+        tcg_abort();
+
+    if (val == (uint8_t)val) {
+        tcg_out_inst4i(s, Opcode, Ra, val, Func, Ra);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, val);
+        tcg_out_inst4(s, Opcode, Ra, TMP_REG1, Func, Ra);
+    }
+}
+
+/*
+ * generate an addq instruction with an immediate.
+ */
+static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+{
+    if (val != 0)
+        tgen_arithi(s, OP_ADDSUBCMP, FUNC_ADDQ, reg, val);
+}
+
+/*
+ * generate insns to push reg onto the stack.
+ */
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+    tcg_out_inst4i(s, OP_ADDSUBCMP, TCG_REG_30, 8, FUNC_SUBQ, TCG_REG_30);
+    tcg_out_inst3_disp(s, OP_STQ, reg, TCG_REG_30, 0);
+}
+
+/*
+ * generate insns to pop a value from the stack into reg.
+ */
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+    tcg_out_inst3_disp(s, OP_LDQ, reg, TCG_REG_30, 0);
+    tcg_out_inst4i(s, OP_ADDSUBCMP, TCG_REG_30, 8, FUNC_ADDQ, TCG_REG_30);
+}
+
+static const uint8_t tcg_cond_to_jcc[10] = {
+    [TCG_COND_EQ] = FUNC_CMPEQ,
+    [TCG_COND_NE] = FUNC_CMPEQ,
+    [TCG_COND_LT] = FUNC_CMPLT,
+    [TCG_COND_GE] = FUNC_CMPLT,
+    [TCG_COND_LE] = FUNC_CMPLE,
+    [TCG_COND_GT] = FUNC_CMPLE,
+    [TCG_COND_LTU] = FUNC_CMPULT,
+    [TCG_COND_GEU] = FUNC_CMPULT,
+    [TCG_COND_LEU] = FUNC_CMPULE,
+    [TCG_COND_GTU] = FUNC_CMPULE
+};
+
+/*
+ * called by tcg_out_reloc() when the label address is determined,
+ * i.e., label->has_value is true. what needs to be done is to patch
+ * the jmp insn that references this label.
+ *
+ * code_ptr - position to patch
+ * type - relocation type
+ * value - label address
+ * addend - not used
+ */
+static void patch_reloc(uint8_t *code_ptr, \
+    int type, tcg_target_long value, tcg_target_long addend)
+{
+    TCGContext s;
+    tcg_target_long val;
+
+    if (type != R_ALPHA_REFQUAD)
+        tcg_abort();
+    if (value & 3)
+        tcg_abort();
+
+    s.code_ptr = code_ptr;
+    val = (value - (tcg_target_long)s.code_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+
+    tcg_out_inst2(&s, OP_BR, TCG_REG_31, val);
+}
+
+/*
+ * generate insns for BR
+ */
+static void tcg_out_br(TCGContext *s, int label_index)
+{
+    TCGLabel *l = &s->labels[label_index];
+
+    if (l->has_value) {
+        tcg_target_long val;
+        if (l->u.value & 0x3)
+            tcg_abort();
+        val = ((tcg_target_long)(l->u.value) - (tcg_target_long)s->code_ptr - 4) >> 2;
+        if (val >= -0x100000 && val < 0x100000) {
+            /* the distance fits in the 21-bit field */
+            tcg_out_inst2(s, OP_BR, TMP_REG1, val);
+        } else {
+            tcg_abort();
+        }
+    } else {
+        /* record relocation info */
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_REFQUAD, label_index, 0);
+        s->code_ptr += 4;
+    }
+}
+
+/*
+ * generate insns for INDEX_op_brcond
+ */
+static void tcg_out_brcond( TCGContext *s, int cond, \
+    TCGArg arg1, TCGArg arg2, int const_arg2, int label_index)
+{
+    int func, opc;
+    TCGLabel *l = &s->labels[label_index];
+
+    if (cond < TCG_COND_EQ || cond > TCG_COND_GTU || const_arg2)
+        tcg_abort();
+
+    func = tcg_cond_to_jcc[cond];
+    tcg_out_inst4(s, OP_ADDSUBCMP, arg1, arg2, func, TMP_REG1);
+
+    /* if cond is an odd number, TMP_REG1 == 0 means true */
+    opc = (cond & 1) ? OP_BLBC : OP_BLBS;
+
+    if (l->has_value) {
+        tcg_target_long val;
+        if (l->u.value & 3)
+            tcg_abort();
+        val = ((tcg_target_long)l->u.value - (tcg_target_long)s->code_ptr - 4) >> 2;
+        if (val >= -0x100000 && val < 0x100000) {
+            /* the distance fits in the 21-bit field */
+            tcg_out_inst2(s, opc, TMP_REG1, val);
+        } else {
+            tcg_abort();
+        }
+    } else {
+        tcg_out_inst2(s, opc ^ 4, TMP_REG1, 1);
+        /* record relocation info */
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_REFQUAD, label_index, 0);
+        s->code_ptr += 4;
+    }
+}
+
+
+#if defined(CONFIG_SOFTMMU)
+
+#include "../../softmmu_defs.h"
+
+static void *qemu_ld_helpers[4] = {
+    __ldb_mmu,
+    __ldw_mmu,
+    __ldl_mmu,
+    __ldq_mmu,
+};
+
+static void *qemu_st_helpers[4] = {
+    __stb_mmu,
+    __stw_mmu,
+    __stl_mmu,
+    __stq_mmu,
+};
+
+#endif
+
+/*
+ * XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
+ * EAX. It will be useful once fixed registers globals are less common.
+ *
+ * output host insns for op 'qemu_ldxx t0, t1, flags', which means fetching the
+ * value at t1 into t0. flags gives the current CPU mode, kernel or user.
+ *
+ * the opc argument determines the data width and extension type (zero or
+ * signed), and has the following layout:
+ *                            2  1  0
+ * ------------------------------------
+ * |                        | E |  W  |
+ * ------------------------------------
+ *
+ * E = 0 means zero extension, 1 means signed extension
+ * W = 0 means byte, 1 means word, 2 means dword.
+ *
+ * Note that the VM addr space may be 32-bit or 64-bit; the comments below
+ * take a 32-bit addr space as the example.
+ */
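+/*
+ * e.g. opc = 1 requests a zero-extended 16-bit load (W = 1, E = 0),
+ * while opc = 2|4 requests a sign-extended 32-bit load (W = 2, E = 1).
+ */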

+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+{
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;
+    tcg_target_long val;
+
+#if defined(CONFIG_SOFTMMU)
+    uint8_t *label1_ptr, *label2_ptr;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+    mem_index = *args;
+    s_bits = opc & 3;
+
+    r0 = TCG_REG_16;
+    r1 = TCG_REG_17;
+
+#if defined(CONFIG_SOFTMMU)
+
+    tcg_out_mov(s, r1, addr_reg);
+    tcg_out_mov(s, r0, addr_reg);
+
+#if TARGET_LONG_BITS == 32
+    /* if the VM is a 32-bit arch, clear the high 32 bits of the addr */
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SLL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SRL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r1, 32, FUNC_SLL, r1);
+    tcg_out_inst4i(s, OP_SHIFT, r1, 32, FUNC_SRL, r1);
+#endif
+
+    tgen_arithi(s, OP_LOGIC, FUNC_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+    tgen_arithi(s, OP_SHIFT, FUNC_SRL, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, OP_LOGIC, FUNC_AND, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+
+    tcg_out_addi(s, r1, offsetof(CPUState, tlb_table[mem_index][0].addr_read)); /* addq r1, offset, r1 */
+    tcg_out_inst4(s, OP_ADDSUBCMP, r1, TCG_REG_15, FUNC_ADDQ, r1);              /* addq r1, $15, r1 */
+
+#if TARGET_LONG_BITS == 32
+    tcg_out_inst3_disp(s, OP_LDL, TMP_REG1, r1, 0);                             /* ldl TMP_REG1, 0(r1) */
+    tcg_out_inst4i(s, OP_SHIFT, TMP_REG1, 32, FUNC_SLL, TMP_REG1);
+    tcg_out_inst4i(s, OP_SHIFT, TMP_REG1, 32, FUNC_SRL, TMP_REG1);
+#else
+    tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, r1, 0);                             /* ldq TMP_REG1, 0(r1) */
+#endif
+
+    /*
+     * now r0 contains the page# and TMP_REG1 contains tlb_entry.addr_read;
+     * compare them below.
+     */
+    tcg_out_inst4(s, OP_ADDSUBCMP, TMP_REG1, r0, FUNC_CMPEQ, TMP_REG1);
+
+    tcg_out_mov(s, r0, addr_reg);
+#if TARGET_LONG_BITS == 32
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SLL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SRL, r0);
+#endif
+
+    /*
+     * if equal, jump to label1. since label1 is not resolved yet,
+     * just record a relocation.
+     */
+    label1_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    /* here, not equal: TLB miss */
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_17, mem_index);        /* pass argument 2 */
+    tcg_out_movi(s, TCG_TYPE_I64, \
+        TMP_REG1, (tcg_target_long)qemu_ld_helpers[s_bits]);     /* get helper func entry */
+    tcg_out_push(s, addr_reg);
+    tcg_out_push(s, TCG_REG_26);
+    tcg_out_push(s, TCG_REG_15);
+    tcg_out_mov(s, TCG_REG_27, TMP_REG1);
+    tcg_out_inst3_func(s, OP_CALL, TCG_REG_26, TMP_REG1, FUNC_CALL, 0); /* call helper func */
+    tcg_out_pop(s, TCG_REG_15);
+    tcg_out_pop(s, TCG_REG_26);
+    tcg_out_pop(s, addr_reg);
+
+    /* after the helper function call, the result of the ld is in $0 */
+    switch(opc) {
+    case 0 | 4:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, TCG_REG_0, FUNC_SEXTB, data_reg);
+        break;
+    case 1 | 4:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, TCG_REG_0, FUNC_SEXTW, data_reg);
+        break;
+    case 2 | 4:
+        tcg_out_inst4i(s, OP_SHIFT, TCG_REG_0, 32, FUNC_SLL, data_reg);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 32, FUNC_SRA, data_reg);
+        break;
+    case 0:
+        tcg_out_inst4i(s, OP_SHIFT, TCG_REG_0, 56, FUNC_SLL, data_reg);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 56, FUNC_SRL, data_reg);
+        break;
+    case 1:
+        tcg_out_inst4i(s, OP_SHIFT, TCG_REG_0, 48, FUNC_SLL, data_reg);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 48, FUNC_SRL, data_reg);
+        break;
+    case 2:
+        tcg_out_inst4i(s, OP_SHIFT, TCG_REG_0, 32, FUNC_SLL, data_reg);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 32, FUNC_SRL, data_reg);
+        break;
+    case 3:
+        tcg_out_mov(s, data_reg, TCG_REG_0);
+        break;
+    default:
+        tcg_abort();
+        break;
+    }
+
+    /*
+     * we are done; jmp to label2. label2 is not resolved yet,
+     * so record a relocation.
+     */
+    label2_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    /* patch the jmp to label1 */
+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t) \
+        ((OP_BNE << 26) | (TMP_REG1 << 21) | (val & 0x1fffff));
+
+    /*
+     * if we get here, a TLB entry was hit; r0 contains the guest addr and
+     * r1 points to tlb_entry.addr_read. load the tlb_entry.addend
+     * (64-bit on alpha) and add it to r0 to get the host VA.
+     */
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, \
+        offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read));
+    tcg_out_inst4(s, OP_ADDSUBCMP, r1, TMP_REG1, FUNC_ADDQ, r1);
+    tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, r1, 0);
+    tcg_out_inst4(s, OP_ADDSUBCMP, r0, TMP_REG1, FUNC_ADDQ, r0);
+
+#else
+    r0 = addr_reg;
+#endif /* defined(CONFIG_SOFTMMU) */
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    tcg_abort();
+#endif
+
+    /* when we get here, r0 contains the host VA that can be used to access the guest PA */
+    switch(opc) {
+    case 0:
+        tcg_out_inst3_disp(s, OP_LDBU, data_reg, r0, 0);
+        break;
+    case 0 | 4:
+        tcg_out_inst3_disp(s, OP_LDBU, data_reg, r0, 0);
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, data_reg, FUNC_SEXTB, data_reg);
+        break;
+    case 1:
+        tcg_out_inst3_disp(s, OP_LDWU, data_reg, r0, 0);
+        break;
+    case 1 | 4:
+        tcg_out_inst3_disp(s, OP_LDWU, data_reg, r0, 0);
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, data_reg, FUNC_SEXTW, data_reg);
+        break;
+    case 2:
+        tcg_out_inst3_disp(s, OP_LDL, data_reg, r0, 0);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 32, FUNC_SLL, data_reg);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 32, FUNC_SRL, data_reg);
+        break;
+    case 2 | 4:
+        tcg_out_inst3_disp(s, OP_LDL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_inst3_disp(s, OP_LDQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+
+#if defined(CONFIG_SOFTMMU)
+    /* label2: */
+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)((OP_BR << 26) \
+        | (TCG_REG_31 << 21) | (val & 0x1fffff));
+#endif
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+{
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;
+    tcg_target_long val;
+
+#if defined(CONFIG_SOFTMMU)
+    uint8_t *label1_ptr, *label2_ptr;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+    mem_index = *args;
+    s_bits = opc & 3;
+
+    r0 = TCG_REG_16;
+    r1 = TCG_REG_17;
+
+#if defined(CONFIG_SOFTMMU)
+
+    tcg_out_mov(s, r1, addr_reg);
+    tcg_out_mov(s, r0, addr_reg);
+
+#if TARGET_LONG_BITS == 32
+    /* if the VM is a 32-bit arch, clear the high 32 bits of the addr */
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SLL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SRL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r1, 32, FUNC_SLL, r1);
+    tcg_out_inst4i(s, OP_SHIFT, r1, 32, FUNC_SRL, r1);
+#endif
+
+    tgen_arithi(s, OP_LOGIC, FUNC_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+    tgen_arithi(s, OP_SHIFT, FUNC_SRL, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, OP_LOGIC, FUNC_AND, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+
+    tcg_out_addi(s, r1, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
+    tcg_out_inst4(s, OP_ADDSUBCMP, r1, TCG_REG_15, FUNC_ADDQ, r1);
+
+#if TARGET_LONG_BITS == 32
+    tcg_out_inst3_disp(s, OP_LDL, TMP_REG1, r1, 0);
+    tcg_out_inst4i(s, OP_SHIFT, TMP_REG1, 32, FUNC_SLL, TMP_REG1);
+    tcg_out_inst4i(s, OP_SHIFT, TMP_REG1, 32, FUNC_SRL, TMP_REG1);
+#else
+    tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, r1, 0);
+#endif
+
+    /*
+     * now r0 contains the page# and TMP_REG1 contains tlb_entry.addr_write;
+     * compare them below.
+     */
+    tcg_out_inst4(s, OP_ADDSUBCMP, TMP_REG1, r0, FUNC_CMPEQ, TMP_REG1);
+
+    tcg_out_mov(s, r0, addr_reg);
+#if TARGET_LONG_BITS == 32
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SLL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SRL, r0);
+#endif
+
+    /*
+     * if equal, jump to label1. since label1 is not resolved yet,
+     * just record a relocation.
+     */
+    label1_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    /* here, not equal: TLB miss */
+    tcg_out_mov(s, TCG_REG_17, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_18, mem_index);
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)qemu_st_helpers[s_bits]);
+
+    tcg_out_push(s, data_reg);
+    tcg_out_push(s, addr_reg);
+    tcg_out_push(s, TCG_REG_26);
+    tcg_out_push(s, TCG_REG_15);
+    tcg_out_mov(s, TCG_REG_27, TMP_REG1);
+    tcg_out_inst3_func(s, OP_CALL, TCG_REG_26, TMP_REG1, FUNC_CALL, 0);
+    tcg_out_pop(s, TCG_REG_15);
+    tcg_out_pop(s, TCG_REG_26);
+    tcg_out_pop(s, addr_reg);
+    tcg_out_pop(s, data_reg);
+
+    /*
+     * we are done; jmp to label2. label2 is not resolved yet,
+     * so record a relocation.
+     */
+    label2_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    /* patch the jmp to label1 */
+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t) \
+        ((OP_BNE << 26) | (TMP_REG1 << 21) | (val & 0x1fffff));
+
+    /*
+     * if we get here, a TLB entry was hit; r0 contains the guest addr and
+     * r1 points to tlb_entry.addr_write. load the tlb_entry.addend
+     * (64-bit on alpha) and add it to r0 to get the host VA.
+     */
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, \
+        offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write));
+    tcg_out_inst4(s, OP_ADDSUBCMP, r1, TMP_REG1, FUNC_ADDQ, r1);
+    tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, r1, 0);
+    tcg_out_inst4(s, OP_ADDSUBCMP, r0, TMP_REG1, FUNC_ADDQ, r0);
+
+#else
+    r0 = addr_reg;
+#endif
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    tcg_abort();
+#endif
+
+    /* when we get here, r0 contains the host VA that can be used to access the guest PA */
+    switch(opc) {
+    case 0:
+        tcg_out_inst3_disp(s, OP_STB, data_reg, r0, 0);
+        break;
+    case 1:
+        tcg_out_inst3_disp(s, OP_STW, data_reg, r0, 0);
+        break;
+    case 2:
+        tcg_out_inst3_disp(s, OP_STL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_inst3_disp(s, OP_STQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+
+#if defined(CONFIG_SOFTMMU)
+    /* patch the jmp to label2 */
+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)((OP_BR << 26) \
+        | (TCG_REG_31 << 21) | (val & 0x1fffff));
+#endif
+}
+
+static inline void tgen_ldxx( TCGContext *s, int Ra, int Rb, tcg_target_long disp, int flags)
+{
+    int opc_array[4] = { OP_LDBU, OP_LDWU, OP_LDL, OP_LDQ };
+    int opc = opc_array[flags & 3];
+
+    if (_is_tmp_reg(Ra) || _is_tmp_reg(Rb))
+        tcg_abort();
+
+    if (disp != (int16_t)disp) {
+        /* disp cannot be stored in the insn directly */
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_inst4(s, OP_ADDSUBCMP, Rb, TMP_REG1, FUNC_ADDQ, TMP_REG1);
+        tcg_out_inst3_disp(s, opc, Ra, TMP_REG1, 0);
+    } else {
+        tcg_out_inst3_disp(s, opc, Ra, Rb, disp);
+    }
+
+    switch (flags & 7) {
+    case 0:
+    case 1:
+    case 2|4:
+    case 3:
+        break;
+    case 0|4:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, Ra, FUNC_SEXTB, Ra);
+        break;
+    case 1|4:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, Ra, FUNC_SEXTW, Ra);
+        break;
+    case 2:
+        tcg_out_inst4i(s, OP_SHIFT, Ra, 32, FUNC_SLL, Ra);
+        tcg_out_inst4i(s, OP_SHIFT, Ra, 32, FUNC_SRL, Ra);
+        break;
+    default:
+        tcg_abort();
+    }
+}
+
+static inline void tgen_stxx( TCGContext *s, int Ra, int Rb, tcg_target_long disp, int flags)
+{
+    int opc_array[4] = { OP_STB, OP_STW, OP_STL, OP_STQ };
+    int opc = opc_array[flags & 3];
+
+    if (disp != (int16_t)disp) {
+        /* disp cannot be stored in the insn directly */
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_inst4(s, OP_ADDSUBCMP, Rb, TMP_REG1, FUNC_ADDQ, TMP_REG1);
+        tcg_out_inst3_disp(s, opc, Ra, TMP_REG1, 0);
+    } else {
+        tcg_out_inst3_disp(s, opc, Ra, Rb, disp);
+    }
+}
+
+static inline void tcg_out_op(TCGContext *s, \
+    int opc, const TCGArg *args, const int *const_args)
+{
+    int oc, c;
+    switch(opc)
+    {
+    case INDEX_op_exit_tb:
+        /*
+         * exit_tb t0, where t0 is always constant and should be returned to
+         * the engine. since we'll be back in the engine soon, $0 and $1 will
+         * never be used.
+         */
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_0, args[0]);
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)tb_ret_addr);
+        tcg_out_inst3_func(s, OP_JMP, TCG_REG_31, TMP_REG1, FUNC_JMP, 0);
+        break;
+
+    case INDEX_op_goto_tb:
+        /* goto_tb idx, where idx is constant 0 or 1, indicating the branch # */
+        if (s->tb_jmp_offset) {
+            /* we don't support direct jmp */
+            tcg_abort();
+        } else {
+            tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)(s->tb_next + args[0]));
+            tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, TMP_REG1, 0);
+            tcg_out_inst3_func(s, OP_JMP, TCG_REG_31, TMP_REG1, FUNC_JMP, 0);
+        }
+        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        break;
+
+    case INDEX_op_call:
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            tcg_out_push(s, TCG_REG_26);
+            tcg_out_push(s, TCG_REG_15);
+            tcg_out_mov(s, TCG_REG_27, args[0]);
+            tcg_out_inst3_func(s, OP_CALL, TCG_REG_26, args[0], FUNC_CALL, 0);
+            tcg_out_pop(s, TCG_REG_15);
+            tcg_out_pop(s, TCG_REG_26);
+        }
+        break;
+
+    case INDEX_op_jmp:
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            tcg_out_inst3_func(s, OP_JMP, TCG_REG_31, args[0], FUNC_JMP, 0);
+        }
+        break;
+
+    case INDEX_op_br:
+        tcg_out_br(s, args[0]);
+        break;
+
+    case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8u_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_ld8s_i32:
+    case INDEX_op_ld8s_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 0|4);
+        break;
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16u_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld16s_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 1|4);
+        break;
+    case INDEX_op_ld32u_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 2);
+        break;
+    case INDEX_op_ld_i32:
+    case INDEX_op_ld32s_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 2|4);
+        break;
+    case INDEX_op_ld_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 3);
+        break;
+
+    case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64:
+        tgen_stxx(s, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64:
+        tgen_stxx(s, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_st_i32:
+    case INDEX_op_st32_i64:
+        tgen_stxx(s, args[0], args[1], args[2], 2);
+        break;
+    case INDEX_op_st_i64:
+        tgen_stxx(s, args[0], args[1], args[2], 3);
+        break;
+
+    case INDEX_op_add_i32:
+    case INDEX_op_add_i64:
+        oc = OP_ADDSUBCMP;
+        c = FUNC_ADDQ;
+        goto gen_arith;
+    case INDEX_op_sub_i32:
+    case INDEX_op_sub_i64:
+        oc = OP_ADDSUBCMP;
+        c = FUNC_SUBQ;
+        goto gen_arith;
+    case INDEX_op_mul_i32:
+        oc = OP_MUL;
+        c = FUNC_MULL;
+        goto gen_arith;
+    case INDEX_op_mul_i64:
+        oc = OP_MUL;
+        c = FUNC_MULQ;
+        goto gen_arith;
+    case INDEX_op_and_i32:
+    case INDEX_op_and_i64:
+        oc = OP_LOGIC;
+        c = FUNC_AND;
+        goto gen_arith;
+    case INDEX_op_or_i32:
+    case INDEX_op_or_i64:
+        oc = OP_LOGIC;
+        c = FUNC_BIS;
+        goto gen_arith;
+    case INDEX_op_xor_i32:
+    case INDEX_op_xor_i64:
+        oc = OP_LOGIC;
+        c = FUNC_XOR;
+        goto gen_arith;
+    case INDEX_op_shl_i32:
+    case INDEX_op_shl_i64:
+        oc = OP_SHIFT;
+        c = FUNC_SLL;
+        goto gen_arith;
+    case INDEX_op_shr_i32:
+        /* zero-extend the 32-bit input first */
+        tcg_out_inst4i(s, OP_SHIFT, args[1], 32, FUNC_SLL, args[1]);
+        tcg_out_inst4i(s, OP_SHIFT, args[1], 32, FUNC_SRL, args[1]);
+        /* fall through */
+    case INDEX_op_shr_i64:
+        oc = OP_SHIFT;
+        c = FUNC_SRL;
+        goto gen_arith;
+    case INDEX_op_sar_i32:
+        /* sign-extend the 32-bit input first */
+        tcg_out_inst4i(s, OP_SHIFT, args[1], 32, FUNC_SLL, args[1]);
+        tcg_out_inst4i(s, OP_SHIFT, args[1], 32, FUNC_SRA, args[1]);
+        /* fall through */
+    case INDEX_op_sar_i64:
+        oc = OP_SHIFT;
+        c = FUNC_SRA;
+    gen_arith:
+        if (const_args[2]) {
+            tcg_abort();
+        } else {
+            tcg_out_inst4(s, oc, args[1], args[2], c, args[0]);
+        }
+        break;
+
+    case INDEX_op_brcond_i32:
+        tcg_out_mov(s, TMP_REG2, args[0]);
+        tcg_out_mov(s, TMP_REG3, args[1]);
+        if (args[2] >= TCG_COND_LTU && args[2] <= TCG_COND_GTU) {
+            /* unsigned compare: zero-extend both inputs */
+            tcg_out_inst4i(s, OP_SHIFT, TMP_REG2, 32, FUNC_SLL, TMP_REG2);
+            tcg_out_inst4i(s, OP_SHIFT, TMP_REG2, 32, FUNC_SRL, TMP_REG2);
+            tcg_out_inst4i(s, OP_SHIFT, TMP_REG3, 32, FUNC_SLL, TMP_REG3);
+            tcg_out_inst4i(s, OP_SHIFT, TMP_REG3, 32, FUNC_SRL, TMP_REG3);
+        } else {
+            /* signed compare: sign-extend both inputs */
+            tcg_out_inst4i(s, OP_SHIFT, TMP_REG2, 32, FUNC_SLL, TMP_REG2);
+            tcg_out_inst4i(s, OP_SHIFT, TMP_REG2, 32, FUNC_SRA, TMP_REG2);
+            tcg_out_inst4i(s, OP_SHIFT, TMP_REG3, 32, FUNC_SLL, TMP_REG3);
+            tcg_out_inst4i(s, OP_SHIFT, TMP_REG3, 32, FUNC_SRA, TMP_REG3);
+        }
+        tcg_out_brcond(s, args[2], TMP_REG2, TMP_REG3, const_args[1], args[3]);
+        break;
+    case INDEX_op_brcond_i64:
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        break;
+
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, args[1], FUNC_SEXTB, args[0]);
+        printf("ext8s met\n");
+        break;
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, args[1], FUNC_SEXTW, args[0]);
+        printf("ext16s met\n");
+        break;
+
+    case INDEX_op_qemu_ld8u:
+        tcg_out_qemu_ld(s, args, 0);
+        break;
+    case INDEX_op_qemu_ld8s:
+        tcg_out_qemu_ld(s, args, 0 | 4);
+        break;
+    case INDEX_op_qemu_ld16u:
+        tcg_out_qemu_ld(s, args, 1);
+        break;
+    case INDEX_op_qemu_ld16s:
+        tcg_out_qemu_ld(s, args, 1 | 4);
+        break;
+    case INDEX_op_qemu_ld32u:
+        tcg_out_qemu_ld(s, args, 2);
+        break;
+    case INDEX_op_qemu_ld32s:
+        tcg_out_qemu_ld(s, args, 2 | 4);
+        break;
+    case INDEX_op_qemu_ld64:
+        tcg_out_qemu_ld(s, args, 3);
+        break;
+
+    case INDEX_op_qemu_st8:
+        tcg_out_qemu_st(s, args, 0);
+        break;
+    case INDEX_op_qemu_st16:
+        tcg_out_qemu_st(s, args, 1);
+        break;
+    case INDEX_op_qemu_st32:
+        tcg_out_qemu_st(s, args, 2);
+        break;
+    case INDEX_op_qemu_st64:
+        tcg_out_qemu_st(s, args, 3);
+        break;
+
+    case INDEX_op_movi_i32:
+    case INDEX_op_movi_i64:
+    case INDEX_op_mov_i32:
+    case INDEX_op_mov_i64:
+    case INDEX_op_div2_i32:
+    case INDEX_op_divu2_i32:
+    default:
+        tcg_abort();
+    }
+}
+
+static const TCGTargetOpDef alpha_op_defs[] = {
+    { INDEX_op_exit_tb, { } },
+    { INDEX_op_goto_tb, { } },
+    { INDEX_op_call, { "r" } },
+    { INDEX_op_jmp, { "r" } },
+    { INDEX_op_br, { } },
+
+    { INDEX_op_mov_i32, { "r", "r" } },
+    { INDEX_op_movi_i32, { "r" } },
+    { INDEX_op_ld8u_i32, { "r", "r" } },
+    { INDEX_op_ld8s_i32, { "r", "r" } },
+    { INDEX_op_ld16u_i32, { "r", "r" } },
+    { INDEX_op_ld16s_i32, { "r", "r" } },
+    { INDEX_op_ld_i32, { "r", "r" } },
+    { INDEX_op_st8_i32, { "r", "r" } },
+    { INDEX_op_st16_i32, { "r", "r" } },
+    { INDEX_op_st_i32, { "r", "r" } },
+
+    { INDEX_op_add_i32, { "r", "0", "r" } },
+    { INDEX_op_mul_i32, { "r", "0", "r" } },
+    //{ INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
+    //{ INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
+    { INDEX_op_sub_i32, { "r", "0", "r" } },
+    { INDEX_op_and_i32, { "r", "0", "r" } },
+    { INDEX_op_or_i32, { "r", "0", "r" } },
+    { INDEX_op_xor_i32, { "r", "0", "r" } },
+
+    { INDEX_op_shl_i32, { "r", "0", "r" } },
+    { INDEX_op_shr_i32, { "r", "0", "r" } },
+    { INDEX_op_sar_i32, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i32, { "r", "r" } },
+
+    { INDEX_op_mov_i64, { "r", "r" } },
+    { INDEX_op_movi_i64, { "r" } },
+    { INDEX_op_ld8u_i64, { "r", "r" } },
+    { INDEX_op_ld8s_i64, { "r", "r" } },
+    { INDEX_op_ld16u_i64, { "r", "r" } },
+    { INDEX_op_ld16s_i64, { "r", "r" } },
+    { INDEX_op_ld32u_i64, { "r", "r" } },
+    { INDEX_op_ld32s_i64, { "r", "r" } },
+    { INDEX_op_ld_i64, { "r", "r" } },
+    { INDEX_op_st8_i64, { "r", "r" } },
+    { INDEX_op_st16_i64, { "r", "r" } },
+    { INDEX_op_st32_i64, { "r", "r" } },
+    { INDEX_op_st_i64, { "r", "r" } },
+
+    { INDEX_op_add_i64, { "r", "0", "r" } },
+    { INDEX_op_mul_i64, { "r", "0", "r" } },
+    //{ INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
+    //{ INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
+    { INDEX_op_sub_i64, { "r", "0", "r" } },
+    { INDEX_op_and_i64, { "r", "0", "r" } },
+    { INDEX_op_or_i64, { "r", "0", "r" } },
+    { INDEX_op_xor_i64, { "r", "0", "r" } },
+
+    { INDEX_op_shl_i64, { "r", "0", "r" } },
+    { INDEX_op_shr_i64, { "r", "0", "r" } },
+    { INDEX_op_sar_i64, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i64, { "r", "r" } },
+
+    { INDEX_op_ext8s_i32, { "r", "r"} },
+    { INDEX_op_ext16s_i32, { "r", "r"} },
+    { INDEX_op_ext8s_i64, { "r", "r"} },
+    { INDEX_op_ext16s_i64, { "r", "r"} },
+
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32s, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
+    //{ INDEX_op_qemu_st64, { "L", "L", "L"} },
+    { INDEX_op_qemu_st64, { "L", "L"} },
+    { -1 },
+};
+
+
+static int tcg_target_callee_save_regs[] = {
+    TCG_REG_15,   /* holds the global env pointer */
+    TCG_REG_9,
+    TCG_REG_10,
+    TCG_REG_11,
+    TCG_REG_12,
+    TCG_REG_13,
+    TCG_REG_14
+};
+
+/*
+ * Generate global QEMU prologue and epilogue code
+ */
+void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int i, frame_size, push_size, stack_addend;
+
+    /* TB prologue */
+    /*printf("TB prologue @ %lx\n", s->code_ptr);*/
+
+    /* save TCG_REG_26 ~ TCG_REG_29 */
+    tcg_out_push(s, TCG_REG_26);
+    tcg_out_push(s, TCG_REG_27);
+    tcg_out_push(s, TCG_REG_28);
+    tcg_out_push(s, TCG_REG_29);
+
+    /* save all callee-saved registers */
+    for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_push(s, tcg_target_callee_save_regs[i]);
+    }
+
+    /* reserve some stack space */
+    push_size = 8 + (4 + ARRAY_SIZE(tcg_target_callee_save_regs)) * 8;
+    frame_size = push_size + 4 * TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1);
+    stack_addend = frame_size - push_size;
+    tcg_out_addi(s, TCG_REG_30, -stack_addend);
+
+    tcg_out_inst3_func(s, OP_JMP, TCG_REG_31, TCG_REG_16, FUNC_JMP, 0);   /* jmp $16 */
+
+    /* TB epilogue */
+    tb_ret_addr = s->code_ptr;
+    tcg_out_addi(s, TCG_REG_30, stack_addend);
+    for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
+        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
+    }
+
+    tcg_out_pop(s, TCG_REG_29);
+    tcg_out_pop(s, TCG_REG_28);
+    tcg_out_pop(s, TCG_REG_27);
+    tcg_out_pop(s, TCG_REG_26);
+    tcg_out_inst3_func(s, OP_RET, TCG_REG_31, TCG_REG_26, FUNC_RET, 0);   /* ret */
+}
+
+
+void tcg_target_init(TCGContext *s)
+{
+    /* fail-safe */
+    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
+        tcg_abort();
+
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+        (1 << TCG_REG_1 ) | (1 << TCG_REG_2 ) | (1 << TCG_REG_3 ) | (1 << TCG_REG_4 ) |
+        (1 << TCG_REG_5 ) | (1 << TCG_REG_6 ) | (1 << TCG_REG_7 ) | (1 << TCG_REG_8 ) |
+        (1 << TCG_REG_22) | (1 << TCG_REG_23) | (1 << TCG_REG_24) | (1 << TCG_REG_25) |
+        (1 << TCG_REG_16) | (1 << TCG_REG_17) | (1 << TCG_REG_18) | (1 << TCG_REG_19) |
+        (1 << TCG_REG_20) | (1 << TCG_REG_21) | (1 << TCG_REG_0 ));
+
+    //tcg_regset_set32(tcg_target_call_clobber_regs, 0, 0xffffffff);
+
+    tcg_regset_clear(s->reserved_regs);
+    /* $26 ~ $31 are not allocated by tcg.c */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_26);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_27);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_28);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_29);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_30);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_31);
+    /* registers reserved for tmp usage */
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG1);
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG2);
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG3);
+
+    tcg_add_target_add_op_defs(alpha_op_defs);
+}
+
diff --git a/tcg/alpha/tcg-target.h b/tcg/alpha/tcg-target.h
new file mode 100644
index 0000000..3c15a15
--- /dev/null
+++ b/tcg/alpha/tcg-target.h
@@ -0,0 +1,70 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define TCG_TARGET_ALPHA 1
+
+#define TCG_TARGET_REG_BITS 64
+
+#define TCG_TARGET_NB_REGS 32
+
+enum {
+    TCG_REG_0 = 0, TCG_REG_1, TCG_REG_2, TCG_REG_3,
+    TCG_REG_4, TCG_REG_5, TCG_REG_6, TCG_REG_7,
+    TCG_REG_8, TCG_REG_9, TCG_REG_10, TCG_REG_11,
+    TCG_REG_12, TCG_REG_13, TCG_REG_14, TCG_REG_15,
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19,
+    TCG_REG_20, TCG_REG_21, TCG_REG_22, TCG_REG_23,
+    TCG_REG_24, TCG_REG_25, TCG_REG_26, TCG_REG_27,
+    TCG_REG_28, TCG_REG_29, TCG_REG_30, TCG_REG_31
+};
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_30
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+
+/* we have signed extension instructions */
+#define TCG_TARGET_HAS_ext8s_i32
+#define TCG_TARGET_HAS_ext16s_i32
+#define TCG_TARGET_HAS_ext8s_i64
+#define TCG_TARGET_HAS_ext16s_i64
+//#define TCG_TARGET_HAS_ext32s_i64
+
+/* Note: must be synced with dyngen-exec.h */
+#define TCG_AREG0 TCG_REG_15
+#define TCG_AREG1 TCG_REG_9
+#define TCG_AREG2 TCG_REG_10
+#define TCG_AREG3 TCG_REG_11
+#define TCG_AREG4 TCG_REG_12
+#define TCG_AREG5 TCG_REG_13
+#define TCG_AREG6 TCG_REG_14
+
+#define TMP_REG1 TCG_REG_23
+#define TMP_REG2 TCG_REG_24
+#define TMP_REG3 TCG_REG_25
+
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+    __asm__ __volatile__ ("call_pal 0x86");
+}
+
-- 
1.6.3.3




* Re: [Qemu-devel] [PATCH] Porting TCG to alpha platform
@ 2010-01-20 17:19 identifier scorpio
  2010-01-20 21:26 ` Richard Henderson
  0 siblings, 1 reply; 21+ messages in thread
From: identifier scorpio @ 2010-01-20 17:19 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel


Thank you all, especially Richard, for reviewing. I've partly amended my code according to your advice, but the result is not very encouraging: I can still run the linux-0.2.img image, and still can't run MS Windows. I think most of your advice relates to performance and may significantly reduce the TB size. Below I'll append my newly generated patch against stable-0.10; in case it gets mangled, I have also put it in the attachment.

Now I have some answers to your questions.

> > +static int target_parse_constraint(TCGArgConstraint
> *ct, const char **pct_str)
> > +{
> > +    const char *ct_str = *pct_str;
> > +
> > +    switch(ct_str[0])
> > +    {
> > +    case 'r':
> ...
> > +    case 'L':
> 
> Do you really need extra temporaries for L?  You
> already have 3.

in qemu_ld/st, we must use $16, $17 and $18 as temporaries, because we pass them as arguments to the helper functions in qemu_ld/st_helpers[].
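For reference, the v1 ld path marshals that helper call roughly like this (a condensed sketch of the code in tcg_out_qemu_ld; the real code also saves and restores registers around the call):

    /* __ldx_mmu(addr, mem_index): $16 = guest address, $17 = mem_index */
    tcg_out_mov(s, TCG_REG_16, addr_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_17, mem_index);
    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)qemu_ld_helpers[s_bits]);
    tcg_out_inst3_func(s, OP_CALL, TCG_REG_26, TMP_REG1, FUNC_CALL, 0);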

... 
> Err.. "8 insns"?  You'd only ever need to output
> 5.  Also, why would you ever want to explicitly never
> elide one of these insns if you could? Say, if only L0 and
> L3 were non-zero?
> 

yes, the number of output instructions is 5; my comment is a bit out-of-date.
your method here is more elegant, and I'll migrate to your "tcg_out_op_long()" version tomorrow.

... 
> With I/J constraints you don't need this special casing.

I'm not very familiar with the I/J constraints, and I'll study them later.

...
> > +        tcg_out_reloc(s,
> s->code_ptr, R_ALPHA_REFQUAD, label_index, 0);
> > +        s->code_ptr += 4;
> 
> I realize that it doesn't really matter what value you use
> here, so long as things are consistent with patch_reloc, but
> it'll be less confusing if you use the proper relocation
> type: R_ALPHA_BRADDR.
> 

you are right, R_ALPHA_BRADDR is more clear.

...
> > +        tcg_out_inst2(s, opc^4,
> TMP_REG1, 1);
> > +    /* record relocation infor */
> > +        tcg_out_reloc(s,
> s->code_ptr, R_ALPHA_REFQUAD, label_index, 0);
> > +        s->code_ptr += 4;
> 
> Bug: You've applied the relocation to the wrong
> instruction.
> Bug: What's with the "opc^4"?
> 

what did you mean by "applied the relocation to the wrong instruction"? couldn't I apply a relocation to the INDEX_op_brcond_i32 operation?
and opc^4 here is used to toggle between OP_BLBC (opcode 0x38) and OP_BLBS (opcode 0x3c), ugly code :)
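To make the toggle concrete (the opcode values come straight from the patch; the two branch opcodes differ only in bit 2, so XOR with 4 flips one into the other):

    int opc = (cond & 1) ? OP_BLBC : OP_BLBS;   /* 0x38 or 0x3C */
    int inverted = opc ^ 4;                     /* flips bit 2: BLBC <-> BLBS */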

...
> > +    /* if VM is of 32-bit arch, clear
> higher 32-bit of addr */
> 
> Use a zapnot insn for this.

zapnot is a good thing.
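For instance, using the INSN_ZAPNOT encoding and the tcg_out_fmt_opi helper from the v2 patch below, each two-shift zero-extension could collapse to a single insn (an untested sketch):

    /* zero-extend the low 32 bits of r0: keep bytes 0-3 (mask 0x0f), zap the rest */
    tcg_out_fmt_opi(s, INSN_ZAPNOT, r0, 0x0f, r0);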

...
> You don't need to push/pop anything here.  $26 should
> be saved by the prologue we emitted, and $15 is
> call-saved.  What you could usefully do is define a
> register constraint for $27 so that TCG automatically loads
> the value into that register and saves you a register move
> here.

I push/pop them here just to be safe.

> 
> > +    case INDEX_op_sar_i32:
> > +        tcg_out_inst4i(s,
> OP_SHIFT, args[1], 32, FUNC_SLL, args[1]);
> > +        tcg_out_inst4i(s,
> OP_SHIFT, args[1], 32, FUNC_SRA, args[1]);
> 
> That last shift can be combined with the requested shift
> via addition. For constant input, this saves an insn; for
> register input, the addition can be done in parallel with
> the first shift.

I changed it to use "addl r, 0, r" here.

> For comparing 32-bit inputs, it doesn't actually matter how
> you extend the inputs, so long as you do it the same for
> both inputs.  Therefore the best solution here is to
> sign-extend both inputs with "addl r,0,r".  Note as
> well that you don't need temporaries, as the inputs only
> have 32-bits defined; high bits are garbage in, garbage
> out.

I changed it to use "addl r, 0, r" here too.

> You'll also want to define INDEX_op_ext32s_i64 as "addl r,0,r".

added.
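In terms of the v2 emit helpers below, that is a single instruction (a sketch; INSN_ADDL and tcg_out_fmt_opi are defined in the patch):

    /* ext32s: addl ra, 0, ra sign-extends the low 32 bits of ra into ra */
    tcg_out_fmt_opi(s, INSN_ADDL, ra, 0, ra);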
 
> > +    case INDEX_op_div2_i32:
> > +    case INDEX_op_divu2_i32:
> 
> Don't define these, but you will need to define
> 
>   div_i32, divu_i32, rem_i32, remu_i32
>   div_i64, divu_i64, rem_i64, remu_i64

I think that when qemu meets x86 divide instructions, it calls helper functions to simulate them; must I still define div_i32/divu_i32/...?

... 
> > +    tcg_out_push(s, TCG_REG_26);
> > +    tcg_out_push(s, TCG_REG_27);
> > +    tcg_out_push(s, TCG_REG_28);
> > +    tcg_out_push(s, TCG_REG_29);
> 
> Of these only $26 needs to be saved.

again, I save them just to be safe.

++++++++++++++++++++++++++++++++++
below is the newest patch ...

From 7cc2acddfb7333ab3f1f6b17fa8fa5dcdd3c0095 Mon Sep 17 00:00:00 2001
From: Dong Weiyu <cidentifier@yahoo.com.cn>
Date: Wed, 20 Jan 2010 23:48:55 +0800
Subject: [PATCH] Porting TCG to alpha platform.

---
 cpu-all.h              |    2 +-
 tcg/alpha/tcg-target.c | 1196 ++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/alpha/tcg-target.h |   70 +++
 3 files changed, 1267 insertions(+), 1 deletions(-)
 create mode 100644 tcg/alpha/tcg-target.c
 create mode 100644 tcg/alpha/tcg-target.h

diff --git a/cpu-all.h b/cpu-all.h
index e0c3efd..bdf6fb2 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -22,7 +22,7 @@
 
 #include "qemu-common.h"
 
-#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__)
+#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__alpha__)
 #define WORDS_ALIGNED
 #endif
 
diff --git a/tcg/alpha/tcg-target.c b/tcg/alpha/tcg-target.c
new file mode 100644
index 0000000..143f576
--- /dev/null
+++ b/tcg/alpha/tcg-target.c
@@ -0,0 +1,1196 @@
+/*
+ * Tiny Code Generator for QEMU on ALPHA platform
+ */
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+    "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
+    "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15",
+    "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
+    "$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31",
+};
+#endif
+
+/*
+ * $26 ~ $31 are special and reserved.
+ * $25 is deliberately reserved for jcc operations,
+ * $0 usually holds a function's return result, so it is better allocated late,
+ * and $15 holds the cpu_env pointer, so it is allocated last.
+ */
+static const int tcg_target_reg_alloc_order[] = {
+    TCG_REG_9, TCG_REG_10, TCG_REG_11, TCG_REG_12, TCG_REG_13, TCG_REG_14,
+    TCG_REG_1, TCG_REG_2, TCG_REG_3, TCG_REG_4, TCG_REG_5, TCG_REG_6,
+    TCG_REG_7, TCG_REG_8, TCG_REG_22,
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19, TCG_REG_20, TCG_REG_21
+};
+
+/*
+ * according to the alpha calling convention, these 6 registers are used for
+ * function parameter passing. if a function has more than 6 parameters, the
+ * remaining ones are passed on the stack.
+ */
+static const int tcg_target_call_iarg_regs[6] = {
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19, TCG_REG_20, TCG_REG_21
+};
+
+/*
+ * according to the alpha calling convention, $0 is used to return the function result.
+ */
+static const int tcg_target_call_oarg_regs[1] = { TCG_REG_0 };
+
+/*
+ * saves the address of the TB's epilogue.
+ */
+static uint8_t *tb_ret_addr;
+
+#define INSN_OP(x)     (((x) & 0x3f) << 26)
+#define INSN_FUNC1(x)  (((x) & 0x3) << 14)
+#define INSN_FUNC2(x)  (((x) & 0x7f) << 5)
+#define INSN_RA(x)     (((x) & 0x1f) << 21)
+#define INSN_RB(x)     (((x) & 0x1f) << 16)
+#define INSN_RC(x)     ((x) & 0x1f)
+#define INSN_LIT(x)    (((x) & 0xff) << 13)
+#define INSN_DISP16(x) ((x) & 0xffff)
+#define INSN_DISP21(x) ((x) & 0x1fffff)
+#define INSN_RSVED(x)  ((x) & 0x3fff)
+
+#define INSN_JMP       (INSN_OP(0x1a) | INSN_FUNC1(0))
+#define INSN_CALL      (INSN_OP(0x1a) | INSN_FUNC1(1))
+#define INSN_RET       (INSN_OP(0x1a) | INSN_FUNC1(2))
+#define INSN_BR        INSN_OP(0x30)
+#define INSN_BEQ       INSN_OP(0x39)
+#define INSN_BNE       INSN_OP(0x3d)
+#define INSN_BLBC      INSN_OP(0x38)
+#define INSN_BLBS      INSN_OP(0x3c)
+#define INSN_ADDL      (INSN_OP(0x10) | INSN_FUNC2(0))
+#define INSN_SUBL      (INSN_OP(0x10) | INSN_FUNC2(0x9))
+#define INSN_ADDQ      (INSN_OP(0x10) | INSN_FUNC2(0x20))
+#define INSN_SUBQ      (INSN_OP(0x10) | INSN_FUNC2(0x29))
+#define INSN_CMPEQ     (INSN_OP(0x10) | INSN_FUNC2(0x2d))
+#define INSN_CMPLT     (INSN_OP(0x10) | INSN_FUNC2(0x4d))
+#define INSN_CMPLE     (INSN_OP(0x10) | INSN_FUNC2(0x6d))
+#define INSN_CMPULT    (INSN_OP(0x10) | INSN_FUNC2(0x1d))
+#define INSN_CMPULE    (INSN_OP(0x10) | INSN_FUNC2(0x3d))
+#define INSN_MULL      (INSN_OP(0x13) | INSN_FUNC2(0))
+#define INSN_MULQ      (INSN_OP(0x13) | INSN_FUNC2(0x20))
+#define INSN_AND       (INSN_OP(0x11) | INSN_FUNC2(0))
+#define INSN_BIS       (INSN_OP(0x11) | INSN_FUNC2(0x20))
+#define INSN_XOR       (INSN_OP(0x11) | INSN_FUNC2(0x40))
+#define INSN_SLL       (INSN_OP(0x12) | INSN_FUNC2(0x39))
+#define INSN_SRL       (INSN_OP(0x12) | INSN_FUNC2(0x34))
+#define INSN_SRA       (INSN_OP(0x12) | INSN_FUNC2(0x3c))
+#define INSN_ZAPNOT    (INSN_OP(0x12) | INSN_FUNC2(0x31))
+#define INSN_SEXTB     (INSN_OP(0x1c) | INSN_FUNC2(0))
+#define INSN_SEXTW     (INSN_OP(0x1c) | INSN_FUNC2(0x1))
+#define INSN_LDA       INSN_OP(0x8)
+#define INSN_LDAH      INSN_OP(0x9)
+#define INSN_LDBU      INSN_OP(0xa)
+#define INSN_LDWU      INSN_OP(0xc)
+#define INSN_LDL       INSN_OP(0x28)
+#define INSN_LDQ       INSN_OP(0x29)
+#define INSN_STB       INSN_OP(0xe)
+#define INSN_STW       INSN_OP(0xd)
+#define INSN_STL       INSN_OP(0x2c)
+#define INSN_STQ       INSN_OP(0x2d)
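+
+/*
+ * encoding example (illustrative, not part of the original patch): with the
+ * macros above, "addq $1, $2, $3" assembles as
+ * INSN_ADDQ | INSN_RA(1) | INSN_RB(2) | INSN_RC(3) = 0x40220403.
+*/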
+
+/*
+ * return the number of registers used for parameter passing on a procedure
+ * call. note that alpha uses $16~$21 to transfer the first 6 parameters
+ * of a procedure.
+*/
+static inline int tcg_target_get_call_iarg_regs_count(int flags)
+{
+    return 6;
+}
+
+/*
+ * given a constraint, return the set of usable registers. this function is
+ * called once for each op at qemu's initialization stage.
+*/
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+    const char *ct_str = *pct_str;
+
+    switch(ct_str[0])
+    {
+    case 'r':
+        /* constraint 'r' means any register is okay */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        break;
+
+    case 'L':
+        /*
+        * constraint 'L' is used for qemu_ld/st and has 2 meanings:
+        * 1st, the argument needs to be allocated a register.
+        * 2nd, we reserve some registers from the call-clobbered list for
+        * qemu_ld/st local usage, so these registers must not be
+        * allocated to the argument that the 'L' constraint is describing.
+        *
+        * note that the qemu_ld/st ops have the TCG_OPF_CALL_CLOBBER flag, and
+        * tcg will free all call-clobbered registers before generating target
+        * insns for qemu_ld/st, so we can use these registers directly without
+        * worrying about destroying their content.
+        */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_16);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_17);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_18);
+        break;
+
+    default:
+        return -1;
+    }
+
+    ct_str++;
+    *pct_str = ct_str;
+    return 0;
+}
+
+/*
+ * whether an op's input argument may be a constant.
+*/
+static inline int tcg_target_const_match( \
+	tcg_target_long val, const TCGArgConstraint *arg_ct)
+{
+    int ct = arg_ct->ct;
+    return (ct & TCG_CT_CONST) ? 1 : 0;
+}
+
+static inline void tcg_out_fmt_br(TCGContext *s, int opc, int ra, int disp)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_DISP21(disp));
+}
+
+static inline void tcg_out_fmt_mem(TCGContext *s, int opc, int ra, int rb, int disp)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_RB(rb)|INSN_DISP16(disp));
+}
+
+static inline void tcg_out_fmt_jmp(TCGContext *s, int opc, int ra, int rb, int rsved)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_RB(rb)|INSN_RSVED(rsved));
+}
+
+static inline void tcg_out_fmt_opr(TCGContext *s, int opc, int ra, int rb, int rc)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_RB(rb)|INSN_RC(rc));
+}
+
+static inline void tcg_out_fmt_opi(TCGContext *s, int opc, int ra, int lit, int rc)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_LIT(lit)|INSN_RC(rc)|(1<<12));
+}
+
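+/*
+ * note: in the operate format, setting bit 12 selects the literal form: the
+ * Rb field is replaced by an 8-bit zero-extended literal held in bits
+ * <20:13>, so e.g. tcg_out_fmt_opi(s, INSN_ADDQ, ra, 8, ra) emits
+ * "addq ra, 8, ra".
+*/
+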

+/*
+ * mov from a reg to another
+*/
+static inline void tcg_out_mov(TCGContext *s, int rc, int rb)
+{
+    if ( rb != rc ) {
+        tcg_out_fmt_opr(s, INSN_BIS, TCG_REG_31, rb, rc);
+    }
+}
+
+/*
+ * mov a 64-bit immediate 'arg' to register 'ra'. this function generates
+ * a fixed-length (5 insns) target insn sequence.
+*/
+static void tcg_out_movi_fixl( \
+    TCGContext *s, TCGType type, int ra, tcg_target_long arg)
+{
+    tcg_target_long l0, l1, l2, l3;
+    tcg_target_long l1_tmp, l2_tmp, l3_tmp;
+
+    l0 = arg & 0xffffu;
+    l1_tmp = l1 = ( arg >> 16) & 0xffffu;
+    l2_tmp = l2 = ( arg >> 32) & 0xffffu;
+    l3_tmp = l3 = ( arg >> 48) & 0xffffu;
+
+    if ( l0 & 0x8000u)
+        l1_tmp = (l1 + 1) & 0xffffu;
+    if ( (l1_tmp & 0x8000u) || ((l1_tmp == 0) && (l1_tmp != l1)))
+        l2_tmp = (l2 + 1) & 0xffffu;
+    if ( (l2_tmp & 0x8000u) || ((l2_tmp == 0) && (l2_tmp != l2)))
+        l3_tmp = (l3 + 1) & 0xffffu;
+
+    tcg_out_fmt_mem(s, INSN_LDAH, ra, TCG_REG_31, l3_tmp);
+    tcg_out_fmt_mem(s, INSN_LDA, ra, ra, l2_tmp);
+    tcg_out_fmt_opi(s, INSN_SLL, ra, 32, ra);
+    tcg_out_fmt_mem(s, INSN_LDAH, ra, ra, l1_tmp);
+    tcg_out_fmt_mem(s, INSN_LDA, ra, ra, l0);
+}
+
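+/*
+ * worked example (illustrative): arg = 0x123456789abcdef0 splits into
+ * l3=0x1234, l2=0x5678, l1=0x9abc, l0=0xdef0. LDA/LDAH sign-extend their
+ * 16-bit displacement, so a set sign bit in a lower chunk borrows 1 from
+ * the chunk above; the l*_tmp adjustments pre-compensate for that borrow.
+*/
+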
+/*
+ * mov a 64-bit immediate 'arg' to register 'ra'. this function generates
+ * a variable-length target insn sequence.
+*/
+static inline void tcg_out_movi( \
+    TCGContext *s, TCGType type, int ra, tcg_target_long arg)
+{
+    if ( type == TCG_TYPE_I32)
+        arg = (int32_t)arg;
+
+    if( arg == (int16_t)arg ) {
+        tcg_out_fmt_mem(s, INSN_LDA, ra, TCG_REG_31, arg);
+    } else if( arg == (int32_t)arg ) {
+        tcg_out_fmt_mem(s, INSN_LDAH, ra, TCG_REG_31, (arg>>16));
+        if( arg & ((tcg_target_ulong)0x8000) ) {
+            tcg_out_fmt_mem(s, INSN_LDAH, ra, ra, 1);
+        }
+        tcg_out_fmt_mem(s, INSN_LDA, ra, ra, arg);
+    } else {
+        tcg_out_movi_fixl(s, type, ra, arg);
+    }
+}
+
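+/*
+ * note: in the 32-bit case above, LDA sign-extends its 16-bit operand, so
+ * when bit 15 of 'arg' is set an extra "ldah ra, 1(ra)" pre-adds 0x10000
+ * to cancel the borrow.
+*/
+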
+static inline int _is_tmp_reg( int r)
+{
+    if ( r == TMP_REG1 || r == TMP_REG2 || r == TMP_REG3)
+        return 1;
+    else
+        return 0;
+}
+
+/*
+ * load the value at disp(Rb) into Ra.
+*/
+static inline void tcg_out_ld( \
+    TCGContext *s, TCGType type, int ra, int rb, tcg_target_long disp)
+{
+    int opc;
+
+    if ( _is_tmp_reg(ra) || _is_tmp_reg(rb))
+        tcg_abort();
+
+    opc = ((type == TCG_TYPE_I32) ? INSN_LDL : INSN_LDQ);
+
+    if( disp != (int16_t)disp ) {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_fmt_opr(s, INSN_ADDQ, rb, TMP_REG1, TMP_REG1);
+        tcg_out_fmt_mem(s, opc, ra, TMP_REG1, 0);
+    }
+    else
+        tcg_out_fmt_mem(s, opc, ra, rb, disp);
+}
+
+/*
+ * store the value in Ra to disp(Rb).
+*/
+static inline void tcg_out_st( \
+    TCGContext *s, TCGType type, int ra, int rb, tcg_target_long disp)
+{
+    int opc;
+
+    if ( _is_tmp_reg(ra) || _is_tmp_reg(rb))
+        tcg_abort();
+
+    opc = ((type == TCG_TYPE_I32) ? INSN_STL : INSN_STQ);
+
+    if( disp != (int16_t)disp ) {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_fmt_opr(s, INSN_ADDQ, rb, TMP_REG1, TMP_REG1);
+        tcg_out_fmt_mem(s, opc, ra, TMP_REG1, 0);
+    }
+    else
+        tcg_out_fmt_mem(s, opc, ra, rb, disp);
+}
+
+/*
+ * generate an arithmetic instruction with an immediate. ra is used as both
+ * input and output, and val is the other input.
+*/
+static inline void tgen_arithi( \
+    TCGContext *s, int opc, int ra, tcg_target_long val)
+{
+    if ( _is_tmp_reg(ra))
+        tcg_abort();
+
+    if (val == (uint8_t)val) {
+        tcg_out_fmt_opi(s, opc, ra, val, ra);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, val);
+        tcg_out_fmt_opr(s, opc, ra, TMP_REG1, ra);
+    }
+}
+
+static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+{
+    if (val != 0)
+        tgen_arithi(s, INSN_ADDQ, reg, val);
+}
+
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+    tcg_out_fmt_opi(s, INSN_SUBQ, TCG_REG_30, 8, TCG_REG_30);
+    tcg_out_fmt_mem(s, INSN_STQ, reg, TCG_REG_30, 0);
+}
+
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+    tcg_out_fmt_mem(s, INSN_LDQ, reg, TCG_REG_30, 0);
+    tcg_out_fmt_opi(s, INSN_ADDQ, TCG_REG_30, 8, TCG_REG_30);
+}
+
+static const uint64_t tcg_cond_to_jcc[10] = {
+    [TCG_COND_EQ] = INSN_CMPEQ,
+    [TCG_COND_NE] = INSN_CMPEQ,
+    [TCG_COND_LT] = INSN_CMPLT,
+    [TCG_COND_GE] = INSN_CMPLT,
+    [TCG_COND_LE] = INSN_CMPLE,
+    [TCG_COND_GT] = INSN_CMPLE,
+    [TCG_COND_LTU] = INSN_CMPULT,
+    [TCG_COND_GEU] = INSN_CMPULT,
+    [TCG_COND_LEU] = INSN_CMPULE,
+    [TCG_COND_GTU] = INSN_CMPULE
+};
+
+static void patch_reloc(uint8_t *code_ptr, \
+    int type, tcg_target_long value, tcg_target_long addend)
+{
+    TCGContext s;
+    tcg_target_long val;
+
+    if ( type != R_ALPHA_BRADDR)
+        tcg_abort();
+
+    s.code_ptr = code_ptr;
+    val = (value - (tcg_target_long)s.code_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+
+    tcg_out_fmt_br(&s, INSN_BR, TCG_REG_31, val);
+}
+
+static void tcg_out_br(TCGContext *s, int label_index)
+{
+    TCGLabel *l = &s->labels[label_index];
+
+    if (l->has_value) {
+        tcg_target_long val;
+        val = ((tcg_target_long)(l->u.value) - (tcg_target_long)s->code_ptr - 4) >> 2;
+        if ( val >= -0x100000 && val < 0x100000) {
+            // if distance can be put into 21-bit field
+            tcg_out_fmt_br(s, INSN_BR, TCG_REG_31, val);
+	} else {
+            tcg_abort();
+	}
+    } else {
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_BRADDR, label_index, 0);
+        s->code_ptr += 4;
+    }
+}
+
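+/*
+ * note: each condition and its negation share a single compare insn in
+ * tcg_cond_to_jcc; tcg_out_brcond below then tests the low bit of the
+ * compare result with BLBS ("branch if low bit set") or BLBC, using
+ * (cond & 1) to pick the branch sense.
+*/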
+static void tcg_out_brcond( TCGContext *s, int cond, \
+    TCGArg arg1, TCGArg arg2, int const_arg2, int label_index)
+{
+    int opc;
+    TCGLabel *l = &s->labels[label_index];
+
+    if ( cond < TCG_COND_EQ || cond > TCG_COND_GTU || const_arg2)
+        tcg_abort();
+
+    opc = tcg_cond_to_jcc[cond];
+    tcg_out_fmt_opr(s, opc, arg1, arg2, TMP_REG1);
+
+    if (l->has_value) {
+        tcg_target_long val;
+        val = ((tcg_target_long)l->u.value - (tcg_target_long)s->code_ptr - 4) >> 2;
+        if ( val >= -0x100000 && val < 0x100000) {
+            // if distance can be put into 21-bit field
+            opc = (cond & 1) ? INSN_BLBC : INSN_BLBS;
+            tcg_out_fmt_br(s, opc, TMP_REG1, val);
+	} else {
+            tcg_abort();
+	}
+    } else {
+        opc = (cond & 1) ? INSN_BLBS : INSN_BLBC;
+        tcg_out_fmt_br(s, opc, TMP_REG1, 1);
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_BRADDR, label_index, 0);
+        s->code_ptr += 4;
+    }
+}
+
+#if defined(CONFIG_SOFTMMU)
+
+#include "../../softmmu_defs.h"
+
+static void *qemu_ld_helpers[4] = {
+    __ldb_mmu,
+    __ldw_mmu,
+    __ldl_mmu,
+    __ldq_mmu,
+};
+
+static void *qemu_st_helpers[4] = {
+    __stb_mmu,
+    __stw_mmu,
+    __stl_mmu,
+    __stq_mmu,
+};
+
+#endif
+
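+/*
+ * note (per the register setup below): the load helpers take (addr,
+ * mem_index) and return the loaded value in $0; the store helpers take
+ * (addr, value, mem_index). in the 'opc' argument of qemu_ld/st, bits
+ * <1:0> encode the access size and bit 2 requests sign extension.
+*/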

+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+{
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;
+    tcg_target_long val;
+
+#if defined(CONFIG_SOFTMMU)
+    uint8_t *label1_ptr, *label2_ptr;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+    mem_index = *args;
+    s_bits = opc & 3;
+
+    r0 = TCG_REG_16;
+    r1 = TCG_REG_17;
+
+#if defined(CONFIG_SOFTMMU)
+
+    tcg_out_mov(s, r1, addr_reg);
+    tcg_out_mov(s, r0, addr_reg);
+
+#if TARGET_LONG_BITS == 32
+    /* if the VM is a 32-bit arch, clear the high 32 bits of addr */
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r0, 0x0f, r0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r1, 0x0f, r1);
+#endif
+
+    tgen_arithi(s, INSN_AND, r0, TARGET_PAGE_MASK|((1<<s_bits)-1));
+
+    tgen_arithi(s, INSN_SRL, r1, TARGET_PAGE_BITS-CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, INSN_AND, r1, (CPU_TLB_SIZE-1)<<CPU_TLB_ENTRY_BITS);
+
+    tcg_out_addi(s, r1, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TCG_REG_15, r1);
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_mem(s, INSN_LDL, TMP_REG1, r1, 0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, TMP_REG1, 0x0f, TMP_REG1);
+#else
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1, 0);
+#endif
+
+    //
+    // now r0 contains the page# and TMP_REG1 contains tlb_entry.addr_read;
+    // below we compare them
+    //
+    tcg_out_fmt_opr(s, INSN_CMPEQ, TMP_REG1, r0, TMP_REG1);
+
+    tcg_out_mov(s, r0, addr_reg);
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r0, 0x0f, r0);
+#endif
+
+    //
+    // if equal, we jump to label1. since label1 is not resolved yet,
+    // we just record a relocation.
+    //
+    label1_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    //
+    // here, unequal: TLB miss.
+    //
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_17, mem_index);
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)qemu_ld_helpers[s_bits]);
+    tcg_out_push(s, addr_reg);
+    //tcg_out_push(s, TCG_REG_26);
+    //tcg_out_push(s, TCG_REG_15);
+    tcg_out_mov(s, TCG_REG_27, TMP_REG1);
+    tcg_out_fmt_jmp(s, INSN_CALL, TCG_REG_26, TMP_REG1, 0);
+    //tcg_out_pop(s, TCG_REG_15);
+    //tcg_out_pop(s, TCG_REG_26);
+    tcg_out_pop(s, addr_reg);
+
+    //
+    // after the helper call, the result of the load is in $0
+    //
+    switch(opc) {
+    case 0 | 4:
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, TCG_REG_0, data_reg);
+        break;
+    case 1 | 4:
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, TCG_REG_0, data_reg);
+        break;
+    case 2 | 4:
+        tcg_out_fmt_opr(s, INSN_ADDL, TCG_REG_0, TCG_REG_31, data_reg);
+        break;
+    case 0:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, TCG_REG_0, 0x1, data_reg);
+        break;
+    case 1:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, TCG_REG_0, 0x3, data_reg);
+        break;
+    case 2:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, TCG_REG_0, 0xf, data_reg);
+        break;
+    case 3:
+        tcg_out_mov(s, data_reg, TCG_REG_0);
+        break;
+    default:
+        tcg_abort();
+        break;
+    }
+
+    //
+    // we are done; jmp to label2. label2 is not resolved yet,
+    // so we record a relocation.
+    //
+    label2_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    // patch the jmp to label1
+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t) \
+        ( INSN_BNE | ( TMP_REG1 << 21 ) | ( val & 0x1fffff));
+
+    //
+    // if we get here, a TLB entry was hit: r0 contains the guest addr and
+    // r1 points to tlb_entry.addr_read. we load tlb_entry.addend
+    // (64-bit on alpha) and add it to r0 to get the host VA
+    //
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, \
+	offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TMP_REG1, r1);
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1, 0);
+    tcg_out_fmt_opr(s, INSN_ADDQ, r0, TMP_REG1, r0);
+
+#else
+    r0 = addr_reg;
+#endif // defined(CONFIG_SOFTMMU)
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    tcg_abort();
+#endif
+
+    //
+    // here r0 contains the host VA that can be used to access guest memory
+    //
+    switch(opc) {
+    case 0:
+        tcg_out_fmt_mem(s, INSN_LDBU, data_reg, r0, 0);
+        break;
+    case 0 | 4:
+        tcg_out_fmt_mem(s, INSN_LDBU, data_reg, r0, 0);
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, data_reg, data_reg);
+        break;
+    case 1:
+        tcg_out_fmt_mem(s, INSN_LDWU, data_reg, r0, 0);
+        break;
+    case 1 | 4:
+        tcg_out_fmt_mem(s, INSN_LDWU, data_reg, r0, 0);
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, data_reg, data_reg);
+        break;
+    case 2:
+        tcg_out_fmt_mem(s, INSN_LDL, data_reg, r0, 0);
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, data_reg, 0xf, data_reg);
+        break;
+    case 2 | 4:
+        tcg_out_fmt_mem(s, INSN_LDL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_fmt_mem(s, INSN_LDQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+
+#if defined(CONFIG_SOFTMMU)
+    /* label2: */
+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)( INSN_BR \
+        | ( TCG_REG_31  << 21 ) | ( val & 0x1fffff) );
+#endif
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+{
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;
+    tcg_target_long val;
+
+#if defined(CONFIG_SOFTMMU)
+    uint8_t *label1_ptr, *label2_ptr;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+    mem_index = *args;
+    s_bits = opc&3;
+
+    r0 = TCG_REG_16;
+    r1 = TCG_REG_17;
+
+#if defined(CONFIG_SOFTMMU)
+
+    tcg_out_mov(s, r1, addr_reg);
+    tcg_out_mov(s, r0, addr_reg);
+
+#if TARGET_LONG_BITS == 32
+    /* if the VM is a 32-bit arch, clear the high 32 bits of addr */
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r0, 0x0f, r0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r1, 0x0f, r1);
+#endif
+
+    tgen_arithi(s, INSN_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+    tgen_arithi(s, INSN_SRL, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, INSN_AND, r1, (CPU_TLB_SIZE-1) << CPU_TLB_ENTRY_BITS);
+
+    tcg_out_addi(s, r1, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TCG_REG_15, r1);
+
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_mem(s, INSN_LDL, TMP_REG1, r1, 0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, TMP_REG1, 0x0f, TMP_REG1);
+#else
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1, 0);
+#endif
+
+    //
+    // now r0 contains the page# and TMP_REG1 contains tlb_entry.addr_write;
+    // below we compare them
+    //
+    tcg_out_fmt_opr(s, INSN_CMPEQ, TMP_REG1, r0, TMP_REG1);
+
+    tcg_out_mov(s, r0, addr_reg);
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r0, 0x0f, r0);
+#endif
+
+    //
+    // if equal, we jump to label1. since label1 is not resolved yet,
+    // we just record a relocation.
+    //
+    label1_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    // here, unequal: TLB miss ...
+    tcg_out_mov(s, TCG_REG_17, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_18, mem_index);
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)qemu_st_helpers[s_bits]);
+
+    tcg_out_push(s, data_reg);
+    tcg_out_push(s, addr_reg);
+    //tcg_out_push(s, TCG_REG_26);
+    //tcg_out_push(s, TCG_REG_15);
+    tcg_out_mov(s, TCG_REG_27, TMP_REG1);
+    tcg_out_fmt_jmp(s, INSN_CALL, TCG_REG_26, TMP_REG1, 0);
+    //tcg_out_pop(s, TCG_REG_15);
+    //tcg_out_pop(s, TCG_REG_26);
+    tcg_out_pop(s, addr_reg);
+    tcg_out_pop(s, data_reg);
+
+    //
+    // we are done; jmp to label2. label2 is not resolved yet,
+    // so we record a relocation.
+    //
+    label2_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    // patch the jmp to label1
+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t) \
+        ( INSN_BNE | ( TMP_REG1  << 21 ) | ( val & 0x1fffff));
+
+    //
+    // if we get here, a TLB entry was hit: r0 contains the guest addr and
+    // r1 points to tlb_entry.addr_write. we load tlb_entry.addend
+    // (64-bit on alpha) and add it to r0 to get the host VA
+    //
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, \
+        offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TMP_REG1, r1);
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1,  0);
+    tcg_out_fmt_opr(s, INSN_ADDQ, r0, TMP_REG1, r0);
+
+#else
+    r0 = addr_reg;
+#endif
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    tcg_abort();
+#endif
+
+    //
+    // here r0 contains the host VA that can be used to access guest memory
+    //
+    switch(opc) {
+    case 0:
+        tcg_out_fmt_mem(s, INSN_STB, data_reg, r0, 0);
+        break;
+    case 1:
+        tcg_out_fmt_mem(s, INSN_STW, data_reg, r0, 0);
+        break;
+    case 2:
+        tcg_out_fmt_mem(s, INSN_STL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_fmt_mem(s, INSN_STQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+
+#if defined(CONFIG_SOFTMMU)
+    /* patch the jmp to label2: */
+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)( INSN_BR \
+        | ( TCG_REG_31  << 21 ) | ( val & 0x1fffff));
+#endif
+}
+
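+/*
+ * note: for tgen_ldxx/tgen_stxx below, bits <1:0> of 'flags' select the
+ * access size (0=8, 1=16, 2=32, 3=64 bits) and bit 2 requests sign
+ * extension. LDL already sign-extends on alpha, so the 2|4 case needs no
+ * fixup, while the unsigned 32-bit case must zero-extend with ZAPNOT.
+*/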
+static inline void tgen_ldxx( TCGContext *s, int ra, int rb, tcg_target_long disp, int flags)
+{
+    int opc_array[4] = { INSN_LDBU, INSN_LDWU, INSN_LDL, INSN_LDQ};
+    int opc = opc_array[flags & 3];
+
+    if ( _is_tmp_reg(ra) || _is_tmp_reg(rb))
+        tcg_abort();
+
+    if( disp != (int16_t)disp ) {
+        /* disp cannot be stored in the insn directly */
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_fmt_opr(s, INSN_ADDQ, rb, TMP_REG1, TMP_REG1);
+        tcg_out_fmt_mem(s, opc, ra, TMP_REG1, 0);
+    } else {
+        tcg_out_fmt_mem(s, opc, ra, rb, disp);
+    }
+
+    switch ( flags & 7) {
+    case 0:
+    case 1:
+    case 2|4:
+    case 3:
+        break;
+    case 0|4:
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, ra, ra);
+        break;
+    case 1|4:
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, ra, ra);
+        break;
+    case 2:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, ra, 0x0f, ra);
+        break;
+    default:
+        tcg_abort();
+    }
+}
+
+static inline void tgen_stxx( TCGContext *s, int ra, int rb, tcg_target_long disp, int flags)
+{
+    int opc_array[4] = { INSN_STB, INSN_STW, INSN_STL, INSN_STQ};
+    int opc = opc_array[flags & 3];
+
+    if( disp != (int16_t)disp ) {
+        /* disp cannot be stored in the insn directly */
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_fmt_opr(s, INSN_ADDQ, rb, TMP_REG1, TMP_REG1);
+        tcg_out_fmt_mem(s, opc, ra, TMP_REG1, 0);
+    } else {
+        tcg_out_fmt_mem(s, opc, ra, rb, disp);
+    }
+}
+
+static inline void tcg_out_op(TCGContext *s, \
+	int opc, const TCGArg *args, const int *const_args)
+{
+    int oc;
+    switch(opc)
+    {
+    case INDEX_op_exit_tb:
+        /*
+         * exit_tb t0, where t0 is always a constant and is returned to the
+         * engine. since we return to the engine right away, $0 and the temp
+         * register can be clobbered freely here.
+        */
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_0, args[0]);
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)tb_ret_addr);
+        tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, TMP_REG1, 0);
+        break;
+
+    case INDEX_op_goto_tb:
+        /* goto_tb idx, where idx is constant 0 or 1, indicating the branch # */
+        if (s->tb_jmp_offset) {
+            /* we don't support direct jmp */
+            tcg_abort();
+        } else {
+            tcg_out_movi( s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)(s->tb_next + args[0]));
+            tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, TMP_REG1, 0);
+            tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, TMP_REG1, 0);
+        }
+        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        break;
+
+    case INDEX_op_call:
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            //tcg_out_push( s, TCG_REG_26);
+            //tcg_out_push( s, TCG_REG_15);
+            tcg_out_mov( s, TCG_REG_27, args[0]);
+            tcg_out_fmt_jmp(s, INSN_CALL, TCG_REG_26, args[0], 0);
+            //tcg_out_pop( s, TCG_REG_15);
+            //tcg_out_pop( s, TCG_REG_26);
+        }
+        break;
+
+    case INDEX_op_jmp:
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, args[0], 0);
+        }
+        break;
+
+    case INDEX_op_br:
+        tcg_out_br(s, args[0]);
+        break;
+
+    case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8u_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_ld8s_i32:
+    case INDEX_op_ld8s_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 0|4);
+        break;
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16u_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld16s_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 1|4);
+        break;
+    case INDEX_op_ld32u_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 2);
+        break;
+    case INDEX_op_ld_i32:
+    case INDEX_op_ld32s_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 2|4);
+        break;
+    case INDEX_op_ld_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 3);
+        break;
+
+    case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64:
+        tgen_stxx( s, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64:
+        tgen_stxx( s, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_st_i32:
+    case INDEX_op_st32_i64:
+        tgen_stxx( s, args[0], args[1], args[2], 2);
+        break;
+    case INDEX_op_st_i64:
+        tgen_stxx( s, args[0], args[1], args[2], 3);
+        break;
+
+    case INDEX_op_add_i32:
+    case INDEX_op_add_i64:
+        oc = INSN_ADDQ;
+        goto gen_arith;
+    case INDEX_op_sub_i32:
+    case INDEX_op_sub_i64:
+        oc = INSN_SUBQ;
+        goto gen_arith;
+    case INDEX_op_mul_i32:
+        oc = INSN_MULL;
+        goto gen_arith;
+    case INDEX_op_mul_i64:
+        oc = INSN_MULQ;
+        goto gen_arith;
+    case INDEX_op_and_i32:
+    case INDEX_op_and_i64:
+        oc = INSN_AND;
+        goto gen_arith;
+    case INDEX_op_or_i32:
+    case INDEX_op_or_i64:
+        oc = INSN_BIS;
+        goto gen_arith;
+    case INDEX_op_xor_i32:
+    case INDEX_op_xor_i64:
+        oc = INSN_XOR;
+        goto gen_arith;
+    case INDEX_op_shl_i32:
+    case INDEX_op_shl_i64:
+        oc = INSN_SLL;
+        goto gen_arith;
+    case INDEX_op_shr_i32:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, args[1], 0x0f, args[1]);
+    case INDEX_op_shr_i64:
+        oc = INSN_SRL;
+        goto gen_arith;
+    case INDEX_op_sar_i32:
+        tcg_out_fmt_opr(s, INSN_ADDL, args[1], TCG_REG_31, args[1]);
+    case INDEX_op_sar_i64:
+        oc = INSN_SRA;
+    gen_arith:
+        if (const_args[2]) {
+            tcg_abort();
+        } else {
+            tcg_out_fmt_opr(s, oc, args[1], args[2], args[0]);
+        }
+        break;
+
+    case INDEX_op_brcond_i32:
+        tcg_out_fmt_opr(s, INSN_ADDL, args[0], TCG_REG_31, args[0]);
+        tcg_out_fmt_opr(s, INSN_ADDL, args[1], TCG_REG_31, args[1]);
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        break;
+    case INDEX_op_brcond_i64:
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        break;
+
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, args[1], args[0]);
+        break;
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, args[1], args[0]);
+        break;
+    case INDEX_op_ext32s_i64:
+        tcg_out_fmt_opr(s, INSN_ADDL, args[1], TCG_REG_31, args[0]);
+        break;
+
+    case INDEX_op_qemu_ld8u:
+        tcg_out_qemu_ld(s, args, 0);
+        break;
+    case INDEX_op_qemu_ld8s:
+        tcg_out_qemu_ld(s, args, 0 | 4);
+        break;
+    case INDEX_op_qemu_ld16u:
+        tcg_out_qemu_ld(s, args, 1);
+        break;
+    case INDEX_op_qemu_ld16s:
+        tcg_out_qemu_ld(s, args, 1 | 4);
+        break;
+    case INDEX_op_qemu_ld32u:
+        tcg_out_qemu_ld(s, args, 2);
+        break;
+    case INDEX_op_qemu_ld32s:
+        tcg_out_qemu_ld(s, args, 2 | 4);
+        break;
+    case INDEX_op_qemu_ld64:
+        tcg_out_qemu_ld(s, args, 3);
+        break;
+
+    case INDEX_op_qemu_st8:
+        tcg_out_qemu_st(s, args, 0);
+        break;
+    case INDEX_op_qemu_st16:
+        tcg_out_qemu_st(s, args, 1);
+        break;
+    case INDEX_op_qemu_st32:
+        tcg_out_qemu_st(s, args, 2);
+        break;
+    case INDEX_op_qemu_st64:
+        tcg_out_qemu_st(s, args, 3);
+        break;
+
+    case INDEX_op_movi_i32:
+    case INDEX_op_movi_i64:
+    case INDEX_op_mov_i32:
+    case INDEX_op_mov_i64:
+    case INDEX_op_div2_i32:
+    case INDEX_op_divu2_i32:
+    default:
+        tcg_abort();
+    }
+}
+
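+/*
+ * note: the i32 and i64 op variants above mostly share 64-bit insns (e.g.
+ * ADDQ serves both add_i32 and add_i64); operands are only narrowed
+ * explicitly where the result depends on it, as in brcond_i32, shr_i32
+ * and sar_i32.
+*/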

+static const TCGTargetOpDef alpha_op_defs[] = {
+    { INDEX_op_exit_tb, { } },
+    { INDEX_op_goto_tb, { } },
+    { INDEX_op_call, { "r" } },
+    { INDEX_op_jmp, { "r" } },
+    { INDEX_op_br, { } },
+
+    { INDEX_op_mov_i32, { "r", "r" } },
+    { INDEX_op_movi_i32, { "r" } },
+    { INDEX_op_ld8u_i32, { "r", "r" } },
+    { INDEX_op_ld8s_i32, { "r", "r" } },
+    { INDEX_op_ld16u_i32, { "r", "r" } },
+    { INDEX_op_ld16s_i32, { "r", "r" } },
+    { INDEX_op_ld_i32, { "r", "r" } },
+    { INDEX_op_st8_i32, { "r", "r" } },
+    { INDEX_op_st16_i32, { "r", "r" } },
+    { INDEX_op_st_i32, { "r", "r" } },
+
+    { INDEX_op_add_i32, { "r", "0", "r" } },
+    { INDEX_op_mul_i32, { "r", "0", "r" } },
+    //{ INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
+    //{ INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
+    { INDEX_op_sub_i32, { "r", "0", "r" } },
+    { INDEX_op_and_i32, { "r", "0", "r" } },
+    { INDEX_op_or_i32, { "r", "0", "r" } },
+    { INDEX_op_xor_i32, { "r", "0", "r" } },
+
+    { INDEX_op_shl_i32, { "r", "0", "r" } },
+    { INDEX_op_shr_i32, { "r", "0", "r" } },
+    { INDEX_op_sar_i32, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i32, { "r", "r" } },
+
+    { INDEX_op_mov_i64, { "r", "r" } },
+    { INDEX_op_movi_i64, { "r" } },
+    { INDEX_op_ld8u_i64, { "r", "r" } },
+    { INDEX_op_ld8s_i64, { "r", "r" } },
+    { INDEX_op_ld16u_i64, { "r", "r" } },
+    { INDEX_op_ld16s_i64, { "r", "r" } },
+    { INDEX_op_ld32u_i64, { "r", "r" } },
+    { INDEX_op_ld32s_i64, { "r", "r" } },
+    { INDEX_op_ld_i64, { "r", "r" } },
+    { INDEX_op_st8_i64, { "r", "r" } },
+    { INDEX_op_st16_i64, { "r", "r" } },
+    { INDEX_op_st32_i64, { "r", "r" } },
+    { INDEX_op_st_i64, { "r", "r" } },
+
+    { INDEX_op_add_i64, { "r", "0", "r" } },
+    { INDEX_op_mul_i64, { "r", "0", "r" } },
+    //{ INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
+    //{ INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
+    { INDEX_op_sub_i64, { "r", "0", "r" } },
+    { INDEX_op_and_i64, { "r", "0", "r" } },
+    { INDEX_op_or_i64, { "r", "0", "r" } },
+    { INDEX_op_xor_i64, { "r", "0", "r" } },
+
+    { INDEX_op_shl_i64, { "r", "0", "r" } },
+    { INDEX_op_shr_i64, { "r", "0", "r" } },
+    { INDEX_op_sar_i64, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i64, { "r", "r" } },
+
+    { INDEX_op_ext8s_i32, { "r", "r"} },
+    { INDEX_op_ext16s_i32, { "r", "r"} },
+    { INDEX_op_ext8s_i64, { "r", "r"} },
+    { INDEX_op_ext16s_i64, { "r", "r"} },
+    { INDEX_op_ext32s_i64, { "r", "r"} },
+
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32s, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
+    //{ INDEX_op_qemu_st64, { "L", "L", "L"} },
+    { INDEX_op_qemu_st64, { "L", "L"} },
+    { -1 },
+};
+
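+/*
+ * note: in the constraint lists above, "0" is a matching constraint that
+ * ties the first input to the output register, so e.g. add_i32 is emitted
+ * as "addq Rout, Rin2, Rout".
+*/
+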

+
+static int tcg_target_callee_save_regs[] = {
+    TCG_REG_15,		// holds the global env (cpu_env) pointer
+    TCG_REG_9,
+    TCG_REG_10,
+    TCG_REG_11,
+    TCG_REG_12,
+    TCG_REG_13,
+    TCG_REG_14
+};
+
+
+/*
+ * Generate the global QEMU prologue and epilogue code.
+*/
+void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int i, frame_size, push_size, stack_addend;
+
+    /* TB prologue */
+    /*printf("TB prologue @ %lx\n", s->code_ptr);*/
+
+    /* save TCG_REG_26 ~ TCG_REG_29 */
+    tcg_out_push(s, TCG_REG_26);
+    tcg_out_push(s, TCG_REG_27);
+    tcg_out_push(s, TCG_REG_28);
+    tcg_out_push(s, TCG_REG_29);
+
+    /* save all callee-saved registers */
+    for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_push(s, tcg_target_callee_save_regs[i]);
+    }
+
+    /* reserve some stack space */
+    push_size = 8 + (4 + ARRAY_SIZE(tcg_target_callee_save_regs)) * 8;
+    frame_size = push_size + 4*TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1);
+    stack_addend = frame_size - push_size;
+    tcg_out_addi(s, TCG_REG_30, -stack_addend);
+
+    tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, TCG_REG_16, 0);		/* jmp $16 */
+
+    /* TB epilogue */
+    tb_ret_addr = s->code_ptr;
+    tcg_out_addi(s, TCG_REG_30, stack_addend);
+    for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
+        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
+    }
+
+    tcg_out_pop(s, TCG_REG_29);
+    tcg_out_pop(s, TCG_REG_28);
+    tcg_out_pop(s, TCG_REG_27);
+    tcg_out_pop(s, TCG_REG_26);
+    tcg_out_fmt_jmp(s, INSN_RET, TCG_REG_31, TCG_REG_26, 0);		/* ret */
+}
+
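+/*
+ * worked example (illustrative, assuming TCG_STATIC_CALL_ARGS_SIZE is its
+ * usual value of 128): 4 special pushes plus 7 callee-saved pushes give
+ * push_size = 8 + 11*8 = 96; frame_size = 96 + 512 = 608, which is already
+ * 16-byte aligned, so stack_addend = 512.
+*/
+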

+
+void tcg_target_init(TCGContext *s)
+{
+    /* fail-safe */
+    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
+        tcg_abort();
+
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+        (1 << TCG_REG_1 ) | (1 << TCG_REG_2 ) | (1 << TCG_REG_3 ) | (1 << TCG_REG_4 ) |
+        (1 << TCG_REG_5 ) | (1 << TCG_REG_6 ) | (1 << TCG_REG_7 ) | (1 << TCG_REG_8 ) |
+        (1 << TCG_REG_22) | (1 << TCG_REG_23) | (1 << TCG_REG_24) | (1 << TCG_REG_25) |
+        (1 << TCG_REG_16) | (1 << TCG_REG_17) | (1 << TCG_REG_18) | (1 << TCG_REG_19) |
+        (1 << TCG_REG_20) | (1 << TCG_REG_21) | (1 << TCG_REG_0 ));
+
+    //tcg_regset_set32( tcg_target_call_clobber_regs, 0, 0xffffffff);
+
+    tcg_regset_clear(s->reserved_regs);
+    // $26 ~ $31 are never allocated by tcg.c
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_26);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_27);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_28);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_29);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_30);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_31);
+    // registers reserved for temporary usage
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG1);
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG2);
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG3);
+
+    tcg_add_target_add_op_defs(alpha_op_defs);
+}
+
diff --git a/tcg/alpha/tcg-target.h b/tcg/alpha/tcg-target.h
new file mode 100644
index 0000000..79c57af
--- /dev/null
+++ b/tcg/alpha/tcg-target.h
@@ -0,0 +1,70 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define TCG_TARGET_ALPHA 1
+
+#define TCG_TARGET_REG_BITS 64
+
+#define TCG_TARGET_NB_REGS 32
+
+enum {
+    TCG_REG_0 = 0, TCG_REG_1, TCG_REG_2, TCG_REG_3,
+    TCG_REG_4, TCG_REG_5, TCG_REG_6, TCG_REG_7,
+    TCG_REG_8, TCG_REG_9, TCG_REG_10, TCG_REG_11,
+    TCG_REG_12, TCG_REG_13, TCG_REG_14, TCG_REG_15,
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19,
+    TCG_REG_20, TCG_REG_21, TCG_REG_22, TCG_REG_23,
+    TCG_REG_24, TCG_REG_25, TCG_REG_26, TCG_REG_27,
+    TCG_REG_28, TCG_REG_29, TCG_REG_30, TCG_REG_31
+};
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_30
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+
+/* we have signed extension instructions */
+#define TCG_TARGET_HAS_ext8s_i32
+#define TCG_TARGET_HAS_ext16s_i32
+#define TCG_TARGET_HAS_ext8s_i64
+#define TCG_TARGET_HAS_ext16s_i64
+#define TCG_TARGET_HAS_ext32s_i64
+
+/* Note: must be synced with dyngen-exec.h */
+#define TCG_AREG0 TCG_REG_15
+#define TCG_AREG1 TCG_REG_9
+#define TCG_AREG2 TCG_REG_10
+#define TCG_AREG3 TCG_REG_11
+#define TCG_AREG4 TCG_REG_12
+#define TCG_AREG5 TCG_REG_13
+#define TCG_AREG6 TCG_REG_14
+
+#define TMP_REG1 TCG_REG_23
+#define TMP_REG2 TCG_REG_24
+#define TMP_REG3 TCG_REG_25
+
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+    __asm__ __volatile__ ("call_pal 0x86");
+}
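+/* note: call_pal 0x86 is the unprivileged "imb" (instruction memory
+   barrier) PALcode call, which flushes the icache after code generation */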
+
-- 
1.6.3.3

Dong Weiyu.



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Porting-TCG-to-alpha-platform.patch --]
[-- Type: text/x-patch; name="0001-Porting-TCG-to-alpha-platform.patch", Size: 42321 bytes --]

From 7cc2acddfb7333ab3f1f6b17fa8fa5dcdd3c0095 Mon Sep 17 00:00:00 2001
From: Dong Weiyu <cidentifier@yahoo.com.cn>
Date: Wed, 20 Jan 2010 23:48:55 +0800
Subject: [PATCH] Porting TCG to alpha platform.

---
 cpu-all.h              |    2 +-
 tcg/alpha/tcg-target.c | 1196 ++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/alpha/tcg-target.h |   70 +++
 3 files changed, 1267 insertions(+), 1 deletions(-)
 create mode 100644 tcg/alpha/tcg-target.c
 create mode 100644 tcg/alpha/tcg-target.h

diff --git a/cpu-all.h b/cpu-all.h
index e0c3efd..bdf6fb2 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -22,7 +22,7 @@
 
 #include "qemu-common.h"
 
-#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__)
+#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__alpha__)
 #define WORDS_ALIGNED
 #endif
 
diff --git a/tcg/alpha/tcg-target.c b/tcg/alpha/tcg-target.c
new file mode 100644
index 0000000..143f576
--- /dev/null
+++ b/tcg/alpha/tcg-target.c
@@ -0,0 +1,1196 @@
+/*
+ * Tiny Code Generator for QEMU on ALPHA platform
+*/
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+    "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
+    "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15",
+    "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
+    "$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31",
+};
+#endif
+
+/* 
+ * $26 ~ $31 are special, reserved, 
+ * and $25 is deliberately reserved for jcc operation
+ * and $0 is usually used for return function result, better allocate it later
+ * and $15 is used for cpu_env pointer, allocate it at last
+*/
+static const int tcg_target_reg_alloc_order[] = {
+    TCG_REG_9, TCG_REG_10, TCG_REG_11, TCG_REG_12, TCG_REG_13, TCG_REG_14,
+    TCG_REG_1, TCG_REG_2, TCG_REG_3, TCG_REG_4, TCG_REG_5, TCG_REG_6,
+    TCG_REG_7, TCG_REG_8, TCG_REG_22, 
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19, TCG_REG_20, TCG_REG_21
+};
+
+/*
+ * according to alpha calling convention, these 6 registers are used for 
+ * function parameter passing. if function has more than 6 parameters, remained
+ * ones are stored on stack.
+*/
+static const int tcg_target_call_iarg_regs[6] = { 
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19, TCG_REG_20, TCG_REG_21
+};
+
+/*
+ * according to alpha calling convention, $0 is used for returning function result.
+*/
+static const int tcg_target_call_oarg_regs[1] = { TCG_REG_0 };
+
+/*
+ * save the address of TB's epilogue.
+*/
+static uint8_t *tb_ret_addr;
+
+#define INSN_OP(x)     (((x) & 0x3f) << 26)
+#define INSN_FUNC1(x)  (((x) & 0x3) << 14)
+#define INSN_FUNC2(x)  (((x) & 0x7f) << 5)
+#define INSN_RA(x)     (((x) & 0x1f) << 21)
+#define INSN_RB(x)     (((x) & 0x1f) << 16)
+#define INSN_RC(x)     ((x) & 0x1f)
+#define INSN_LIT(x)    (((x) & 0xff) << 13)
+#define INSN_DISP16(x) ((x) & 0xffff)
+#define INSN_DISP21(x) ((x) & 0x1fffff)
+#define INSN_RSVED(x)  ((x) & 0x3fff)
+
+#define INSN_JMP       (INSN_OP(0x1a) | INSN_FUNC1(0))
+#define INSN_CALL      (INSN_OP(0x1a) | INSN_FUNC1(1))
+#define INSN_RET       (INSN_OP(0x1a) | INSN_FUNC1(2))
+#define INSN_BR        INSN_OP(0x30)
+#define INSN_BEQ       INSN_OP(0x39)
+#define INSN_BNE       INSN_OP(0x3d)
+#define INSN_BLBC      INSN_OP(0x38)
+#define INSN_BLBS      INSN_OP(0x3c)
+#define INSN_ADDL      (INSN_OP(0x10) | INSN_FUNC2(0))
+#define INSN_SUBL      (INSN_OP(0x10) | INSN_FUNC2(0x9))
+#define INSN_ADDQ      (INSN_OP(0x10) | INSN_FUNC2(0x20))
+#define INSN_SUBQ      (INSN_OP(0x10) | INSN_FUNC2(0x29))
+#define INSN_CMPEQ     (INSN_OP(0x10) | INSN_FUNC2(0x2d))
+#define INSN_CMPLT     (INSN_OP(0x10) | INSN_FUNC2(0x4d))
+#define INSN_CMPLE     (INSN_OP(0x10) | INSN_FUNC2(0x6d))
+#define INSN_CMPULT    (INSN_OP(0x10) | INSN_FUNC2(0x1d))
+#define INSN_CMPULE    (INSN_OP(0x10) | INSN_FUNC2(0x3d))
+#define INSN_MULL      (INSN_OP(0x13) | INSN_FUNC2(0))
+#define INSN_MULQ      (INSN_OP(0x13) | INSN_FUNC2(0x20))
+#define INSN_AND       (INSN_OP(0x11) | INSN_FUNC2(0))
+#define INSN_BIS       (INSN_OP(0x11) | INSN_FUNC2(0x20))
+#define INSN_XOR       (INSN_OP(0x11) | INSN_FUNC2(0x40))
+#define INSN_SLL       (INSN_OP(0x12) | INSN_FUNC2(0x39))
+#define INSN_SRL       (INSN_OP(0x12) | INSN_FUNC2(0x34))
+#define INSN_SRA       (INSN_OP(0x12) | INSN_FUNC2(0x3c))
+#define INSN_ZAPNOT    (INSN_OP(0x12) | INSN_FUNC2(0x31))
+#define INSN_SEXTB     (INSN_OP(0x1c) | INSN_FUNC2(0))
+#define INSN_SEXTW     (INSN_OP(0x1c) | INSN_FUNC2(0x1))
+#define INSN_LDA       INSN_OP(0x8)
+#define INSN_LDAH      INSN_OP(0x9)
+#define INSN_LDBU      INSN_OP(0xa)
+#define INSN_LDWU      INSN_OP(0xc)
+#define INSN_LDL       INSN_OP(0x28)
+#define INSN_LDQ       INSN_OP(0x29)
+#define INSN_STB       INSN_OP(0xe)
+#define INSN_STW       INSN_OP(0xd)
+#define INSN_STL       INSN_OP(0x2c)
+#define INSN_STQ       INSN_OP(0x2d)
+
+/*
+ * return the # of regs used for parameter passing on procedure calling.
+ * note that alpha use $16~$21 to transfer the first 6 paramenters of a procedure.
+*/
+static inline int tcg_target_get_call_iarg_regs_count(int flags)
+{
+    return 6;
+}
+
+/*
+ * given constraint, return available register set. this function is called once
+ * for each op at qemu's initialization stage.
+*/
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+    const char *ct_str = *pct_str;
+
+    switch(ct_str[0]) 
+    {
+    case 'r':
+        /* constaint 'r' means any register is okay */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        break;
+
+    case 'L':
+        /* 
+        * constranit 'L' is used for qemu_ld/st, which has 2 meanings:
+        * 1st, we the argument need to be allocated a register.
+        * 2nd, we should reserve some registers that belong to caller-clobbered 
+        * list for qemu_ld/st local usage, so these registers must not be 
+        * allocated to the argument that the 'L' constraint is describing.
+        *
+        * note that op qemu_ld/st has the TCG_OPF_CALL_CLOBBER flag, and 
+        * tcg will free all callee-clobbered registers before generate target
+        * insn for qemu_ld/st, so we can use these register directly without
+        * warrying about destroying their content.
+        */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_16);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_17);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_18);
+        break;
+
+    default:
+        return -1;
+    }
+
+    ct_str++;
+    *pct_str = ct_str;
+    return 0;
+}
+
+/*
+ * whether op's input argument may use constant 
+*/
+static inline int tcg_target_const_match( \
+	tcg_target_long val, const TCGArgConstraint *arg_ct)
+{
+    int ct = arg_ct->ct;
+    return (ct & TCG_CT_CONST) ? 1 : 0;
+}
+
+static inline void tcg_out_fmt_br(TCGContext *s, int opc, int ra, int disp)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_DISP21(disp));
+}
+
+static inline void tcg_out_fmt_mem(TCGContext *s, int opc, int ra, int rb, int disp)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_RB(rb)|INSN_DISP16(disp));
+}
+
+static inline void tcg_out_fmt_jmp(TCGContext *s, int opc, int ra, int rb, int rsved)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_RB(rb)|INSN_RSVED(rsved));
+}
+
+static inline void tcg_out_fmt_opr(TCGContext *s, int opc, int ra, int rb, int rc)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_RB(rb)|INSN_RC(rc));
+}
+
+static inline void tcg_out_fmt_opi(TCGContext *s, int opc, int ra, int lit, int rc)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_LIT(lit)|INSN_RC(rc)|(1<<12));
+}
+
+/*
+ * mov from a reg to another
+*/
+static inline void tcg_out_mov(TCGContext *s, int rc, int rb)
+{  
+    if ( rb != rc ) {
+        tcg_out_fmt_opr(s, INSN_BIS, TCG_REG_31, rb, rc);
+    }
+}
+
+/*
+ * mov a 64-bit immediate 'arg' to regsiter 'ra', this function will
+ * generate fixed length (5 insns) of target insn sequence.
+*/
+static void tcg_out_movi_fixl( \
+    TCGContext *s, TCGType type, int ra, tcg_target_long arg)
+{
+    tcg_target_long l0, l1, l2, l3;
+    tcg_target_long l1_tmp, l2_tmp, l3_tmp;
+
+    l0 = arg & 0xffffu;
+    l1_tmp = l1 = ( arg >> 16) & 0xffffu;
+    l2_tmp = l2 = ( arg >> 32) & 0xffffu;
+    l3_tmp = l3 = ( arg >> 48) & 0xffffu;
+
+    if ( l0 & 0x8000u)
+        l1_tmp = (l1 + 1) & 0xffffu;
+    if ( (l1_tmp & 0x8000u) || ((l1_tmp == 0) && (l1_tmp != l1)))
+        l2_tmp = (l2 + 1) & 0xffffu;
+    if ( (l2_tmp & 0x8000u) || ((l2_tmp == 0) && (l2_tmp != l2)))
+        l3_tmp = (l3 + 1) & 0xffffu;
+
+    tcg_out_fmt_mem(s, INSN_LDAH, ra, TCG_REG_31, l3_tmp);
+    tcg_out_fmt_mem(s, INSN_LDA, ra, ra, l2_tmp);
+    tcg_out_fmt_opi(s, INSN_SLL, ra, 32, ra);
+    tcg_out_fmt_mem(s, INSN_LDAH, ra, ra, l1_tmp);
+    tcg_out_fmt_mem(s, INSN_LDA, ra, ra, l0);
+}
+
+/*
+ * mov 64-bit immediate 'arg' to regsiter 'ra'. this function will
+ * generate variable length of target insn sequence.
+*/
+static inline void tcg_out_movi( \
+    TCGContext *s, TCGType type, int ra, tcg_target_long arg)
+{
+    if ( type == TCG_TYPE_I32)
+        arg = (int32_t)arg;
+
+    if( arg == (int16_t)arg ) {
+        tcg_out_fmt_mem(s, INSN_LDA, ra, TCG_REG_31, arg);
+    } else if( arg == (int32_t)arg ) {
+        tcg_out_fmt_mem(s, INSN_LDAH, ra, TCG_REG_31, (arg>>16));
+        if( arg & ((tcg_target_ulong)0x8000) ) {
+            tcg_out_fmt_mem(s, INSN_LDAH, ra, ra, 1);
+        }
+        tcg_out_fmt_mem(s, INSN_LDA, ra, ra, arg);
+    } else {
+        tcg_out_movi_fixl(s, type, ra, arg);
+    }
+}
+
+static inline int _is_tmp_reg( int r)
+{
+    if ( r == TMP_REG1 || r == TMP_REG2 || r == TMP_REG3)
+        return 1;
+    else
+        return 0;
+}
+
+/*
+ * load value in disp(Rb) to Ra.
+*/
+static inline void tcg_out_ld( \
+    TCGContext *s, TCGType type, int ra, int rb, tcg_target_long disp)
+{
+    int opc;
+    
+    if ( _is_tmp_reg(ra) || _is_tmp_reg(rb))
+        tcg_abort();
+
+    opc = ((type == TCG_TYPE_I32) ? INSN_LDL : INSN_LDQ);
+
+    if( disp != (int16_t)disp ) {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_fmt_opr(s, INSN_ADDQ, rb, TMP_REG1, TMP_REG1);
+        tcg_out_fmt_mem(s, opc, ra, TMP_REG1, 0);
+    }
+    else
+        tcg_out_fmt_mem(s, opc, ra, rb, disp);
+}
+
+/*
+ * store value in Ra to disp(Rb).
+*/
+static inline void tcg_out_st( \
+    TCGContext *s, TCGType type, int ra, int rb, tcg_target_long disp)
+{
+    int opc;
+
+    if ( _is_tmp_reg(ra) || _is_tmp_reg(rb))
+        tcg_abort();
+    
+    opc = ((type == TCG_TYPE_I32) ? INSN_STL : INSN_STQ);
+
+    if( disp != (int16_t)disp ) {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_fmt_opr(s, INSN_ADDQ, rb, TMP_REG1, TMP_REG1);
+        tcg_out_fmt_mem(s, opc, ra, TMP_REG1, 0);
+    }
+    else
+        tcg_out_fmt_mem(s, opc, ra, rb, disp);
+}
+
+/*
+ * generate arithmatic instruction with immediate. ra is used as both
+ * input and output, and val is used as another input.
+*/
+static inline void tgen_arithi( \
+    TCGContext *s, int opc, int ra, tcg_target_long val)
+{
+    if ( _is_tmp_reg(ra))
+        tcg_abort();
+
+    if (val == (uint8_t)val) {
+        tcg_out_fmt_opi(s, opc, ra, val, ra);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, val);
+        tcg_out_fmt_opr(s, opc, ra, TMP_REG1, ra);
+    }
+}
+
+static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+{
+    if (val != 0)
+        tgen_arithi(s, INSN_ADDQ, reg, val);
+}
+
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+    tcg_out_fmt_opi(s, INSN_SUBQ, TCG_REG_30, 8, TCG_REG_30);
+    tcg_out_fmt_mem(s, INSN_STQ, reg, TCG_REG_30, 0);
+}
+
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+    tcg_out_fmt_mem(s, INSN_LDQ, reg, TCG_REG_30, 0);
+    tcg_out_fmt_opi(s, INSN_ADDQ, TCG_REG_30, 8, TCG_REG_30);
+}
+
+static const uint64_t tcg_cond_to_jcc[10] = {
+    [TCG_COND_EQ] = INSN_CMPEQ,
+    [TCG_COND_NE] = INSN_CMPEQ,
+    [TCG_COND_LT] = INSN_CMPLT,
+    [TCG_COND_GE] = INSN_CMPLT,
+    [TCG_COND_LE] = INSN_CMPLE,
+    [TCG_COND_GT] = INSN_CMPLE,
+    [TCG_COND_LTU] = INSN_CMPULT,
+    [TCG_COND_GEU] = INSN_CMPULT,
+    [TCG_COND_LEU] = INSN_CMPULE,
+    [TCG_COND_GTU] = INSN_CMPULE
+};
+
+static void patch_reloc(uint8_t *code_ptr, \
+    int type, tcg_target_long value, tcg_target_long addend)
+{
+    TCGContext s;
+    tcg_target_long val;
+
+    if ( type != R_ALPHA_BRADDR)
+        tcg_abort();
+    
+    s.code_ptr = code_ptr;
+    val = (value - (tcg_target_long)s.code_ptr - 4) >> 2; 
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+
+    tcg_out_fmt_br(&s, INSN_BR, TCG_REG_31, val);
+}
+
+static void tcg_out_br(TCGContext *s, int label_index)
+{
+    TCGLabel *l = &s->labels[label_index];
+
+    if (l->has_value) {
+        tcg_target_long val;
+        val = ((tcg_target_long)(l->u.value) - (tcg_target_long)s->code_ptr - 4) >> 2;
+        if ( val >= -0x100000 && val < 0x100000) {
+            // if distance can be put into 21-bit field
+            tcg_out_fmt_br(s, INSN_BR, TCG_REG_31, val);
+	} else {
+            tcg_abort();
+	}
+    } else {
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_BRADDR, label_index, 0);
+        s->code_ptr += 4;
+    }
+}
+
+static void tcg_out_brcond( TCGContext *s, int cond, \
+    TCGArg arg1, TCGArg arg2, int const_arg2, int label_index)
+{
+    int opc;
+    TCGLabel *l = &s->labels[label_index];
+
+    if ( cond < TCG_COND_EQ || cond > TCG_COND_GTU || const_arg2)
+        tcg_abort();
+
+    opc = tcg_cond_to_jcc[cond];
+    tcg_out_fmt_opr(s, opc, arg1, arg2, TMP_REG1);
+
+    if (l->has_value) {
+        tcg_target_long val;
+        val = ((tcg_target_long)l->u.value - (tcg_target_long)s->code_ptr - 4) >> 2;
+        if ( val >= -0x100000 && val < 0x100000) {
+            // if distance can be put into 21-bit field
+            opc = (cond & 1) ? INSN_BLBC : INSN_BLBS;
+            tcg_out_fmt_br(s, opc, TMP_REG1, val);
+	} else {
+            tcg_abort();
+	}
+    } else {
+        opc = (cond & 1) ? INSN_BLBS : INSN_BLBC;
+        tcg_out_fmt_br(s, opc, TMP_REG1, 1);
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_BRADDR, label_index, 0);
+        s->code_ptr += 4;
+    }
+}
+
+#if defined(CONFIG_SOFTMMU)
+
+#include "../../softmmu_defs.h"
+
+static void *qemu_ld_helpers[4] = {
+    __ldb_mmu,
+    __ldw_mmu,
+    __ldl_mmu,
+    __ldq_mmu,
+};
+
+static void *qemu_st_helpers[4] = {
+    __stb_mmu,
+    __stw_mmu,
+    __stl_mmu,
+    __stq_mmu,
+};
+
+#endif
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+{
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;
+    tcg_target_long val;
+
+#if defined(CONFIG_SOFTMMU)
+    uint8_t *label1_ptr, *label2_ptr;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+    mem_index = *args;
+    s_bits = opc & 3;
+
+    r0 = TCG_REG_16;
+    r1 = TCG_REG_17;
+
+#if defined(CONFIG_SOFTMMU)
+
+    tcg_out_mov(s, r1, addr_reg); 
+    tcg_out_mov(s, r0, addr_reg); 
+ 
+#if TARGET_LONG_BITS == 32
+    /* if VM is of 32-bit arch, clear higher 32-bit of addr */
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r0, 0x0f, r0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r1, 0x0f, r1);
+#endif
+
+    tgen_arithi(s, INSN_AND, r0, TARGET_PAGE_MASK|((1<<s_bits)-1));
+
+    tgen_arithi(s, INSN_SRL, r1, TARGET_PAGE_BITS-CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, INSN_AND, r1, (CPU_TLB_SIZE-1)<<CPU_TLB_ENTRY_BITS);
+    
+    tcg_out_addi(s, r1, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TCG_REG_15, r1);
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_mem(s, INSN_LDL, TMP_REG1, r1, 0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, TMP_REG1, 0x0f, TMP_REG1);
+#else
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1, 0);
+#endif
+		
+    //
+    // now, r0 contains the page# and TMP_REG1 contains the addr to tlb_entry.addr_read
+    // we below will compare them
+    //
+    tcg_out_fmt_opr(s, INSN_CMPEQ, TMP_REG1, r0, TMP_REG1);
+
+    tcg_out_mov(s, r0, addr_reg);
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r0, 0x0f, r0);
+#endif
+
+    //
+    // if equal, we jump to label1. since label1 is not resolved yet, 
+    // we just record a relocation.
+    //
+    label1_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    //
+    // here, unequal, TLB-miss.
+    //
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_17, mem_index);
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)qemu_ld_helpers[s_bits]);
+    tcg_out_push(s, addr_reg);
+    //tcg_out_push(s, TCG_REG_26);
+    //tcg_out_push(s, TCG_REG_15);
+    tcg_out_mov(s, TCG_REG_27, TMP_REG1);
+    tcg_out_fmt_jmp(s, INSN_CALL, TCG_REG_26, TMP_REG1, 0);
+    //tcg_out_pop(s, TCG_REG_15);
+    //tcg_out_pop(s, TCG_REG_26);
+    tcg_out_pop(s, addr_reg);
+	
+    //
+    // after helper function call, the result of ld is saved in $0
+    //
+    switch(opc) {
+    case 0 | 4:
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, TCG_REG_0, data_reg);
+        break;
+    case 1 | 4:
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, TCG_REG_0, data_reg);
+        break;
+    case 2 | 4:
+        tcg_out_fmt_opr(s, INSN_ADDL, TCG_REG_0, TCG_REG_31, data_reg);
+        break;
+    case 0:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, TCG_REG_0, 0x1, data_reg);
+        break;
+    case 1:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, TCG_REG_0, 0x3, data_reg);
+        break;
+    case 2:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, TCG_REG_0, 0xf, data_reg);
+        break;
+    case 3:
+        tcg_out_mov(s, data_reg, TCG_REG_0);
+        break;
+    default:
+        tcg_abort();
+        break;
+    }
+
+    //
+    // we have done, jmp to label2. label2 is not resolved yet, 
+    // we record a relocation.
+    //
+    label2_ptr = s->code_ptr;
+    s->code_ptr += 4;
+    
+    // patch jmp to label1
+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t) \
+        ( INSN_BNE | ( TMP_REG1 << 21 ) | ( val & 0x1fffff));
+
+    //
+    // if we get here, a TLB entry is hit, r0 contains the guest addr and 
+    // r1 contains the ptr that point to tlb_entry.addr_read. what we should
+    // do is to load the tlb_entry.addend (64-bit on alpha) and add it to 
+    // r0 to get the host VA
+    //
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, \
+	offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TMP_REG1, r1);
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1, 0);
+    tcg_out_fmt_opr(s, INSN_ADDQ, r0, TMP_REG1, r0);
+	
+#else
+    r0 = addr_reg;
+#endif // endif defined(CONFIG_SOFTMMU)
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    tcg_abort();
+#endif
+
+    //
+    // r0 now holds the host VA through which the guest memory is accessed
+    //
+    switch(opc) {
+    case 0:
+        tcg_out_fmt_mem(s, INSN_LDBU, data_reg, r0, 0);
+        break;
+    case 0 | 4:
+        tcg_out_fmt_mem(s, INSN_LDBU, data_reg, r0, 0);
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, data_reg, data_reg);
+        break;
+    case 1:
+        tcg_out_fmt_mem(s, INSN_LDWU, data_reg, r0, 0);
+        break;
+    case 1 | 4:
+        tcg_out_fmt_mem(s, INSN_LDWU, data_reg, r0, 0);
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, data_reg, data_reg);
+        break;
+    case 2:
+        tcg_out_fmt_mem(s, INSN_LDL, data_reg, r0, 0);
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, data_reg, 0xf, data_reg);
+        break;
+    case 2 | 4:
+        tcg_out_fmt_mem(s, INSN_LDL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_fmt_mem(s, INSN_LDQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+
+#if defined(CONFIG_SOFTMMU)
+    /* label2: patch the branch reserved above to target this point */
+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)
+        (INSN_BR | (TCG_REG_31 << 21) | (val & 0x1fffff));
+#endif
+}
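+
+/*
+ * For reference, the softmmu fast path emitted by tcg_out_qemu_ld above
+ * computes, in C-like pseudo-code (a sketch of the emitted logic against
+ * the 0.10-era CPUTLBEntry layout, not code that is compiled):
+ *
+ *     idx   = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ *     entry = &env->tlb_table[mem_index][idx];
+ *     if ((addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1))) == entry->addr_read)
+ *         result = *(host pointer)(addr + entry->addend);   // TLB hit
+ *     else
+ *         result = __ld?_mmu(addr, mem_index);              // TLB miss
+ */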
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+{
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;
+    tcg_target_long val;
+
+#if defined(CONFIG_SOFTMMU)
+    uint8_t *label1_ptr, *label2_ptr;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+    mem_index = *args;
+    s_bits = opc&3;
+
+    r0 = TCG_REG_16;
+    r1 = TCG_REG_17;
+
+#if defined(CONFIG_SOFTMMU)
+
+    tcg_out_mov(s, r1, addr_reg); 
+    tcg_out_mov(s, r0, addr_reg); 
+ 
+#if TARGET_LONG_BITS == 32
+    /* if VM is of 32-bit arch, clear higher 32-bit of addr */
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r0, 0x0f, r0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r1, 0x0f, r1);
+#endif
+
+    tgen_arithi(s, INSN_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+    tgen_arithi(s, INSN_SRL, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, INSN_AND, r1, (CPU_TLB_SIZE-1) << CPU_TLB_ENTRY_BITS);
+
+    tcg_out_addi(s, r1, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TCG_REG_15, r1);
+
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_mem(s, INSN_LDL, TMP_REG1, r1, 0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, TMP_REG1, 0x0f, TMP_REG1);
+#else
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1, 0);
+#endif
+
+    //
+    // r0 now holds the masked guest address and TMP_REG1 holds the
+    // tlb_entry.addr_write value; compare them below.
+    //
+    tcg_out_fmt_opr(s, INSN_CMPEQ, TMP_REG1, r0, TMP_REG1);
+
+    tcg_out_mov(s, r0, addr_reg);
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, r0, 0x0f, r0);
+#endif
+
+    //
+    // if they are equal we branch to label1. label1 is not resolved yet,
+    // so reserve one instruction slot here and patch it below.
+    //
+    label1_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    // fall-through path: the addresses differ (TLB miss); call the helper.
+    tcg_out_mov(s, TCG_REG_17, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_18, mem_index);
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)qemu_st_helpers[s_bits]);
+        
+    tcg_out_push(s, data_reg);
+    tcg_out_push(s, addr_reg);
+    tcg_out_mov(s, TCG_REG_27, TMP_REG1);
+    tcg_out_fmt_jmp(s, INSN_CALL, TCG_REG_26, TMP_REG1, 0);
+    tcg_out_pop(s, addr_reg);
+    tcg_out_pop(s, data_reg);
+
+    //
+    // done; jump to label2. label2 is not resolved yet, so reserve an
+    // instruction slot and patch it below.
+    //
+    label2_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    // label1: patch the conditional branch reserved above to target this point
+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t)
+        (INSN_BNE | (TMP_REG1 << 21) | (val & 0x1fffff));
+
+    //
+    // if we get here the TLB lookup hit: r0 holds the guest address and
+    // r1 points to tlb_entry.addr_write. load tlb_entry.addend (64-bit on
+    // alpha) and add it to r0 to obtain the host VA.
+    //
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1,
+        offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TMP_REG1, r1);
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1,  0);
+    tcg_out_fmt_opr(s, INSN_ADDQ, r0, TMP_REG1, r0);
+
+#else
+    r0 = addr_reg;
+#endif
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    tcg_abort();
+#endif
+
+    //
+    // r0 now holds the host VA through which the guest memory is accessed
+    //
+    switch(opc) {
+    case 0:
+        tcg_out_fmt_mem(s, INSN_STB, data_reg, r0, 0);
+        break;
+    case 1:
+        tcg_out_fmt_mem(s, INSN_STW, data_reg, r0, 0);
+        break;
+    case 2:
+        tcg_out_fmt_mem(s, INSN_STL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_fmt_mem(s, INSN_STQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+
+#if defined(CONFIG_SOFTMMU)
+    /* label2: patch the branch reserved above to target this point */
+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)
+        (INSN_BR | (TCG_REG_31 << 21) | (val & 0x1fffff));
+#endif
+}
+
+static inline void tgen_ldxx( TCGContext *s, int ra, int rb, tcg_target_long disp, int flags)
+{
+    int opc_array[4] = { INSN_LDBU, INSN_LDWU, INSN_LDL, INSN_LDQ};
+    int opc = opc_array[flags & 3];
+
+    if ( _is_tmp_reg(ra) || _is_tmp_reg(rb))
+        tcg_abort();
+
+    if( disp != (int16_t)disp ) {
+        /* disp cannot be stored in insn directly */
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);	
+        tcg_out_fmt_opr(s, INSN_ADDQ, rb, TMP_REG1, TMP_REG1);
+        tcg_out_fmt_mem(s, opc, ra, TMP_REG1, 0);
+    } else {
+        tcg_out_fmt_mem(s, opc, ra, rb, disp);
+    }
+
+    switch ( flags & 7)	{
+    case 0:
+    case 1:
+    case 2|4:
+    case 3:
+        break;
+    case 0|4:
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, ra, ra);
+        break;
+    case 1|4:
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, ra, ra);
+        break;
+    case 2:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, ra, 0x0f, ra);
+        break;
+    default:
+        tcg_abort();
+    }
+}
+
+static inline void tgen_stxx( TCGContext *s, int ra, int rb, tcg_target_long disp, int flags)
+{
+    int opc_array[4] = { INSN_STB, INSN_STW, INSN_STL, INSN_STQ};
+    int opc = opc_array[flags & 3];
+
+    if( disp != (int16_t)disp ) {
+        /* disp cannot be stored in insn directly */
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_fmt_opr(s, INSN_ADDQ, rb, TMP_REG1, TMP_REG1);
+        tcg_out_fmt_mem(s, opc, ra, TMP_REG1, 0);
+    } else {
+        tcg_out_fmt_mem(s, opc, ra, rb, disp);
+    }
+}
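+
+/*
+ * Example (a sketch): tgen_stxx(s, ra, rb, disp, 3) with a displacement
+ * that does not fit in 16 bits, e.g. 0x123456, expands to roughly
+ *     <load 0x123456 into TMP_REG1>
+ *     addq rb, TMP_REG1, TMP_REG1
+ *     stq  ra, 0(TMP_REG1)
+ * while a small displacement is encoded directly in the stq itself.
+ */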
+
+static inline void tcg_out_op(TCGContext *s, \
+	int opc, const TCGArg *args, const int *const_args)
+{
+    int oc;
+    switch(opc)
+    {
+    case INDEX_op_exit_tb:
+        /*
+         * exit_tb t0: t0 is always constant and is handed back to the
+         * engine. we return to the engine right away, so $0 and TMP_REG1
+         * are free to clobber here.
+        */
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_0, args[0]);
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)tb_ret_addr);
+        tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, TMP_REG1, 0);
+        break;
+
+    case INDEX_op_goto_tb:
+        /* goto_tb idx, where idx is constant 0 or 1, indicating the branch # */
+        if (s->tb_jmp_offset) {
+            /* we don't support direct jmp */
+            tcg_abort();
+        } else {
+            tcg_out_movi( s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)(s->tb_next + args[0]));
+            tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, TMP_REG1, 0);
+            tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, TMP_REG1, 0);
+        }
+        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        break;
+
+    case INDEX_op_call:
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            tcg_out_mov(s, TCG_REG_27, args[0]);
+            tcg_out_fmt_jmp(s, INSN_CALL, TCG_REG_26, args[0], 0);
+        }
+        }
+        break;
+
+    case INDEX_op_jmp: 
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, args[0], 0);
+        }
+        break;
+
+    case INDEX_op_br:
+        tcg_out_br(s, args[0]);
+        break;
+
+    case INDEX_op_ld8u_i32: 
+    case INDEX_op_ld8u_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_ld8s_i32: 
+    case INDEX_op_ld8s_i64: 
+        tgen_ldxx( s, args[0], args[1], args[2], 0|4);
+        break;
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16u_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld16s_i64: 
+        tgen_ldxx( s, args[0], args[1], args[2], 1|4);
+        break;
+    case INDEX_op_ld32u_i64: 
+        tgen_ldxx( s, args[0], args[1], args[2], 2);
+        break;
+    case INDEX_op_ld_i32: 
+    case INDEX_op_ld32s_i64:
+        tgen_ldxx( s, args[0], args[1], args[2], 2|4);
+        break;
+    case INDEX_op_ld_i64: 
+        tgen_ldxx( s, args[0], args[1], args[2], 3);
+        break;
+		
+    case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64: 
+        tgen_stxx( s, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64: 
+        tgen_stxx( s, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_st_i32:
+    case INDEX_op_st32_i64: 
+        tgen_stxx( s, args[0], args[1], args[2], 2);
+        break;
+    case INDEX_op_st_i64: 
+        tgen_stxx( s, args[0], args[1], args[2], 3);
+        break;
+
+    case INDEX_op_add_i32: 
+    case INDEX_op_add_i64: 
+        oc = INSN_ADDQ;
+        goto gen_arith;
+    case INDEX_op_sub_i32: 
+    case INDEX_op_sub_i64:
+        oc = INSN_SUBQ;
+        goto gen_arith;
+    case INDEX_op_mul_i32: 
+        oc = INSN_MULL;
+        goto gen_arith;
+    case INDEX_op_mul_i64: 
+        oc = INSN_MULQ;
+        goto gen_arith;
+    case INDEX_op_and_i32:
+    case INDEX_op_and_i64:
+        oc = INSN_AND;
+        goto gen_arith;
+    case INDEX_op_or_i32:
+    case INDEX_op_or_i64: 
+        oc = INSN_BIS;
+        goto gen_arith;
+    case INDEX_op_xor_i32:
+    case INDEX_op_xor_i64:
+        oc = INSN_XOR;
+        goto gen_arith;
+    case INDEX_op_shl_i32:
+    case INDEX_op_shl_i64:
+        oc = INSN_SLL;
+        goto gen_arith;
+    case INDEX_op_shr_i32:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, args[1], 0x0f, args[1]);
+        /* fall through */
+    case INDEX_op_shr_i64:
+        oc = INSN_SRL;
+        goto gen_arith;
+    case INDEX_op_sar_i32:
+        tcg_out_fmt_opr(s, INSN_ADDL, args[1], TCG_REG_31, args[1]);
+        /* fall through */
+    case INDEX_op_sar_i64:
+        oc = INSN_SRA;
+    gen_arith:
+        if (const_args[2]) {
+            tcg_abort();
+        } else {
+            tcg_out_fmt_opr(s, oc, args[1], args[2], args[0]);
+        }
+        break;
+
+    case INDEX_op_brcond_i32:
+        tcg_out_fmt_opr(s, INSN_ADDL, args[0], TCG_REG_31, args[0]);
+        tcg_out_fmt_opr(s, INSN_ADDL, args[1], TCG_REG_31, args[1]);
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        break;
+    case INDEX_op_brcond_i64:
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        break;
+
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, args[1], args[0]);
+        break;
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, args[1], args[0]);
+        break;
+    case INDEX_op_ext32s_i64:
+        tcg_out_fmt_opr(s, INSN_ADDL, args[1], TCG_REG_31, args[0]);
+        break;
+    
+    case INDEX_op_qemu_ld8u:
+        tcg_out_qemu_ld(s, args, 0);
+        break;
+    case INDEX_op_qemu_ld8s:
+        tcg_out_qemu_ld(s, args, 0 | 4);
+        break;
+    case INDEX_op_qemu_ld16u:
+        tcg_out_qemu_ld(s, args, 1);
+        break;
+    case INDEX_op_qemu_ld16s:
+        tcg_out_qemu_ld(s, args, 1 | 4);
+        break;
+    case INDEX_op_qemu_ld32u:
+        tcg_out_qemu_ld(s, args, 2);
+        break;
+    case INDEX_op_qemu_ld32s:
+        tcg_out_qemu_ld(s, args, 2 | 4);
+        break;
+    case INDEX_op_qemu_ld64:
+        tcg_out_qemu_ld(s, args, 3);
+        break;
+
+    case INDEX_op_qemu_st8:
+        tcg_out_qemu_st(s, args, 0);
+        break;
+    case INDEX_op_qemu_st16:
+        tcg_out_qemu_st(s, args, 1);
+        break;
+    case INDEX_op_qemu_st32:
+        tcg_out_qemu_st(s, args, 2);
+        break;
+    case INDEX_op_qemu_st64:
+        tcg_out_qemu_st(s, args, 3);
+        break;
+
+    case INDEX_op_movi_i32: 
+    case INDEX_op_movi_i64: 
+    case INDEX_op_mov_i32: 
+    case INDEX_op_mov_i64:
+    case INDEX_op_div2_i32:
+    case INDEX_op_divu2_i32:
+    default:
+        tcg_abort();
+    }
+}
+
+static const TCGTargetOpDef alpha_op_defs[] = {
+    { INDEX_op_exit_tb, { } },
+    { INDEX_op_goto_tb, { } },
+    { INDEX_op_call, { "r" } },
+    { INDEX_op_jmp, { "r" } },
+    { INDEX_op_br, { } },
+
+    { INDEX_op_mov_i32, { "r", "r" } },
+    { INDEX_op_movi_i32, { "r" } },
+    { INDEX_op_ld8u_i32, { "r", "r" } },
+    { INDEX_op_ld8s_i32, { "r", "r" } },
+    { INDEX_op_ld16u_i32, { "r", "r" } },
+    { INDEX_op_ld16s_i32, { "r", "r" } },
+    { INDEX_op_ld_i32, { "r", "r" } },
+    { INDEX_op_st8_i32, { "r", "r" } },
+    { INDEX_op_st16_i32, { "r", "r" } },
+    { INDEX_op_st_i32, { "r", "r" } },
+
+    { INDEX_op_add_i32, { "r", "0", "r" } },
+    { INDEX_op_mul_i32, { "r", "0", "r" } },
+    //{ INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
+    //{ INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
+    { INDEX_op_sub_i32, { "r", "0", "r" } },
+    { INDEX_op_and_i32, { "r", "0", "r" } },
+    { INDEX_op_or_i32, { "r", "0", "r" } },
+    { INDEX_op_xor_i32, { "r", "0", "r" } },
+
+    { INDEX_op_shl_i32, { "r", "0", "r" } },
+    { INDEX_op_shr_i32, { "r", "0", "r" } },
+    { INDEX_op_sar_i32, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i32, { "r", "r" } },		
+
+    { INDEX_op_mov_i64, { "r", "r" } },	
+    { INDEX_op_movi_i64, { "r" } },
+    { INDEX_op_ld8u_i64, { "r", "r" } },
+    { INDEX_op_ld8s_i64, { "r", "r" } },
+    { INDEX_op_ld16u_i64, { "r", "r" } },
+    { INDEX_op_ld16s_i64, { "r", "r" } },
+    { INDEX_op_ld32u_i64, { "r", "r" } },
+    { INDEX_op_ld32s_i64, { "r", "r" } },
+    { INDEX_op_ld_i64, { "r", "r" } },
+    { INDEX_op_st8_i64, { "r", "r" } },	
+    { INDEX_op_st16_i64, { "r", "r" } },
+    { INDEX_op_st32_i64, { "r", "r" } },
+    { INDEX_op_st_i64, { "r", "r" } },
+
+    { INDEX_op_add_i64, { "r", "0", "r" } },
+    { INDEX_op_mul_i64, { "r", "0", "r" } },
+    //{ INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
+    //{ INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
+    { INDEX_op_sub_i64, { "r", "0", "r" } },
+    { INDEX_op_and_i64, { "r", "0", "r" } },
+    { INDEX_op_or_i64, { "r", "0", "r" } },
+    { INDEX_op_xor_i64, { "r", "0", "r" } },
+
+    { INDEX_op_shl_i64, { "r", "0", "r" } },
+    { INDEX_op_shr_i64, { "r", "0", "r" } },
+    { INDEX_op_sar_i64, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i64, { "r", "r" } },
+
+    { INDEX_op_ext8s_i32, { "r", "r"} },
+    { INDEX_op_ext16s_i32, { "r", "r"} },
+    { INDEX_op_ext8s_i64, { "r", "r"} },
+    { INDEX_op_ext16s_i64, { "r", "r"} },
+    { INDEX_op_ext32s_i64, { "r", "r"} },
+
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32s, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L" } },
+    { -1 },
+};
+
+
+static int tcg_target_callee_save_regs[] = {
+    TCG_REG_15,		// holds the global env pointer; callee-saved, so preserve it
+    TCG_REG_9,
+    TCG_REG_10,
+    TCG_REG_11,
+    TCG_REG_12,
+    TCG_REG_13,
+    TCG_REG_14
+};
+
+/*
+ * Generate global QEMU prologue and epilogue code 
+*/
+void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int i, frame_size, push_size, stack_addend;
+   
+    /* TB prologue */
+
+    /* save $26~$29: return address and assembler/temp registers */
+    tcg_out_push(s, TCG_REG_26);
+    tcg_out_push(s, TCG_REG_27);
+    tcg_out_push(s, TCG_REG_28);
+    tcg_out_push(s, TCG_REG_29);
+
+    /* save all callee saved registers */
+    for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_push(s, tcg_target_callee_save_regs[i]);
+    }
+	
+    /* reserve some stack space */
+    push_size = 8 + (4 + ARRAY_SIZE(tcg_target_callee_save_regs)) * 8;
+    frame_size = push_size + 4*TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1);
+    stack_addend = frame_size - push_size;
+    tcg_out_addi(s, TCG_REG_30, -stack_addend);
+
+    tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, TCG_REG_16, 0);		/* jmp $16 */
+
+    /* TB epilogue */
+    tb_ret_addr = s->code_ptr;
+    tcg_out_addi(s, TCG_REG_30, stack_addend);
+    for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
+        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
+    }
+
+    tcg_out_pop(s, TCG_REG_29);
+    tcg_out_pop(s, TCG_REG_28);
+    tcg_out_pop(s, TCG_REG_27);
+    tcg_out_pop(s, TCG_REG_26);
+    tcg_out_fmt_jmp(s, INSN_RET, TCG_REG_31, TCG_REG_26, 0);		/* ret */
+}
+
+
+void tcg_target_init(TCGContext *s)
+{
+    /* fail safe */
+    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
+        tcg_abort();
+
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+        (1 << TCG_REG_0)  | (1 << TCG_REG_1)  | (1 << TCG_REG_2)  |
+        (1 << TCG_REG_3)  | (1 << TCG_REG_4)  | (1 << TCG_REG_5)  |
+        (1 << TCG_REG_6)  | (1 << TCG_REG_7)  | (1 << TCG_REG_8)  |
+        (1 << TCG_REG_16) | (1 << TCG_REG_17) | (1 << TCG_REG_18) |
+        (1 << TCG_REG_19) | (1 << TCG_REG_20) | (1 << TCG_REG_21) |
+        (1 << TCG_REG_22) | (1 << TCG_REG_23) | (1 << TCG_REG_24) |
+        (1 << TCG_REG_25));
+
+
+    tcg_regset_clear(s->reserved_regs);
+    // $26~$31 not allocated by tcg.c
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_26);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_27);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_28);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_29);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_30);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_31);
+    // reserved registers for tmp usage
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG1);
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG2);
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG3);
+
+    tcg_add_target_add_op_defs(alpha_op_defs);
+}
+
diff --git a/tcg/alpha/tcg-target.h b/tcg/alpha/tcg-target.h
new file mode 100644
index 0000000..79c57af
--- /dev/null
+++ b/tcg/alpha/tcg-target.h
@@ -0,0 +1,70 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define TCG_TARGET_ALPHA 1
+
+#define TCG_TARGET_REG_BITS 64
+
+#define TCG_TARGET_NB_REGS 32
+
+enum {
+    TCG_REG_0 = 0, TCG_REG_1, TCG_REG_2, TCG_REG_3,
+    TCG_REG_4, TCG_REG_5, TCG_REG_6, TCG_REG_7,
+    TCG_REG_8, TCG_REG_9, TCG_REG_10, TCG_REG_11,
+    TCG_REG_12, TCG_REG_13, TCG_REG_14, TCG_REG_15,
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19,
+    TCG_REG_20, TCG_REG_21, TCG_REG_22, TCG_REG_23,
+    TCG_REG_24, TCG_REG_25, TCG_REG_26, TCG_REG_27,
+    TCG_REG_28, TCG_REG_29, TCG_REG_30, TCG_REG_31
+};
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_30
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+
+/* we have sign-extension instructions */
+#define TCG_TARGET_HAS_ext8s_i32
+#define TCG_TARGET_HAS_ext16s_i32
+#define TCG_TARGET_HAS_ext8s_i64
+#define TCG_TARGET_HAS_ext16s_i64
+#define TCG_TARGET_HAS_ext32s_i64
+
+/* Note: must be synced with dyngen-exec.h */
+#define TCG_AREG0 TCG_REG_15
+#define TCG_AREG1 TCG_REG_9
+#define TCG_AREG2 TCG_REG_10
+#define TCG_AREG3 TCG_REG_11
+#define TCG_AREG4 TCG_REG_12
+#define TCG_AREG5 TCG_REG_13
+#define TCG_AREG6 TCG_REG_14
+
+#define TMP_REG1 TCG_REG_23
+#define TMP_REG2 TCG_REG_24
+#define TMP_REG3 TCG_REG_25
+
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
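+    /* PALcode call 0x86 is IMB (instruction memory barrier): it flushes
+       the instruction cache, as required after generating code */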
+    __asm__ __volatile__ ("call_pal 0x86");
+}
+
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 21+ messages in thread
* Re: [Qemu-devel] [PATCH] Porting TCG to alpha platform
@ 2010-01-21  3:42 identifier scorpio
  2010-01-21 18:18 ` Stefan Weil
  0 siblings, 1 reply; 21+ messages in thread
From: identifier scorpio @ 2010-01-21  3:42 UTC (permalink / raw)
  To: Stefan Weil; +Cc: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 636 bytes --]

Thank you, Mr. Weil, for your reply.
 
> 
> Maybe you can also try the TCG interpreter (TCI) from
> http://repo.or.cz/w/qemu/ar7.git.
> In theory, it supports any host architecture with or
> without native TCG
> support.
> 
> It was tested successful with some basic tests on x86,
> mips, ppc and arm,
> so I hope it will run on alpha, too.
>

so that means I have to learn another set of interfaces?
is TCI simpler or more straightforward than TCG?

Dong Weiyu.




[-- Attachment #2: Type: text/html, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [Qemu-devel] [PATCH] Porting TCG to alpha platform
@ 2010-01-22 15:47 identifier scorpio
  2010-01-22 18:00 ` Richard Henderson
  2010-01-26  1:19 ` Richard Henderson
  0 siblings, 2 replies; 21+ messages in thread
From: identifier scorpio @ 2010-01-22 15:47 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel


[-- Attachment #1.1: Type: text/plain, Size: 989 bytes --]

Hi, Richard.

With your help I have almost rewritten the whole code, and it now looks somewhat more elegant. Unfortunately, it still can't run MS Windows, and I wonder whether there are other places in QEMU that must be modified for the alpha case, besides the 3 files (cpu-all.h/tcg-target.c/tcg-target.h) I wrote or modified.

Is there any good method to find the bug other than "guess and try"? It also seems that few people are interested in porting QEMU/TCG to the alpha platform; why? Just because alpha machines are disappearing? And could the patch be accepted by QEMU at its current stage? I just want to attract more eyeballs and get things done.

Anyhow, I'll continue to work on it, and any help from you will be appreciated. I attach the newest patch; please take a look if it doesn't take too much of your time.

Thanks.

Dong Weiyu







[-- Attachment #1.2: Type: text/html, Size: 1142 bytes --]

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Porting-TCG-to-alpha-platform.v2.patch --]
[-- Type: text/x-patch; name="0001-Porting-TCG-to-alpha-platform.v2.patch", Size: 37704 bytes --]

From d51491a60ccf9ab91eb963ba07be2e590afda71d Mon Sep 17 00:00:00 2001
From: Dong Weiyu <cidentifier@yahoo.com.cn>
Date: Fri, 22 Jan 2010 23:10:43 +0800
Subject: [PATCH] Porting TCG to alpha platform

---
 cpu-all.h              |    2 +-
 tcg/alpha/tcg-target.c | 1081 ++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/alpha/tcg-target.h |   68 +++
 3 files changed, 1150 insertions(+), 1 deletions(-)
 create mode 100644 tcg/alpha/tcg-target.c
 create mode 100644 tcg/alpha/tcg-target.h

diff --git a/cpu-all.h b/cpu-all.h
index e0c3efd..bdf6fb2 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -22,7 +22,7 @@
 
 #include "qemu-common.h"
 
-#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__)
+#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__alpha__)
 #define WORDS_ALIGNED
 #endif
 
diff --git a/tcg/alpha/tcg-target.c b/tcg/alpha/tcg-target.c
new file mode 100644
index 0000000..977c9b1
--- /dev/null
+++ b/tcg/alpha/tcg-target.c
@@ -0,0 +1,1081 @@
+/*
+ * Tiny Code Generator for QEMU on ALPHA platform
+*/
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+    "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
+    "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15",
+    "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
+    "$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31",
+};
+#endif
+
+/*
+ * $26, $27, $29, $30 and $31 are special or reserved by the ABI;
+ * $28 is reserved as a temporary register.
+*/
+static const int tcg_target_reg_alloc_order[] = {
+    TCG_REG_9, TCG_REG_10, TCG_REG_11, TCG_REG_12, TCG_REG_13, TCG_REG_14,
+    TCG_REG_1, TCG_REG_2, TCG_REG_3, TCG_REG_4, TCG_REG_5, TCG_REG_6,
+    TCG_REG_7, TCG_REG_8, TCG_REG_22, TCG_REG_23, TCG_REG_24, TCG_REG_25,
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19, TCG_REG_20, TCG_REG_21
+};
+
+/*
+ * according to the alpha calling convention, these 6 registers are used for
+ * function parameter passing. if a function has more than 6 parameters, the
+ * remaining ones are passed on the stack.
+*/
+static const int tcg_target_call_iarg_regs[6] = { 
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19, TCG_REG_20, TCG_REG_21
+};
+
+/*
+ * according to the alpha calling convention, the function result is returned in $0.
+*/
+static const int tcg_target_call_oarg_regs[1] = { TCG_REG_0 };
+
+/*
+ * address of the common TB epilogue; exit_tb jumps back here.
+*/
+static uint8_t *tb_ret_addr;
+
+#define INSN_OP(x)     (((x) & 0x3f) << 26)
+#define INSN_FUNC1(x)  (((x) & 0x3) << 14)
+#define INSN_FUNC2(x)  (((x) & 0x7f) << 5)
+#define INSN_RA(x)     (((x) & 0x1f) << 21)
+#define INSN_RB(x)     (((x) & 0x1f) << 16)
+#define INSN_RC(x)     ((x) & 0x1f)
+#define INSN_LIT(x)    (((x) & 0xff) << 13)
+#define INSN_DISP16(x) ((x) & 0xffff)
+#define INSN_DISP21(x) ((x) & 0x1fffff)
+#define INSN_RSVED(x)  ((x) & 0x3fff)
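+
+/*
+ * The field macros above follow the Alpha instruction formats:
+ *   branch:  opcode[31:26] ra[25:21] disp[20:0]
+ *   memory:  opcode[31:26] ra[25:21] rb[20:16] disp[15:0]
+ *   operate: opcode[31:26] ra[25:21] rb[20:16] func[11:5] rc[4:0]
+ *   jump:    opcode[31:26] ra[25:21] rb[20:16] func[15:14] hint[13:0]
+ */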
+
+#define INSN_JMP       (INSN_OP(0x1a) | INSN_FUNC1(0))
+#define INSN_CALL      (INSN_OP(0x1a) | INSN_FUNC1(1))
+#define INSN_RET       (INSN_OP(0x1a) | INSN_FUNC1(2))
+#define INSN_BR        INSN_OP(0x30)
+#define INSN_BEQ       INSN_OP(0x39)
+#define INSN_BNE       INSN_OP(0x3d)
+#define INSN_BLBC      INSN_OP(0x38)
+#define INSN_BLBS      INSN_OP(0x3c)
+#define INSN_ADDL      (INSN_OP(0x10) | INSN_FUNC2(0))
+#define INSN_SUBL      (INSN_OP(0x10) | INSN_FUNC2(0x9))
+#define INSN_ADDQ      (INSN_OP(0x10) | INSN_FUNC2(0x20))
+#define INSN_SUBQ      (INSN_OP(0x10) | INSN_FUNC2(0x29))
+#define INSN_CMPEQ     (INSN_OP(0x10) | INSN_FUNC2(0x2d))
+#define INSN_CMPLT     (INSN_OP(0x10) | INSN_FUNC2(0x4d))
+#define INSN_CMPLE     (INSN_OP(0x10) | INSN_FUNC2(0x6d))
+#define INSN_CMPULT    (INSN_OP(0x10) | INSN_FUNC2(0x1d))
+#define INSN_CMPULE    (INSN_OP(0x10) | INSN_FUNC2(0x3d))
+#define INSN_MULL      (INSN_OP(0x13) | INSN_FUNC2(0))
+#define INSN_MULQ      (INSN_OP(0x13) | INSN_FUNC2(0x20))
+#define INSN_AND       (INSN_OP(0x11) | INSN_FUNC2(0))
+#define INSN_BIS       (INSN_OP(0x11) | INSN_FUNC2(0x20))
+#define INSN_XOR       (INSN_OP(0x11) | INSN_FUNC2(0x40))
+#define INSN_SLL       (INSN_OP(0x12) | INSN_FUNC2(0x39))
+#define INSN_SRL       (INSN_OP(0x12) | INSN_FUNC2(0x34))
+#define INSN_SRA       (INSN_OP(0x12) | INSN_FUNC2(0x3c))
+#define INSN_ZAPNOT    (INSN_OP(0x12) | INSN_FUNC2(0x31))
+#define INSN_SEXTB     (INSN_OP(0x1c) | INSN_FUNC2(0))
+#define INSN_SEXTW     (INSN_OP(0x1c) | INSN_FUNC2(0x1))
+#define INSN_LDA       INSN_OP(0x8)
+#define INSN_LDAH      INSN_OP(0x9)
+#define INSN_LDBU      INSN_OP(0xa)
+#define INSN_LDWU      INSN_OP(0xc)
+#define INSN_LDL       INSN_OP(0x28)
+#define INSN_LDQ       INSN_OP(0x29)
+#define INSN_STB       INSN_OP(0xe)
+#define INSN_STW       INSN_OP(0xd)
+#define INSN_STL       INSN_OP(0x2c)
+#define INSN_STQ       INSN_OP(0x2d)
+
+/*
+ * return the number of registers used for parameter passing on procedure
+ * calls. alpha uses $16~$21 to transfer the first 6 parameters.
+*/
+static inline int tcg_target_get_call_iarg_regs_count(int flags)
+{
+    return 6;
+}
+
+/*
+ * given a constraint, compute the available register set. this function is
+ * called once for each op at qemu's initialization stage.
+*/
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+    const char *ct_str = *pct_str;
+
+    switch(ct_str[0]) 
+    {
+    case 'r':
+        /* constraint 'r' means any register is okay */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        break;
+
+    case 'L':
+        /* constraint for qemu_ld/st */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_16);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_17);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_18);
+        break;
+
+    default:
+        return -1;
+    }
+
+    ct_str++;
+    *pct_str = ct_str;
+    return 0;
+}
+
+static inline int tcg_target_const_match( \
+    tcg_target_long val, const TCGArgConstraint *arg_ct)
+{
+    int ct = arg_ct->ct;
+    return (ct & TCG_CT_CONST) ? 1 : 0;
+}
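+
+/*
+ * No constant constraints are defined above, so TCG materializes every
+ * constant into a register first; the const_args[] branches in
+ * tcg_out_op() can therefore simply abort.
+ */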
+
+static inline void tcg_out_fmt_br(TCGContext *s, int opc, int ra, int disp)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_DISP21(disp));
+}
+
+static inline void tcg_out_fmt_mem(TCGContext *s, int opc, int ra, int rb, int disp)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_RB(rb)|INSN_DISP16(disp));
+}
+
+static inline void tcg_out_fmt_jmp(TCGContext *s, int opc, int ra, int rb, int rsved)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_RB(rb)|INSN_RSVED(rsved));
+}
+
+static inline void tcg_out_fmt_opr(TCGContext *s, int opc, int ra, int rb, int rc)
+{
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_RB(rb)|INSN_RC(rc));
+}
+
+static inline void tcg_out_fmt_opi(TCGContext *s, int opc, int ra, int lit, int rc)
+{
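+    /* setting bit 12 selects the 8-bit literal form of the operate format */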
+    tcg_out32(s, (opc)|INSN_RA(ra)|INSN_LIT(lit)|INSN_RC(rc)|(1<<12));
+}
+
+/*
+ * move from one register to another
+*/
+static inline void tcg_out_mov(TCGContext *s, int rc, int rb)
+{  
+    if ( rb != rc ) {
+        tcg_out_fmt_opr(s, INSN_BIS, TCG_REG_31, rb, rc);
+    }
+}
+
+static inline int _is_tmp_reg( int r)
+{
+    return (r == TMP_REG1);
+}
+
+static void tcg_out_movi( TCGContext *s, \
+    TCGType type, int ra, tcg_target_long orig)
+{
+    long l0, l1, l2=0, l3=0, extra=0;
+    tcg_target_long val = orig;
+    int rs = TCG_REG_31;
+
+    if ( type == TCG_TYPE_I32)
+        val = (int32_t)val;
+
+    l0 = (int16_t)val;
+    val = (val - l0) >> 16;
+    l1 = (int16_t)val;
+
+    if ( orig >> 31 == -1 || orig >> 31 == 0) {
+        if ( l1 < 0 && orig >= 0) {
+            extra = 0x4000;
+            l1 = (int16_t)(val - 0x4000);
+        }
+    } else {
+        val = (val - l1) >> 16;
+        l2 = (int16_t)val;
+        val = (val - l2) >> 16;
+        l3 = (int16_t)val;
+        
+        if (l3) {
+            tcg_out_fmt_mem(s, INSN_LDAH, ra, rs, l3);
+            rs = ra;
+        }
+        if (l2) {
+            tcg_out_fmt_mem(s, INSN_LDA, ra, rs, l2);
+            rs = ra;
+        }
+        if ( l3 || l2)
+            tcg_out_fmt_opi(s, INSN_SLL, ra, 32, ra);
+    }
+    
+    if (l1) {
+        tcg_out_fmt_mem(s, INSN_LDAH, ra, rs, l1);
+        rs = ra;
+    }
+    if (extra) {
+        tcg_out_fmt_mem(s, INSN_LDAH, ra, rs, extra);
+        rs = ra;
+    }
+
+    tcg_out_fmt_mem(s, INSN_LDA, ra, rs, l0);
+}
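+
+/*
+ * Worked example (a sketch): loading the 32-bit constant 0x12345678
+ * decomposes into l0 = 0x5678 and l1 = 0x1234, giving
+ *     ldah ra, 0x1234($31)
+ *     lda  ra, 0x5678(ra)
+ * The "extra = 0x4000" case splits the high part across two ldah
+ * instructions when l1 alone would sign-extend the wrong way.
+ */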
+
+static void tcg_out_ldst( TCGContext *s, \
+    int opc, int ra, int rb, tcg_target_long disp, int sext)
+{
+    if ( _is_tmp_reg(ra) || _is_tmp_reg(rb))
+        tcg_abort();
+
+    if (disp != (int16_t)disp) {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_fmt_opr(s, INSN_ADDQ, rb, TMP_REG1, TMP_REG1);
+        tcg_out_fmt_mem(s, opc, ra, TMP_REG1, 0);
+    } else {
+        tcg_out_fmt_mem(s, opc, ra, rb, disp);
+    }
+
+    switch ( opc) {
+    case INSN_STB:
+    case INSN_STW:
+    case INSN_STL:
+    case INSN_STQ:
+        break;
+    case INSN_LDBU:
+        if ( sext)
+            tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, ra, ra);
+        break;
+    case INSN_LDWU:
+        if ( sext)
+            tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, ra, ra);
+        break;
+    case INSN_LDL:
+        if ( !sext)
+            tcg_out_fmt_opi(s, INSN_ZAPNOT, ra, 0xf, ra);
+        break;
+    case INSN_LDQ:
+        break;
+    default:
+        tcg_abort();
+    }
+}
+
+static void tcg_out_ld( TCGContext *s, \
+    int type, int ra, int rb, tcg_target_long disp)
+{
+    int opc = ((type == TCG_TYPE_I32) ? INSN_LDL : INSN_LDQ);
+    tcg_out_ldst(s, opc, ra, rb, disp, 1);
+}
+
+static void tcg_out_st( TCGContext *s, \
+    int type, int ra, int rb, tcg_target_long disp)
+{
+    int opc = ((type == TCG_TYPE_I32) ? INSN_STL : INSN_STQ);
+    tcg_out_ldst(s, opc, ra, rb, disp, 0);
+}
+
+/*
+ * generate an arithmetic instruction with an immediate operand. ra is used
+ * as both input and output, and val is the other input.
+*/
+static inline void tgen_arithi( \
+    TCGContext *s, int opc, int ra, tcg_target_long val)
+{
+    if ( _is_tmp_reg(ra))
+        tcg_abort();
+
+    if (val == (uint8_t)val) {
+        tcg_out_fmt_opi(s, opc, ra, val, ra);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, val);
+        tcg_out_fmt_opr(s, opc, ra, TMP_REG1, ra);
+    }
+}
+
+static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+{
+    if (val != 0)
+        tgen_arithi(s, INSN_ADDQ, reg, val);
+}
+
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+    tcg_out_fmt_opi(s, INSN_SUBQ, TCG_REG_30, 8, TCG_REG_30);
+    tcg_out_fmt_mem(s, INSN_STQ, reg, TCG_REG_30, 0);
+}
+
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+    tcg_out_fmt_mem(s, INSN_LDQ, reg, TCG_REG_30, 0);
+    tcg_out_fmt_opi(s, INSN_ADDQ, TCG_REG_30, 8, TCG_REG_30);
+}
+
+static const uint64_t tcg_cond_to_jcc[10] = {
+    [TCG_COND_EQ] = INSN_CMPEQ,
+    [TCG_COND_NE] = INSN_CMPEQ,
+    [TCG_COND_LT] = INSN_CMPLT,
+    [TCG_COND_GE] = INSN_CMPLT,
+    [TCG_COND_LE] = INSN_CMPLE,
+    [TCG_COND_GT] = INSN_CMPLE,
+    [TCG_COND_LTU] = INSN_CMPULT,
+    [TCG_COND_GEU] = INSN_CMPULT,
+    [TCG_COND_LEU] = INSN_CMPULE,
+    [TCG_COND_GTU] = INSN_CMPULE
+};
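+
+/*
+ * Each pair of inverse conditions (EQ/NE, LT/GE, ...) maps to the same
+ * compare instruction; tcg_out_brcond() then uses the low bit of the TCG
+ * condition to branch on the compare result (blbs, even conditions) or on
+ * its negation (blbc, odd conditions).
+ */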
+
+static void patch_reloc(uint8_t *x_ptr, int type,
+                        tcg_target_long value,
+                        tcg_target_long addend)
+{
+    uint32_t *code_ptr = (uint32_t *)x_ptr;
+    uint32_t insn = *code_ptr;
+
+    switch (type) {
+    case R_ALPHA_BRADDR:
+        value -= (long)x_ptr + 4;
+        if ((value & 3) || value < -0x400000 || value >= 0x400000) {
+            tcg_abort();
+        }
+        *code_ptr = insn | INSN_DISP21(value >> 2);
+        break;
+
+    default:
+        tcg_abort();
+    }
+}
+
+static void tcg_out_br(TCGContext *s, int opc, int ra, int label_index)
+{
+    TCGLabel *l = &s->labels[label_index];
+    tcg_target_long value;
+
+    if (l->has_value) {
+        value = l->u.value;
+        value -= (long)s->code_ptr + 4;
+        if ((value & 3) || value < -0x400000 || value >= 0x400000) {
+            tcg_abort();
+        }
+        value >>= 2;
+    } else {
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_BRADDR, label_index, 0);
+        value = 0;
+    }
+    tcg_out_fmt_br(s, opc, ra, value);
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond, \
+    TCGArg arg1, TCGArg arg2, int const_arg2, int label_index)
+{
+    int opc;
+
+    if ( cond < TCG_COND_EQ || cond > TCG_COND_GTU || const_arg2)
+        tcg_abort();
+
+    opc = tcg_cond_to_jcc[cond];
+    tcg_out_fmt_opr(s, opc, arg1, arg2, TMP_REG1);
+
+    opc = (cond & 1) ? INSN_BLBC : INSN_BLBS;
+    tcg_out_br(s, opc, TMP_REG1, label_index);
+}
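+
+/*
+ * Example (a sketch): brcond_i64 with TCG_COND_LT emits
+ *     cmplt arg1, arg2, TMP_REG1
+ *     blbs  TMP_REG1, <label>
+ * while TCG_COND_GE shares the same cmplt and branches with blbc instead.
+ */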
+
+#if defined(CONFIG_SOFTMMU)
+
+#include "../../softmmu_defs.h"
+
+static void *qemu_ld_helpers[4] = {
+    __ldb_mmu,
+    __ldw_mmu,
+    __ldl_mmu,
+    __ldq_mmu,
+};
+
+static void *qemu_st_helpers[4] = {
+    __stb_mmu,
+    __stw_mmu,
+    __stl_mmu,
+    __stq_mmu,
+};
+
+#endif
+
+static void tcg_out_mov_addr( TCGContext *s, int ret, int addr)
+{
+    tcg_out_mov(s, ret, addr);
+#if TARGET_LONG_BITS == 32
+    /* if VM is of 32-bit arch, clear higher 32-bit of addr */
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, ret, 0x0f, ret);
+#endif
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+{
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;
+    tcg_target_long val;
+
+#if defined(CONFIG_SOFTMMU)
+    uint8_t *label1_ptr, *label2_ptr;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+    mem_index = *args;
+    s_bits = opc & 3;
+
+    r0 = TCG_REG_16;
+    r1 = TCG_REG_17;
+
+#if defined(CONFIG_SOFTMMU)
+
+    tcg_out_mov_addr(s, r1, addr_reg); 
+    tcg_out_mov_addr(s, r0, addr_reg); 
+ 
+    tgen_arithi(s, INSN_AND, r0, TARGET_PAGE_MASK|((1<<s_bits)-1));
+
+    tgen_arithi(s, INSN_SRL, r1, TARGET_PAGE_BITS-CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, INSN_AND, r1, (CPU_TLB_SIZE-1)<<CPU_TLB_ENTRY_BITS);
+    
+    tgen_arithi(s, INSN_ADDQ, r1, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TCG_REG_15, r1);
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_mem(s, INSN_LDL, TMP_REG1, r1, 0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, TMP_REG1, 0x0f, TMP_REG1);
+#else
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1, 0);
+#endif
+
+    //
+    // r0 now holds the masked guest address (page bits plus the low
+    // alignment bits) and TMP_REG1 holds the tlb_entry.addr_read value;
+    // compare them below to decide between TLB hit and miss.
+    //
+    tcg_out_fmt_opr(s, INSN_CMPEQ, TMP_REG1, r0, TMP_REG1);
+
+    tcg_out_mov_addr(s, r0, addr_reg);
+
+    //
+    // if they are equal we branch to label1. label1 is not resolved yet,
+    // so reserve one instruction slot here and patch it below.
+    //
+    label1_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    //
+    // fall-through path: the addresses differ (TLB miss); call the helper.
+    //
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_17, mem_index);
+    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_27, (tcg_target_long)qemu_ld_helpers[s_bits]);
+    tcg_out_push(s, addr_reg);
+    tcg_out_fmt_jmp(s, INSN_CALL, TCG_REG_26, TCG_REG_27, 0);
+    tcg_out_pop(s, addr_reg);
+
+    //
+    // the helper returns the loaded value in $0; sign- or zero-extend it
+    // into data_reg according to opc.
+    //
+    switch(opc) {
+    case 0 | 4:
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, TCG_REG_0, data_reg);
+        break;
+    case 1 | 4:
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, TCG_REG_0, data_reg);
+        break;
+    case 2 | 4:
+        tcg_out_fmt_opr(s, INSN_ADDL, TCG_REG_0, TCG_REG_31, data_reg);
+        break;
+    case 0:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, TCG_REG_0, 0x1, data_reg);
+        break;
+    case 1:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, TCG_REG_0, 0x3, data_reg);
+        break;
+    case 2:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, TCG_REG_0, 0xf, data_reg);
+        break;
+    case 3:
+        tcg_out_mov(s, data_reg, TCG_REG_0);
+        break;
+    default:
+        tcg_abort();
+        break;
+    }
+
+    //
+    // done; jump to label2. label2 is not resolved yet, so reserve an
+    // instruction slot and patch it below.
+    //
+    label2_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    // label1: patch the conditional branch reserved above to target this point
+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t)
+        (INSN_BNE | (TMP_REG1 << 21) | (val & 0x1fffff));
+
+    //
+    // if we get here the TLB lookup hit: r0 holds the guest address and
+    // r1 points to tlb_entry.addr_read. load tlb_entry.addend (64-bit on
+    // alpha) and add it to r0 to obtain the host VA.
+    //
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1,
+        offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TMP_REG1, r1);
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1, 0);
+    tcg_out_fmt_opr(s, INSN_ADDQ, r0, TMP_REG1, r0);
+	
+#else
+    r0 = addr_reg;
+#endif /* CONFIG_SOFTMMU */
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    tcg_abort();
+#endif
+
+    //
+    // r0 now holds the host VA through which the guest memory is accessed
+    //
+    switch(opc) {
+    case 0:
+        tcg_out_fmt_mem(s, INSN_LDBU, data_reg, r0, 0);
+        break;
+    case 0 | 4:
+        tcg_out_fmt_mem(s, INSN_LDBU, data_reg, r0, 0);
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, data_reg, data_reg);
+        break;
+    case 1:
+        tcg_out_fmt_mem(s, INSN_LDWU, data_reg, r0, 0);
+        break;
+    case 1 | 4:
+        tcg_out_fmt_mem(s, INSN_LDWU, data_reg, r0, 0);
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, data_reg, data_reg);
+        break;
+    case 2:
+        tcg_out_fmt_mem(s, INSN_LDL, data_reg, r0, 0);
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, data_reg, 0xf, data_reg);
+        break;
+    case 2 | 4:
+        tcg_out_fmt_mem(s, INSN_LDL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_fmt_mem(s, INSN_LDQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+
+#if defined(CONFIG_SOFTMMU)
+    /* label2: patch the branch reserved above to target this point */
+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)
+        (INSN_BR | (TCG_REG_31 << 21) | (val & 0x1fffff));
+#endif
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+{
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;
+    tcg_target_long val;
+
+#if defined(CONFIG_SOFTMMU)
+    uint8_t *label1_ptr, *label2_ptr;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+    mem_index = *args;
+    s_bits = opc&3;
+
+    r0 = TCG_REG_16;
+    r1 = TCG_REG_17;
+
+#if defined(CONFIG_SOFTMMU)
+
+    tcg_out_mov_addr(s, r1, addr_reg); 
+    tcg_out_mov_addr(s, r0, addr_reg); 
+
+    tgen_arithi(s, INSN_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+    tgen_arithi(s, INSN_SRL, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, INSN_AND, r1, (CPU_TLB_SIZE-1) << CPU_TLB_ENTRY_BITS);
+
+    tgen_arithi(s, INSN_ADDQ, r1, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TCG_REG_15, r1);
+
+#if TARGET_LONG_BITS == 32
+    tcg_out_fmt_mem(s, INSN_LDL, TMP_REG1, r1, 0);
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, TMP_REG1, 0x0f, TMP_REG1);
+#else
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1, 0);
+#endif
+
+    //
+    // r0 now holds the masked guest address and TMP_REG1 holds the
+    // tlb_entry.addr_write value; compare them below.
+    //
+    tcg_out_fmt_opr(s, INSN_CMPEQ, TMP_REG1, r0, TMP_REG1);
+
+    tcg_out_mov_addr(s, r0, addr_reg);
+
+    //
+    // if they are equal we branch to label1. label1 is not resolved yet,
+    // so reserve one instruction slot here and patch it below.
+    //
+    label1_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    // fall-through path: the addresses differ (TLB miss); call the helper.
+    tcg_out_mov(s, TCG_REG_17, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_18, mem_index);
+    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_27, (tcg_target_long)qemu_st_helpers[s_bits]);       
+    tcg_out_push(s, data_reg);
+    tcg_out_push(s, addr_reg);
+    tcg_out_fmt_jmp(s, INSN_CALL, TCG_REG_26, TCG_REG_27, 0);
+    tcg_out_pop(s, addr_reg);
+    tcg_out_pop(s, data_reg);
+
+    //
+    // done; jump to label2. label2 is not resolved yet, so reserve an
+    // instruction slot and patch it below.
+    //
+    label2_ptr = s->code_ptr;
+    s->code_ptr += 4;
+
+    // label1: patch the conditional branch reserved above to target this point
+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t)
+        (INSN_BNE | (TMP_REG1 << 21) | (val & 0x1fffff));
+
+    //
+    // if we get here the TLB lookup hit: r0 holds the guest address and
+    // r1 points to tlb_entry.addr_write. load tlb_entry.addend (64-bit on
+    // alpha) and add it to r0 to obtain the host VA.
+    //
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1,
+        offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TMP_REG1, r1);
+    tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, r1,  0);
+    tcg_out_fmt_opr(s, INSN_ADDQ, r0, TMP_REG1, r0);
+
+#else
+    r0 = addr_reg;
+#endif
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    tcg_abort();
+#endif
+
+    //
+    // r0 now holds the host VA through which the guest memory is accessed
+    //
+    switch(opc) {
+    case 0:
+        tcg_out_fmt_mem(s, INSN_STB, data_reg, r0, 0);
+        break;
+    case 1:
+        tcg_out_fmt_mem(s, INSN_STW, data_reg, r0, 0);
+        break;
+    case 2:
+        tcg_out_fmt_mem(s, INSN_STL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_fmt_mem(s, INSN_STQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+
+#if defined(CONFIG_SOFTMMU)
+    /* label2: patch the branch reserved above to target this point */
+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if (!(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)
+        (INSN_BR | (TCG_REG_31 << 21) | (val & 0x1fffff));
+#endif
+}
+
+static inline void tcg_out_op(TCGContext *s, \
+	int opc, const TCGArg *args, const int *const_args)
+{
+    int oc;
+    switch(opc)
+    {
+    case INDEX_op_exit_tb:
+        /*
+         * exit_tb t0: t0 is always constant and is handed back to the
+         * engine. we return to the engine right away, so $0 and TMP_REG1
+         * are free to clobber here.
+        */
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_0, args[0]);
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)tb_ret_addr);
+        tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, TMP_REG1, 0);
+        break;
+
+    case INDEX_op_goto_tb:
+        /* goto_tb idx, where idx is constant 0 or 1, indicating the branch # */
+        if (s->tb_jmp_offset) {
+            /* we don't support direct jmp */
+            tcg_abort();
+        } else {
+            tcg_out_movi( s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)(s->tb_next + args[0]));
+            tcg_out_fmt_mem(s, INSN_LDQ, TMP_REG1, TMP_REG1, 0);
+            tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, TMP_REG1, 0);
+        }
+        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        break;
+
+    case INDEX_op_call:
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            tcg_out_mov(s, TCG_REG_27, args[0]);
+            tcg_out_fmt_jmp(s, INSN_CALL, TCG_REG_26, args[0], 0);
+        }
+        break;
+
+    case INDEX_op_jmp: 
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, args[0], 0);
+        }
+        break;
+
+    case INDEX_op_br:
+        tcg_out_br(s, INSN_BR, TCG_REG_31, args[0]);
+        break;
+
+    case INDEX_op_ld8u_i32: 
+    case INDEX_op_ld8u_i64:
+        tcg_out_ldst( s, INSN_LDBU, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_ld8s_i32: 
+    case INDEX_op_ld8s_i64: 
+        tcg_out_ldst( s, INSN_LDBU, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16u_i64:
+        tcg_out_ldst( s, INSN_LDWU, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld16s_i64: 
+        tcg_out_ldst( s, INSN_LDWU, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_ld32u_i64: 
+        tcg_out_ldst( s, INSN_LDL, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_ld_i32:
+    case INDEX_op_ld32s_i64:
+        tcg_out_ldst( s, INSN_LDL, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_ld_i64: 
+        tcg_out_ldst( s, INSN_LDQ, args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64: 
+        tcg_out_ldst( s, INSN_STB, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64: 
+        tcg_out_ldst( s, INSN_STW, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_st_i32:
+    case INDEX_op_st32_i64:
+        tcg_out_ldst( s, INSN_STL, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_st_i64: 
+        tcg_out_ldst( s, INSN_STQ, args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_add_i32: 
+    case INDEX_op_add_i64: 
+        oc = INSN_ADDQ;
+        goto gen_arith;
+    case INDEX_op_sub_i32: 
+    case INDEX_op_sub_i64:
+        oc = INSN_SUBQ;
+        goto gen_arith;
+    case INDEX_op_mul_i32:
+        oc = INSN_MULL;
+        goto gen_arith;
+    case INDEX_op_mul_i64: 
+        oc = INSN_MULQ;
+        goto gen_arith;
+    case INDEX_op_and_i32:
+    case INDEX_op_and_i64:
+        oc = INSN_AND;
+        goto gen_arith;
+    case INDEX_op_or_i32:
+    case INDEX_op_or_i64: 
+        oc = INSN_BIS;
+        goto gen_arith;
+    case INDEX_op_xor_i32:
+    case INDEX_op_xor_i64:
+        oc = INSN_XOR;
+        goto gen_arith;
+    case INDEX_op_shl_i32:
+    case INDEX_op_shl_i64:
+        oc = INSN_SLL;
+        goto gen_arith;
+    case INDEX_op_shr_i32:
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, args[1], 0x0f, args[1]);
+        /* fall through */
+    case INDEX_op_shr_i64:
+        oc = INSN_SRL;
+        goto gen_arith;
+    case INDEX_op_sar_i32:
+        tcg_out_fmt_opr(s, INSN_ADDL, args[1], TCG_REG_31, args[1]);
+        /* fall through */
+    case INDEX_op_sar_i64:
+        oc = INSN_SRA;
+    gen_arith:
+        if (const_args[2]) {
+            tcg_abort();
+        } else {
+            tcg_out_fmt_opr(s, oc, args[1], args[2], args[0]);
+        }
+        break;
+
+    case INDEX_op_brcond_i32:
+        tcg_out_fmt_opr(s, INSN_ADDL, args[0], TCG_REG_31, args[0]);
+        tcg_out_fmt_opr(s, INSN_ADDL, args[1], TCG_REG_31, args[1]);
+        /* fall through */
+    case INDEX_op_brcond_i64:
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        break;
+
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+        tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_31, args[1], args[0]);
+        break;
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+        tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_31, args[1], args[0]);
+        break;
+    case INDEX_op_ext32s_i64:
+        tcg_out_fmt_opr(s, INSN_ADDL, args[1], TCG_REG_31, args[0]);
+        break;
+    
+    case INDEX_op_qemu_ld8u:
+        tcg_out_qemu_ld(s, args, 0);
+        break;
+    case INDEX_op_qemu_ld8s:
+        tcg_out_qemu_ld(s, args, 0 | 4);
+        break;
+    case INDEX_op_qemu_ld16u:
+        tcg_out_qemu_ld(s, args, 1);
+        break;
+    case INDEX_op_qemu_ld16s:
+        tcg_out_qemu_ld(s, args, 1 | 4);
+        break;
+    case INDEX_op_qemu_ld32u:
+        tcg_out_qemu_ld(s, args, 2);
+        break;
+    case INDEX_op_qemu_ld32s:
+        tcg_out_qemu_ld(s, args, 2 | 4);
+        break;
+    case INDEX_op_qemu_ld64:
+        tcg_out_qemu_ld(s, args, 3);
+        break;
+
+    case INDEX_op_qemu_st8:
+        tcg_out_qemu_st(s, args, 0);
+        break;
+    case INDEX_op_qemu_st16:
+        tcg_out_qemu_st(s, args, 1);
+        break;
+    case INDEX_op_qemu_st32:
+        tcg_out_qemu_st(s, args, 2);
+        break;
+    case INDEX_op_qemu_st64:
+        tcg_out_qemu_st(s, args, 3);
+        break;
+
+    case INDEX_op_movi_i32: 
+    case INDEX_op_movi_i64: 
+    case INDEX_op_mov_i32: 
+    case INDEX_op_mov_i64:
+    case INDEX_op_div2_i32:
+    case INDEX_op_divu2_i32:
+    default:
+        tcg_abort();
+    }
+}
+
+static const TCGTargetOpDef alpha_op_defs[] = {
+    { INDEX_op_exit_tb, { } },
+    { INDEX_op_goto_tb, { } },
+    { INDEX_op_call, { "r" } },
+    { INDEX_op_jmp, { "r" } },
+    { INDEX_op_br, { } },
+
+    { INDEX_op_mov_i32, { "r", "r" } },
+    { INDEX_op_movi_i32, { "r" } },
+    { INDEX_op_ld8u_i32, { "r", "r" } },
+    { INDEX_op_ld8s_i32, { "r", "r" } },
+    { INDEX_op_ld16u_i32, { "r", "r" } },
+    { INDEX_op_ld16s_i32, { "r", "r" } },
+    { INDEX_op_ld_i32, { "r", "r" } },
+    { INDEX_op_st8_i32, { "r", "r" } },
+    { INDEX_op_st16_i32, { "r", "r" } },
+    { INDEX_op_st_i32, { "r", "r" } },
+
+    { INDEX_op_add_i32, { "r", "0", "r" } },
+    { INDEX_op_mul_i32, { "r", "0", "r" } },
+    { INDEX_op_sub_i32, { "r", "0", "r" } },
+    { INDEX_op_and_i32, { "r", "0", "r" } },
+    { INDEX_op_or_i32, { "r", "0", "r" } },
+    { INDEX_op_xor_i32, { "r", "0", "r" } },
+
+    { INDEX_op_shl_i32, { "r", "0", "r" } },
+    { INDEX_op_shr_i32, { "r", "0", "r" } },
+    { INDEX_op_sar_i32, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i32, { "r", "r" } },		
+
+    { INDEX_op_mov_i64, { "r", "r" } },	
+    { INDEX_op_movi_i64, { "r" } },
+    { INDEX_op_ld8u_i64, { "r", "r" } },
+    { INDEX_op_ld8s_i64, { "r", "r" } },
+    { INDEX_op_ld16u_i64, { "r", "r" } },
+    { INDEX_op_ld16s_i64, { "r", "r" } },
+    { INDEX_op_ld32u_i64, { "r", "r" } },
+    { INDEX_op_ld32s_i64, { "r", "r" } },
+    { INDEX_op_ld_i64, { "r", "r" } },
+    { INDEX_op_st8_i64, { "r", "r" } },	
+    { INDEX_op_st16_i64, { "r", "r" } },
+    { INDEX_op_st32_i64, { "r", "r" } },
+    { INDEX_op_st_i64, { "r", "r" } },
+
+    { INDEX_op_add_i64, { "r", "0", "r" } },
+    { INDEX_op_mul_i64, { "r", "0", "r" } },
+    { INDEX_op_sub_i64, { "r", "0", "r" } },
+    { INDEX_op_and_i64, { "r", "0", "r" } },
+    { INDEX_op_or_i64, { "r", "0", "r" } },
+    { INDEX_op_xor_i64, { "r", "0", "r" } },
+
+    { INDEX_op_shl_i64, { "r", "0", "r" } },
+    { INDEX_op_shr_i64, { "r", "0", "r" } },
+    { INDEX_op_sar_i64, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i64, { "r", "r" } },
+
+    { INDEX_op_ext8s_i32, { "r", "r"} },
+    { INDEX_op_ext16s_i32, { "r", "r"} },
+    { INDEX_op_ext8s_i64, { "r", "r"} },
+    { INDEX_op_ext16s_i64, { "r", "r"} },
+    { INDEX_op_ext32s_i64, { "r", "r"} },
+
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32s, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L"} },
+    { -1 },
+};
+
+
+static int tcg_target_callee_save_regs[] = {
+    TCG_REG_15,		// holds the global env pointer; callee-saved, so preserve it
+    TCG_REG_9,
+    TCG_REG_10,
+    TCG_REG_11,
+    TCG_REG_12,
+    TCG_REG_13,
+    TCG_REG_14
+};
+
+/*
+ * Generate global QEMU prologue and epilogue code 
+*/
+void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int i, frame_size, push_size, stack_addend;
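+
+    /*
+     * Frame layout, from high to low addresses: the four special
+     * registers $26-$29, the callee-saved registers, then
+     * TCG_STATIC_CALL_ARGS_SIZE bytes of outgoing-argument space,
+     * with the total rounded up to TCG_TARGET_STACK_ALIGN.
+     */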
+   
+    /* TB prologue */
+
+    /* save the special registers $26-$29 */
+    tcg_out_push(s, TCG_REG_26);
+    tcg_out_push(s, TCG_REG_27);
+    tcg_out_push(s, TCG_REG_28);
+    tcg_out_push(s, TCG_REG_29);
+
+    /* save all callee saved registers */
+    for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_push(s, tcg_target_callee_save_regs[i]);
+    }
+	
+    /* reserve some stack space */
+    push_size = 8 + (4 + ARRAY_SIZE(tcg_target_callee_save_regs)) * 8;
+    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1);
+    stack_addend = frame_size - push_size;
+    tgen_arithi(s, INSN_ADDQ, TCG_REG_30, -stack_addend);
+
+    tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_31, TCG_REG_16, 0);    /* jump to the TB at $16 (first argument) */
+
+    /* TB epilogue */
+    tb_ret_addr = s->code_ptr;
+    tgen_arithi(s, INSN_ADDQ, TCG_REG_30, stack_addend);
+    for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
+        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
+    }
+
+    tcg_out_pop(s, TCG_REG_29);
+    tcg_out_pop(s, TCG_REG_28);
+    tcg_out_pop(s, TCG_REG_27);
+    tcg_out_pop(s, TCG_REG_26);
+    tcg_out_fmt_jmp(s, INSN_RET, TCG_REG_31, TCG_REG_26, 0);		/* ret */
+}
+
+
+void tcg_target_init(TCGContext *s)
+{
+    /* fail-safe: the qemu_ld/st fast path assumes this TLB entry size */
+    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
+        tcg_abort();
+
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+                     (1 << TCG_REG_0 ) | (1 << TCG_REG_1 ) | (1 << TCG_REG_2 ) |
+                     (1 << TCG_REG_3 ) | (1 << TCG_REG_4 ) | (1 << TCG_REG_5 ) |
+                     (1 << TCG_REG_6 ) | (1 << TCG_REG_7 ) | (1 << TCG_REG_8 ) |
+                     (1 << TCG_REG_16) | (1 << TCG_REG_17) | (1 << TCG_REG_18) |
+                     (1 << TCG_REG_19) | (1 << TCG_REG_20) | (1 << TCG_REG_21) |
+                     (1 << TCG_REG_22) | (1 << TCG_REG_23) | (1 << TCG_REG_24) |
+                     (1 << TCG_REG_25));
+
+    tcg_regset_clear(s->reserved_regs);
+    /* $26-$31 are never allocated by tcg.c */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_26);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_27);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_28);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_29);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_30);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_31);
+
+    tcg_add_target_add_op_defs(alpha_op_defs);
+}
+
diff --git a/tcg/alpha/tcg-target.h b/tcg/alpha/tcg-target.h
new file mode 100644
index 0000000..e5083a5
--- /dev/null
+++ b/tcg/alpha/tcg-target.h
@@ -0,0 +1,68 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define TCG_TARGET_ALPHA 1
+
+#define TCG_TARGET_REG_BITS 64
+
+#define TCG_TARGET_NB_REGS 32
+
+enum {
+    TCG_REG_0 = 0, TCG_REG_1, TCG_REG_2, TCG_REG_3,
+    TCG_REG_4, TCG_REG_5, TCG_REG_6, TCG_REG_7,
+    TCG_REG_8, TCG_REG_9, TCG_REG_10, TCG_REG_11,
+    TCG_REG_12, TCG_REG_13, TCG_REG_14, TCG_REG_15,
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19,
+    TCG_REG_20, TCG_REG_21, TCG_REG_22, TCG_REG_23,
+    TCG_REG_24, TCG_REG_25, TCG_REG_26, TCG_REG_27,
+    TCG_REG_28, TCG_REG_29, TCG_REG_30, TCG_REG_31
+};
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_30
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET 0
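+/* tcg.c places stack-passed call arguments at this offset from $sp,
+   inside the TCG_STATIC_CALL_ARGS_SIZE area reserved by the prologue */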
+
+/* we have sign-extension instructions */
+#define TCG_TARGET_HAS_ext8s_i32
+#define TCG_TARGET_HAS_ext16s_i32
+#define TCG_TARGET_HAS_ext8s_i64
+#define TCG_TARGET_HAS_ext16s_i64
+#define TCG_TARGET_HAS_ext32s_i64
+
+/* Note: must be synced with dyngen-exec.h */
+#define TCG_AREG0 TCG_REG_15
+#define TCG_AREG1 TCG_REG_9
+#define TCG_AREG2 TCG_REG_10
+#define TCG_AREG3 TCG_REG_11
+#define TCG_AREG4 TCG_REG_12
+#define TCG_AREG5 TCG_REG_13
+#define TCG_AREG6 TCG_REG_14
+
+#define TMP_REG1 TCG_REG_28
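+/* $28 is the Alpha assembler temporary ($at); it is kept out of the
+   allocatable set in tcg_target_init, so the backend may use it as a
+   scratch register */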
+
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+    /* CALL_PAL IMB (0x86) flushes the entire instruction cache,
+       so the start/stop range is ignored */
+    __asm__ __volatile__ ("call_pal 0x86");
+}
+
-- 
1.6.3.3



end of thread, other threads:[~2010-01-31 23:09 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-19  8:47 [Qemu-devel] [PATCH] Porting TCG to alpha platform identifier scorpio
2010-01-19 20:18 ` Richard Henderson
2010-01-19 21:35   ` malc
2010-01-19 21:42 ` Stefan Weil
  -- strict thread matches above, loose matches on Subject: below --
2010-01-20 17:19 identifier scorpio
2010-01-20 21:26 ` Richard Henderson
2010-01-21  3:42 identifier scorpio
2010-01-21 18:18 ` Stefan Weil
2010-01-22 15:47 identifier scorpio
2010-01-22 18:00 ` Richard Henderson
2010-01-26  1:19 ` Richard Henderson
2010-01-29  1:55   ` identifier scorpio
2010-01-29 17:04     ` Richard Henderson
2010-01-29 21:38       ` Edgar E. Iglesias
2010-01-29 23:04         ` Stefan Weil
2010-01-30  0:38           ` Edgar E. Iglesias
2010-01-30  1:14           ` Laurent Desnogues
2010-01-29 17:37   ` Richard Henderson
2010-01-29 19:19   ` Richard Henderson
2010-01-30  2:45     ` identifier scorpio
2010-01-31 23:09       ` Richard Henderson
