* [Qemu-devel] [PATCH]: sh4 delay slot code update
@ 2007-11-28 9:54 Magnus Damm
2007-11-28 12:49 ` Paul Mundt
0 siblings, 1 reply; 6+ messages in thread
From: Magnus Damm @ 2007-11-28 9:54 UTC (permalink / raw)
To: qemu-devel
[-- Attachment #1: Type: text/plain, Size: 979 bytes --]
Hi everyone,
This patch updates the delay slot handling for the sh4 target.
The main feature is that delay slot flags now are stored in the tlb.
This is needed for signal emulation and interrupts. There are also
some correctness fixes included in this patch - with this patch
applied i'm able to run quite a few dynamically linked busybox applets
(uclibc). Some of these applets were broken without this fix. I've ran
the sh4 binaries included in linux-user-test-0.2 and they work as
usual.
Some code and ideas come from the mips target. This is done to make
the sh4 code less special and more readable for people coming from
that architecture.
cpu-exec.c | 4 -
target-sh4/cpu.h | 18 +++----
target-sh4/op.c | 54 +++++++----------------
target-sh4/translate.c | 112 ++++++++++++++++++++++++++++++------------------
translate-all.c | 5 +-
5 files changed, 104 insertions(+), 89 deletions(-)
Please apply! Thank you.
/ magnus
[-- Attachment #2: qemu-cvs-20071128c-sh-delay-slot.patch --]
[-- Type: application/octet-stream, Size: 12809 bytes --]
--- 0001/cpu-exec.c
+++ work/cpu-exec.c 2007-11-28 17:55:52.000000000 +0900
@@ -202,8 +202,8 @@ static inline TranslationBlock *tb_find_
cs_base = 0;
pc = env->pc;
#elif defined(TARGET_SH4)
- flags = env->sr & (SR_MD | SR_RB);
- cs_base = 0; /* XXXXX */
+ flags = env->flags;
+ cs_base = 0;
pc = env->pc;
#elif defined(TARGET_ALPHA)
flags = env->ps;
--- 0001/target-sh4/cpu.h
+++ work/target-sh4/cpu.h 2007-11-28 17:55:52.000000000 +0900
@@ -46,16 +46,16 @@
#define FPSCR_SZ (1 << 20)
#define FPSCR_PR (1 << 19)
#define FPSCR_DN (1 << 18)
-
-#define DELAY_SLOT (1 << 0) /* Must be the same as SR_T. */
-/* This flag is set if the next insn is a delay slot for a conditional jump.
- The dynamic value of the DELAY_SLOT determines whether the jup is taken. */
+#define DELAY_SLOT (1 << 0)
#define DELAY_SLOT_CONDITIONAL (1 << 1)
-/* Those are used in contexts only */
-#define BRANCH (1 << 2)
-#define BRANCH_CONDITIONAL (1 << 3)
-#define MODE_CHANGE (1 << 4) /* Potential MD|RB change */
-#define BRANCH_EXCEPTION (1 << 5) /* Branch after exception */
+#define DELAY_SLOT_TRUE (1 << 2)
+#define DELAY_SLOT_CLEARME (1 << 3)
+/* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump
+ * after the delay slot should be taken or not. It is calculated from SR_T.
+ *
+ * It is unclear if it is permitted to modify the SR_T flag in a delay slot.
+ * The use of DELAY_SLOT_TRUE flag makes us accept such SR_T modification.
+ */
/* XXXXX The structure could be made more compact */
typedef struct tlb_t {
--- 0001/target-sh4/op.c
+++ work/target-sh4/op.c 2007-11-28 17:55:52.000000000 +0900
@@ -19,16 +19,6 @@
*/
#include "exec.h"
-static inline void set_flag(uint32_t flag)
-{
- env->flags |= flag;
-}
-
-static inline void clr_flag(uint32_t flag)
-{
- env->flags &= ~flag;
-}
-
static inline void set_t(void)
{
env->sr |= SR_T;
@@ -110,28 +100,37 @@ void OPPROTO op_not_T0(void)
void OPPROTO op_bf_s(void)
{
env->delayed_pc = PARAM1;
- set_flag(DELAY_SLOT_CONDITIONAL | ((~env->sr) & SR_T));
+ if (!(env->sr & SR_T)) {
+ env->flags |= DELAY_SLOT_TRUE;
+ }
RETURN();
}
void OPPROTO op_bt_s(void)
{
env->delayed_pc = PARAM1;
- set_flag(DELAY_SLOT_CONDITIONAL | (env->sr & SR_T));
+ if (env->sr & SR_T) {
+ env->flags |= DELAY_SLOT_TRUE;
+ }
+ RETURN();
+}
+
+void OPPROTO op_store_flags(void)
+{
+ env->flags &= DELAY_SLOT_TRUE;
+ env->flags |= PARAM1;
RETURN();
}
void OPPROTO op_bra(void)
{
env->delayed_pc = PARAM1;
- set_flag(DELAY_SLOT);
RETURN();
}
void OPPROTO op_braf_T0(void)
{
env->delayed_pc = PARAM1 + T0;
- set_flag(DELAY_SLOT);
RETURN();
}
@@ -139,7 +138,6 @@ void OPPROTO op_bsr(void)
{
env->pr = PARAM1;
env->delayed_pc = PARAM2;
- set_flag(DELAY_SLOT);
RETURN();
}
@@ -147,7 +145,6 @@ void OPPROTO op_bsrf_T0(void)
{
env->pr = PARAM1;
env->delayed_pc = PARAM1 + T0;
- set_flag(DELAY_SLOT);
RETURN();
}
@@ -155,26 +152,12 @@ void OPPROTO op_jsr_T0(void)
{
env->pr = PARAM1;
env->delayed_pc = T0;
- set_flag(DELAY_SLOT);
RETURN();
}
void OPPROTO op_rts(void)
{
env->delayed_pc = env->pr;
- set_flag(DELAY_SLOT);
- RETURN();
-}
-
-void OPPROTO op_clr_delay_slot(void)
-{
- clr_flag(DELAY_SLOT);
- RETURN();
-}
-
-void OPPROTO op_clr_delay_slot_conditional(void)
-{
- clr_flag(DELAY_SLOT_CONDITIONAL);
RETURN();
}
@@ -242,7 +225,6 @@ void OPPROTO op_rte(void)
{
env->sr = env->ssr;
env->delayed_pc = env->spc;
- set_flag(DELAY_SLOT);
RETURN();
}
@@ -458,7 +440,6 @@ void OPPROTO op_cmp_pz_T0(void)
void OPPROTO op_jmp_T0(void)
{
env->delayed_pc = T0;
- set_flag(DELAY_SLOT);
RETURN();
}
@@ -993,11 +974,10 @@ void OPPROTO op_jT(void)
void OPPROTO op_jdelayed(void)
{
- uint32_t flags;
- flags = env->flags;
- env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
- if (flags & DELAY_SLOT)
- GOTO_LABEL_PARAM(1);
+ if (env->flags & DELAY_SLOT_TRUE) {
+ env->flags &= ~DELAY_SLOT_TRUE;
+ GOTO_LABEL_PARAM(1);
+ }
RETURN();
}
--- 0001/target-sh4/translate.c
+++ work/target-sh4/translate.c 2007-11-28 18:04:58.000000000 +0900
@@ -57,11 +57,21 @@ typedef struct DisasContext {
uint32_t fpscr;
uint16_t opcode;
uint32_t flags;
+ int bstate;
int memidx;
uint32_t delayed_pc;
int singlestep_enabled;
} DisasContext;
+enum {
+ BS_NONE = 0, /* We go out of the TB without reaching a branch or an
+ * exception condition
+ */
+ BS_STOP = 1, /* We want to stop translation for any reason */
+ BS_BRANCH = 2, /* We reached a branch condition */
+ BS_EXCP = 3, /* We reached an exception condition */
+};
+
#ifdef CONFIG_USER_ONLY
#define GEN_OP_LD(width, reg) \
@@ -179,10 +189,6 @@ static void gen_goto_tb(DisasContext * c
/* Jump to pc after an exception */
static void gen_jump_exception(DisasContext * ctx)
{
- gen_op_movl_imm_T0(0);
- if (ctx->singlestep_enabled)
- gen_op_debug();
- gen_op_exit_tb();
}
static void gen_jump(DisasContext * ctx)
@@ -220,7 +226,7 @@ static void gen_delayed_conditional_jump
l1 = gen_new_label();
gen_op_jdelayed(l1);
- gen_goto_tb(ctx, 1, ctx->pc);
+ gen_goto_tb(ctx, 1, ctx->pc + 2);
gen_set_label(l1);
gen_jump(ctx);
}
@@ -248,10 +254,10 @@ static void gen_delayed_conditional_jump
#define CHECK_NOT_DELAY_SLOT \
if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) \
- {gen_op_raise_slot_illegal_instruction (); ctx->flags |= BRANCH_EXCEPTION; \
+ {gen_op_raise_slot_illegal_instruction (); ctx->bstate = BS_EXCP; \
return;}
-void decode_opc(DisasContext * ctx)
+void _decode_opc(DisasContext * ctx)
{
#if 0
fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode);
@@ -290,11 +296,11 @@ void decode_opc(DisasContext * ctx)
return;
case 0xfbfb: /* frchg */
gen_op_frchg();
- ctx->flags |= MODE_CHANGE;
+ ctx->bstate = BS_STOP;
return;
case 0xf3fb: /* fschg */
gen_op_fschg();
- ctx->flags |= MODE_CHANGE;
+ ctx->bstate = BS_STOP;
return;
case 0x0009: /* nop */
return;
@@ -805,7 +811,7 @@ void decode_opc(DisasContext * ctx)
CHECK_NOT_DELAY_SLOT
gen_conditional_jump(ctx, ctx->pc + 2,
ctx->pc + 4 + B7_0s * 2);
- ctx->flags |= BRANCH_CONDITIONAL;
+ ctx->bstate = BS_BRANCH;
return;
case 0x8f00: /* bf/s label */
CHECK_NOT_DELAY_SLOT
@@ -816,7 +822,7 @@ void decode_opc(DisasContext * ctx)
CHECK_NOT_DELAY_SLOT
gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2,
ctx->pc + 2);
- ctx->flags |= BRANCH_CONDITIONAL;
+ ctx->bstate = BS_BRANCH;
return;
case 0x8d00: /* bt/s label */
CHECK_NOT_DELAY_SLOT
@@ -908,7 +914,7 @@ void decode_opc(DisasContext * ctx)
case 0xc300: /* trapa #imm */
CHECK_NOT_DELAY_SLOT gen_op_movl_imm_PC(ctx->pc);
gen_op_trapa(B7_0);
- ctx->flags |= BRANCH;
+ ctx->bstate = BS_BRANCH;
return;
case 0xc800: /* tst #imm,R0 */
gen_op_tst_imm_rN(B7_0, REG(0));
@@ -1012,8 +1018,8 @@ void decode_opc(DisasContext * ctx)
gen_op_movl_rN_T1 (REG(B11_8)); \
gen_op_stl_T0_T1 (ctx); \
return;
- LDST(sr, 0x400e, 0x4007, ldc, 0x0002, 0x4003, stc, ctx->flags |=
- MODE_CHANGE;)
+ LDST(sr, 0x400e, 0x4007, ldc, 0x0002, 0x4003, stc, ctx->bstate =
+ BS_STOP;)
LDST(gbr, 0x401e, 0x4017, ldc, 0x0012, 0x4013, stc,)
LDST(vbr, 0x402e, 0x4027, ldc, 0x0022, 0x4023, stc,)
LDST(ssr, 0x403e, 0x4037, ldc, 0x0032, 0x4033, stc,)
@@ -1023,8 +1029,8 @@ void decode_opc(DisasContext * ctx)
LDST(macl, 0x401a, 0x4016, lds, 0x001a, 0x4012, sts,)
LDST(pr, 0x402a, 0x4026, lds, 0x002a, 0x4022, sts,)
LDST(fpul, 0x405a, 0x4056, lds, 0x005a, 0x4052, sts,)
- LDST(fpscr, 0x406a, 0x4066, lds, 0x006a, 0x4062, sts, ctx->flags |=
- MODE_CHANGE;)
+ LDST(fpscr, 0x406a, 0x4066, lds, 0x006a, 0x4062, sts, ctx->bstate =
+ BS_STOP;)
case 0x00c3: /* movca.l R0,@Rm */
gen_op_movl_rN_T0(REG(0));
gen_op_movl_rN_T1(REG(B11_8));
@@ -1141,7 +1147,28 @@ void decode_opc(DisasContext * ctx)
fprintf(stderr, "unknown instruction 0x%04x at pc 0x%08x\n",
ctx->opcode, ctx->pc);
gen_op_raise_illegal_instruction();
- ctx->flags |= BRANCH_EXCEPTION;
+ ctx->bstate = BS_EXCP;
+}
+
+void decode_opc(DisasContext * ctx)
+{
+ uint32_t old_flags = ctx->flags;
+
+ _decode_opc(ctx);
+
+ if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
+ if (ctx->flags & DELAY_SLOT_CLEARME) {
+ gen_op_store_flags(0);
+ }
+ ctx->flags = 0;
+ ctx->bstate = BS_BRANCH;
+ if (old_flags & DELAY_SLOT_CONDITIONAL) {
+ gen_delayed_conditional_jump(ctx);
+ } else if (old_flags & DELAY_SLOT) {
+ gen_jump(ctx);
+ }
+
+ }
}
static inline int
@@ -1151,7 +1178,6 @@ gen_intermediate_code_internal(CPUState
DisasContext ctx;
target_ulong pc_start;
static uint16_t *gen_opc_end;
- uint32_t old_flags;
int i, ii;
pc_start = tb->pc;
@@ -1159,14 +1185,14 @@ gen_intermediate_code_internal(CPUState
gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
gen_opparam_ptr = gen_opparam_buf;
ctx.pc = pc_start;
- ctx.flags = env->flags;
- old_flags = 0;
+ ctx.flags = (uint32_t)tb->flags;
+ ctx.bstate = BS_NONE;
ctx.sr = env->sr;
ctx.fpscr = env->fpscr;
ctx.memidx = (env->sr & SR_MD) ? 1 : 0;
/* We don't know if the delayed pc came from a dynamic or static branch,
so assume it is a dynamic branch. */
- ctx.delayed_pc = -1;
+ ctx.delayed_pc = -1; /* use delayed pc from env pointer */
ctx.tb = tb;
ctx.singlestep_enabled = env->singlestep_enabled;
nb_gen_labels = 0;
@@ -1180,18 +1206,14 @@ gen_intermediate_code_internal(CPUState
#endif
ii = -1;
- while ((old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) == 0 &&
- (ctx.flags & (BRANCH | BRANCH_CONDITIONAL | MODE_CHANGE |
- BRANCH_EXCEPTION)) == 0 &&
- gen_opc_ptr < gen_opc_end && ctx.sr == env->sr) {
- old_flags = ctx.flags;
+ while (ctx.bstate == BS_NONE && gen_opc_ptr < gen_opc_end) {
if (env->nb_breakpoints > 0) {
for (i = 0; i < env->nb_breakpoints; i++) {
if (ctx.pc == env->breakpoints[i]) {
/* We have hit a breakpoint - make sure PC is up-to-date */
gen_op_movl_imm_PC(ctx.pc);
gen_op_debug();
- ctx.flags |= BRANCH_EXCEPTION;
+ ctx.bstate = BS_EXCP;
break;
}
}
@@ -1204,6 +1226,7 @@ gen_intermediate_code_internal(CPUState
gen_opc_instr_start[ii++] = 0;
}
gen_opc_pc[ii] = ctx.pc;
+ gen_opc_hflags[ii] = ctx.flags;
gen_opc_instr_start[ii] = 1;
}
#if 0
@@ -1221,21 +1244,30 @@ gen_intermediate_code_internal(CPUState
break;
#endif
}
-
- if (old_flags & DELAY_SLOT_CONDITIONAL) {
- gen_delayed_conditional_jump(&ctx);
- } else if (old_flags & DELAY_SLOT) {
- gen_op_clr_delay_slot();
- gen_jump(&ctx);
- } else if (ctx.flags & BRANCH_EXCEPTION) {
- gen_jump_exception(&ctx);
- } else if ((ctx.flags & (BRANCH | BRANCH_CONDITIONAL)) == 0) {
- gen_goto_tb(&ctx, 0, ctx.pc);
- }
-
if (env->singlestep_enabled) {
- gen_op_debug();
+ gen_op_debug();
+ } else {
+ switch (ctx.bstate) {
+ case BS_STOP:
+ /* gen_op_interrupt_restart(); */
+ /* fall through */
+ case BS_NONE:
+ if (ctx.flags) {
+ gen_op_store_flags(ctx.flags | DELAY_SLOT_CLEARME);
+ }
+ gen_goto_tb(&ctx, 0, ctx.pc);
+ break;
+ case BS_EXCP:
+ /* gen_op_interrupt_restart(); */
+ gen_op_movl_imm_T0(0);
+ gen_op_exit_tb();
+ break;
+ case BS_BRANCH:
+ default:
+ break;
+ }
}
+
*gen_opc_ptr = INDEX_op_end;
if (search_pc) {
i = gen_opc_ptr - gen_opc_buf;
--- 0001/translate-all.c
+++ work/translate-all.c 2007-11-28 18:01:02.000000000 +0900
@@ -53,7 +53,7 @@ uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
#elif defined(TARGET_SPARC)
target_ulong gen_opc_npc[OPC_BUF_SIZE];
target_ulong gen_opc_jump_pc[2];
-#elif defined(TARGET_MIPS)
+#elif defined(TARGET_MIPS) || defined(TARGET_SH4)
uint32_t gen_opc_hflags[OPC_BUF_SIZE];
#endif
@@ -298,6 +298,9 @@ int cpu_restore_state(TranslationBlock *
env->hflags |= gen_opc_hflags[j];
#elif defined(TARGET_ALPHA)
env->pc = gen_opc_pc[j];
+#elif defined(TARGET_SH4)
+ env->pc = gen_opc_pc[j];
+ env->flags = gen_opc_hflags[j];
#endif
return 0;
}
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Qemu-devel] [PATCH]: sh4 delay slot code update
2007-11-28 9:54 [Qemu-devel] [PATCH]: sh4 delay slot code update Magnus Damm
@ 2007-11-28 12:49 ` Paul Mundt
2007-11-29 5:43 ` Magnus Damm
0 siblings, 1 reply; 6+ messages in thread
From: Paul Mundt @ 2007-11-28 12:49 UTC (permalink / raw)
To: qemu-devel
On Wed, Nov 28, 2007 at 06:54:20PM +0900, Magnus Damm wrote:
> +#define DELAY_SLOT_TRUE (1 << 2)
> +#define DELAY_SLOT_CLEARME (1 << 3)
> +/* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump
> + * after the delay slot should be taken or not. It is calculated from SR_T.
> + *
> + * It is unclear if it is permitted to modify the SR_T flag in a delay slot.
> + * The use of DELAY_SLOT_TRUE flag makes us accept such SR_T modification.
> + */
Nesting a 'tst' in a delay slot is certainly valid, and GCC correctly
treats it as a slottable instruction. If you're in doubt as to whether an
opcode can be placed in a delay slot or not, the machine descriptor is a
good way of sorting things out. The only restrictions I know of things
that cause changes to PC, most of the system instructions (like trapa and
ldtlb), and so on. There are of course cases where an instruction itself
is slottable which may perform illegal behaviour via PC modification or
so on, and we do have an exception for trapping that sort of abuse.
You can see an example in arch/sh/kernel/entry-common.S:
syscall_exit_work:
! r0: current_thread_info->flags
! r8: current_thread_info
tst #_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | _TIF_SYSCALL_AUDIT, r0
bt/s work_pending
tst #_TIF_NEED_RESCHED, r0
....
work_pending:
! r0: current_thread_info->flags
! r8: current_thread_info
! t: result of "tst #_TIF_NEED_RESCHED, r0"
bf/s work_resched
tst #(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r0
....
This sort of access is not a particularly rare workload. Presumably you'd hit
this under system emulation at the very least.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Qemu-devel] [PATCH]: sh4 delay slot code update
2007-11-28 12:49 ` Paul Mundt
@ 2007-11-29 5:43 ` Magnus Damm
2007-11-29 5:55 ` Paul Mundt
0 siblings, 1 reply; 6+ messages in thread
From: Magnus Damm @ 2007-11-29 5:43 UTC (permalink / raw)
To: qemu-devel
Hi Paul,
Thanks for your comments.
On Nov 28, 2007 9:49 PM, Paul Mundt <lethal@linux-sh.org> wrote:
> On Wed, Nov 28, 2007 at 06:54:20PM +0900, Magnus Damm wrote:
> > +#define DELAY_SLOT_TRUE (1 << 2)
> > +#define DELAY_SLOT_CLEARME (1 << 3)
> > +/* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump
> > + * after the delay slot should be taken or not. It is calculated from SR_T.
> > + *
> > + * It is unclear if it is permitted to modify the SR_T flag in a delay slot.
> > + * The use of DELAY_SLOT_TRUE flag makes us accept such SR_T modification.
> > + */
>
> Nesting a 'tst' in a delay slot is certainly valid, and GCC correctly
> treats it as a slottable instruction. If you're in doubt as to whether an
> opcode can be placed in a delay slot or not, the machine descriptor is a
> good way of sorting things out. The only restrictions I know of things
> that cause changes to PC, most of the system instructions (like trapa and
> ldtlb), and so on. There are of course cases where an instruction itself
> is slottable which may perform illegal behaviour via PC modification or
> so on, and we do have an exception for trapping that sort of abuse.
I was mainly wondering if I really needed to save the state of SR_T,
but I assumed so. So the code should be correct. And yes, I'm sure
there are quite a few slottable instructions with interesting side
effects, but that's a separate issue.
> You can see an example in arch/sh/kernel/entry-common.S:
>
> syscall_exit_work:
> ! r0: current_thread_info->flags
> ! r8: current_thread_info
> tst #_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | _TIF_SYSCALL_AUDIT, r0
> bt/s work_pending
> tst #_TIF_NEED_RESCHED, r0
>
> ....
> work_pending:
> ! r0: current_thread_info->flags
> ! r8: current_thread_info
> ! t: result of "tst #_TIF_NEED_RESCHED, r0"
> bf/s work_resched
> tst #(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r0
>
> ....
>
> This sort of access is not a particularly rare workload. Presumably you'd hit
> this under system emulation at the very least.
Yeah, that's a pretty good example that shows that I need to save the
SR_T state before executing the delay slot instruction. Thanks for
pointing out that code.
/ magnus
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Qemu-devel] [PATCH]: sh4 delay slot code update
2007-11-29 5:43 ` Magnus Damm
@ 2007-11-29 5:55 ` Paul Mundt
2007-11-29 9:18 ` Magnus Damm
0 siblings, 1 reply; 6+ messages in thread
From: Paul Mundt @ 2007-11-29 5:55 UTC (permalink / raw)
To: qemu-devel
On Thu, Nov 29, 2007 at 02:43:03PM +0900, Magnus Damm wrote:
> On Nov 28, 2007 9:49 PM, Paul Mundt <lethal@linux-sh.org> wrote:
> I was mainly wondering if I really needed to save the state of SR_T,
> but I assumed so. So the code should be correct. And yes, I'm sure
> there are quite a few slottable instructions with interesting side
> effects, but that's a separate issue.
>
The only thing to be careful of is the ordering semantics for the T-bit
state, which is what I tried to convey in the code snippet.
> > You can see an example in arch/sh/kernel/entry-common.S:
> >
> > syscall_exit_work:
> > ! r0: current_thread_info->flags
> > ! r8: current_thread_info
> > tst #_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | _TIF_SYSCALL_AUDIT, r0
> > bt/s work_pending
> > tst #_TIF_NEED_RESCHED, r0
> >
> > ....
> > work_pending:
> > ! r0: current_thread_info->flags
> > ! r8: current_thread_info
> > ! t: result of "tst #_TIF_NEED_RESCHED, r0"
> > bf/s work_resched
> > tst #(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r0
> >
So while bt/s is conditional on the first flag test, the T-state is
relative to the second test by the time the branch happens.
The T-state check for bt/bf happens _prior_ to execution of the delay
slot instruction, while any delay-slot resident T-bit modifier is
executed by the time we enter the branch. I don't know if your code
handles that or not, but figured it's probably good to make that
explicit. T-bit modifiers are always a bit hairy..
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Qemu-devel] [PATCH]: sh4 delay slot code update
2007-11-29 5:55 ` Paul Mundt
@ 2007-11-29 9:18 ` Magnus Damm
2007-11-29 9:30 ` Paul Mundt
0 siblings, 1 reply; 6+ messages in thread
From: Magnus Damm @ 2007-11-29 9:18 UTC (permalink / raw)
To: qemu-devel
On Nov 29, 2007 2:55 PM, Paul Mundt <lethal@linux-sh.org> wrote:
> The only thing to be careful of is the ordering semantics for the T-bit
> state, which is what I tried to convey in the code snippet.
>
> > > You can see an example in arch/sh/kernel/entry-common.S:
> > >
> > > syscall_exit_work:
> > > ! r0: current_thread_info->flags
> > > ! r8: current_thread_info
> > > tst #_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | _TIF_SYSCALL_AUDIT, r0
> > > bt/s work_pending
> > > tst #_TIF_NEED_RESCHED, r0
> > >
> > > ....
> > > work_pending:
> > > ! r0: current_thread_info->flags
> > > ! r8: current_thread_info
> > > ! t: result of "tst #_TIF_NEED_RESCHED, r0"
> > > bf/s work_resched
> > > tst #(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r0
> > >
> So while bt/s is conditional on the first flag test, the T-state is
> relative to the second test by the time the branch happens.
Exactly.
> The T-state check for bt/bf happens _prior_ to execution of the delay
> slot instruction, while any delay-slot resident T-bit modifier is
> executed by the time we enter the branch. I don't know if your code
> handles that or not, but figured it's probably good to make that
> explicit. T-bit modifiers are always a bit hairy..
Yeah, the T flag is indeed a bit hairy. The patch should handle the T
bit exactly as you describe it. As part of the bt/bf instruction the T
flag is evaluated and if true the the DELAY_SLOT_TRUE bit is set. This
bit is later used to decide if we should jump or not after the delay
slot instruction.
Thank you,
/ magnus
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Qemu-devel] [PATCH]: sh4 delay slot code update
2007-11-29 9:18 ` Magnus Damm
@ 2007-11-29 9:30 ` Paul Mundt
0 siblings, 0 replies; 6+ messages in thread
From: Paul Mundt @ 2007-11-29 9:30 UTC (permalink / raw)
To: qemu-devel
On Thu, Nov 29, 2007 at 06:18:32PM +0900, Magnus Damm wrote:
> On Nov 29, 2007 2:55 PM, Paul Mundt <lethal@linux-sh.org> wrote:
> > The T-state check for bt/bf happens _prior_ to execution of the delay
> > slot instruction, while any delay-slot resident T-bit modifier is
> > executed by the time we enter the branch. I don't know if your code
> > handles that or not, but figured it's probably good to make that
> > explicit. T-bit modifiers are always a bit hairy..
>
> Yeah, the T flag is indeed a bit hairy. The patch should handle the T
> bit exactly as you describe it. As part of the bt/bf instruction the T
> flag is evaluated and if true the the DELAY_SLOT_TRUE bit is set. This
> bit is later used to decide if we should jump or not after the delay
> slot instruction.
>
Ok, I have no objection to these changes then.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2007-11-29 9:30 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-28 9:54 [Qemu-devel] [PATCH]: sh4 delay slot code update Magnus Damm
2007-11-28 12:49 ` Paul Mundt
2007-11-29 5:43 ` Magnus Damm
2007-11-29 5:55 ` Paul Mundt
2007-11-29 9:18 ` Magnus Damm
2007-11-29 9:30 ` Paul Mundt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).