From: Vladimir Prus <vladimir@codesourcery.com>
To: "Edgar E. Iglesias" <edgar.iglesias@axis.com>
Cc: Shin-ichiro KAWASAKI <kawasaki@juno.dti.ne.jp>, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] SH: Fix movca.l/ocbi emulation.
Date: Wed, 1 Apr 2009 17:36:43 +0400 [thread overview]
Message-ID: <200904011736.43530.vladimir@codesourcery.com> (raw)
In-Reply-To: <20090401124449.GB24403@edgar.se.axis.com>
[-- Attachment #1: Type: Text/Plain, Size: 3531 bytes --]
On Wednesday 01 April 2009 16:44:49 Edgar E. Iglesias wrote:
Hi Edgar,
> > I am afraid I might not be the best person to accurately catch all cases
> > where cache might be enabled or disabled. KAWASAKI-san has posted a function
> > that can be used to check if caching is enabled, in:
> >
> > http://article.gmane.org/gmane.comp.emulators.qemu/36348
> >
> > Does that one address your suggestion? If so, I can incorporate that function,
> > and checking for it.
>
> I dont have the manuals here but yes, I think something like that will be
> needed. On other archs I've seen ppl do memcpy to IO areas, don't know
> about sh but to be on the safe side we better not do any funny stuff
> with uncached stores.
I have added the check, using the routine provided by Kawasaki-san.
> > diff --git a/cpu-exec.c b/cpu-exec.c
> > index cf7c1fb..b0f0b5e 100644
> > --- a/cpu-exec.c
> > +++ b/cpu-exec.c
> > @@ -174,7 +174,7 @@ static inline TranslationBlock *tb_find_fast(void)
> > /* we record a subset of the CPU state. It will
> > always be the same before a given translated block
> > is executed. */
> > - cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> > + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> ^^^
> Stray spaces.
Doh -- I had unsaved emacs buffer with this issue fixed.
> > tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
> > if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
> > tb->flags != flags)) {
> > diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
> > index e9eee2c..67741f6 100644
> > --- a/target-sh4/cpu.h
> > +++ b/target-sh4/cpu.h
> > @@ -100,6 +100,12 @@ enum sh_features {
> > SH_FEATURE_BCR3_AND_BCR4 = 2,
> > };
> >
> > +typedef struct memory_content_t {
> > + uint32_t address;
> > + uint32_t value;
> > + struct memory_content_t *next;
> > +} memory_content_t;
> > +
>
> Please drop the _t (reserved names).
Fixed.
> > typedef struct CPUSH4State {
> > int id; /* CPU model */
> >
> > @@ -149,6 +155,8 @@ typedef struct CPUSH4State {
> > tlb_t itlb[ITLB_SIZE]; /* instruction translation table */
> > void *intc_handle;
> > int intr_at_halt; /* SR_BL ignored during sleep */
> > + memory_content_t *movcal_backup;
> > + memory_content_t **movcal_backup_tail;
>
> Do you really need the _tail member?
> Seems to me like you're only maintaining it but not really using it.
See helper_movcal:
*(env->movcal_backup_tail) = r;
I think having movcal backup list in-order is good. Say, if we have two
movcal to the same address and ocbi, we want to restore the saved when
the first movcal was executed, and given that helper_ocbi traversed
the list from the front we need to put elements in-order.
> > +void helper_ocbi(uint32_t address)
> > +{
> > + memory_content_t **current = &(env->movcal_backup);
> > + while (*current)
> > + {
> > + uint32_t a = (*current)->address;
> > + if ((a & ~0x1F) == (address & ~0x1F))
> > + {
> > + memory_content_t *next = (*current)->next;
> > +
> > + stl_data(a, (*current)->value);
> > +
> > + if (next == 0)
> > + {
> > + env->movcal_backup_tail = current;
> > + }
> > +
> > + free (*current);
> > + *current = next;
> > + break;
> > + }
>
> The indentation got messed up here.
I have fixed it. Revised patch is attached.
Thanks,
Volodya
[-- Attachment #2: movcal-2.diff --]
[-- Type: text/x-patch, Size: 9997 bytes --]
commit d5caa92b7e792c526306b3438e697bfab3dd0c03
Author: Vladimir Prus <vladimir@codesourcery.com>
Date: Tue Nov 11 12:29:02 2008 +0300
Fix movcal.l/ocbi emulation.
* target-sh4/cpu.h (memory_content_t): New.
(CPUSH4State): New fields movcal_backup and movcal_backup_tail.
* target-sh4/helper.h (helper_movcal)
(helper_discard_movcal_backup, helper_ocbi): New.
* target-sh4/op_helper.c (helper_movcal)
(helper_discard_movcal_backup, helper_ocbi): New.
* target-sh4/translate.c (DisasContext): New field has_movcal.
(sh4_defs): Update CVS for SH7785.
(cpu_sh4_init): Initialize env->movcal_backup_tail.
(_decode_opc): Discard movca.l-backup.
Make use of helper_movcal and helper_ocbi.
(gen_intermediate_code_internal): Initialize has_movcal to 1.
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index e9eee2c..d754512 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -100,6 +100,12 @@ enum sh_features {
SH_FEATURE_BCR3_AND_BCR4 = 2,
};
+typedef struct memory_content {
+ uint32_t address;
+ uint32_t value;
+ struct memory_content *next;
+} memory_content;
+
typedef struct CPUSH4State {
int id; /* CPU model */
@@ -149,6 +155,8 @@ typedef struct CPUSH4State {
tlb_t itlb[ITLB_SIZE]; /* instruction translation table */
void *intc_handle;
int intr_at_halt; /* SR_BL ignored during sleep */
+ memory_content *movcal_backup;
+ memory_content **movcal_backup_tail;
} CPUSH4State;
CPUSH4State *cpu_sh4_init(const char *cpu_model);
@@ -163,6 +171,8 @@ void sh4_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr,
uint32_t mem_value);
+int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr);
+
static inline void cpu_set_tls(CPUSH4State *env, target_ulong newtls)
{
env->gbr = newtls;
@@ -294,16 +304,19 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
env->flags = tb->flags;
}
+#define TB_FLAG_PENDING_MOVCA (1 << 4)
+
static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
target_ulong *cs_base, int *flags)
{
*pc = env->pc;
*cs_base = 0;
*flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL
- | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */
- | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */
+ | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */
+ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */
| (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */
- | (env->sr & SR_FD); /* Bit 15 */
+ | (env->sr & SR_FD) /* Bit 15 */
+ | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */
}
#endif /* _CPU_SH4_H */
diff --git a/target-sh4/helper.c b/target-sh4/helper.c
index f1feca7..5fa4fb4 100644
--- a/target-sh4/helper.c
+++ b/target-sh4/helper.c
@@ -652,4 +652,48 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr,
}
}
+int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr)
+{
+ int n;
+ int use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0;
+
+ /* check area */
+ if (env->sr & SR_MD) {
+ /* For previledged mode, P2 and P4 area is not cachable. */
+ if ((0xA0000000 <= addr && addr < 0xC0000000) || 0xE0000000 <= addr)
+ return 0;
+ } else {
+ /* For user mode, only U0 area is cachable. */
+ if (0x80000000 <= addr)
+ return 0;
+ }
+
+ /*
+ * TODO : Evaluate CCR and check if the cache is on or off.
+ * Now CCR is not in CPUSH4State, but in SH7750State.
+ * When you move the ccr inot CPUSH4State, the code will be
+ * as follows.
+ */
+#if 0
+ /* check if operand cache is enabled or not. */
+ if (!(env->ccr & 1))
+ return 0;
+#endif
+
+ /* if MMU is off, no check for TLB. */
+ if (env->mmucr & MMUCR_AT)
+ return 1;
+
+ /* check TLB */
+ n = find_tlb_entry(env, addr, env->itlb, ITLB_SIZE, use_asid);
+ if (n >= 0)
+ return env->itlb[n].c;
+
+ n = find_tlb_entry(env, addr, env->utlb, UTLB_SIZE, use_asid);
+ if (n >= 0)
+ return env->utlb[n].c;
+
+ return 0;
+}
+
#endif
diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index e665185..4b2fcdd 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -9,6 +9,10 @@ DEF_HELPER_0(debug, void)
DEF_HELPER_1(sleep, void, i32)
DEF_HELPER_1(trapa, void, i32)
+DEF_HELPER_2(movcal, void, i32, i32)
+DEF_HELPER_0(discard_movcal_backup, void)
+DEF_HELPER_1(ocbi, void, i32)
+
DEF_HELPER_2(addv, i32, i32, i32)
DEF_HELPER_2(addc, i32, i32, i32)
DEF_HELPER_2(subv, i32, i32, i32)
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 84e1ad3..90c94dc 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
*/
#include <assert.h>
+#include <stdlib.h>
#include "exec.h"
#include "helper.h"
@@ -122,6 +123,57 @@ void helper_trapa(uint32_t tra)
cpu_loop_exit();
}
+void helper_movcal(uint32_t address, uint32_t value)
+{
+ if (cpu_sh4_is_cached (env, address))
+ {
+ memory_content *r = (memory_content *)malloc (sizeof(memory_content));
+ r->address = address;
+ r->value = value;
+ r->next = NULL;
+
+ *(env->movcal_backup_tail) = r;
+ env->movcal_backup_tail = &(r->next);
+ }
+}
+
+void helper_discard_movcal_backup(void)
+{
+ memory_content *current = env->movcal_backup;
+
+ while(current)
+ {
+ memory_content *next = current->next;
+ free (current);
+ env->movcal_backup = current = next;
+ if (current == 0)
+ env->movcal_backup_tail = &(env->movcal_backup);
+ }
+}
+
+void helper_ocbi(uint32_t address)
+{
+ memory_content **current = &(env->movcal_backup);
+ while (*current)
+ {
+ uint32_t a = (*current)->address;
+ if ((a & ~0x1F) == (address & ~0x1F))
+ {
+ memory_content *next = (*current)->next;
+ stl_data(a, (*current)->value);
+
+ if (next == 0)
+ {
+ env->movcal_backup_tail = current;
+ }
+
+ free (*current);
+ *current = next;
+ break;
+ }
+ }
+}
+
uint32_t helper_addc(uint32_t arg0, uint32_t arg1)
{
uint32_t tmp0, tmp1;
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index cc9f886..4ced176 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -50,6 +50,7 @@ typedef struct DisasContext {
uint32_t delayed_pc;
int singlestep_enabled;
uint32_t features;
+ int has_movcal;
} DisasContext;
#if defined(CONFIG_USER_ONLY)
@@ -283,6 +284,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model)
env = qemu_mallocz(sizeof(CPUSH4State));
env->features = def->features;
cpu_exec_init(env);
+ env->movcal_backup_tail = &(env->movcal_backup);
sh4_translate_init();
env->cpu_model_str = cpu_model;
cpu_sh4_reset(env);
@@ -495,6 +497,37 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
static void _decode_opc(DisasContext * ctx)
{
+ /* This code tries to make movcal emulation sufficiently
+ accurate for Linux purposes. This instruction writes
+ memory, and prior to that, always allocates a cache line.
+ It is used in two contexts:
+ - in memcpy, where data is copied in blocks, the first write
+ of to a block uses movca.l for performance.
+ - in arch/sh/mm/cache-sh4.c, movcal.l + ocbi combination is used
+ to flush the cache. Here, the data written by movcal.l is never
+ written to memory, and the data written is just bogus.
+
+ To simulate this, we simulate movcal.l, we store the value to memory,
+ but we also remember the previous content. If we see ocbi, we check
+ if movcal.l for that address was done previously. If so, the write should
+ not have hit the memory, so we restore the previous content.
+ When we see an instruction that is neither movca.l
+ nor ocbi, the previous content is discarded.
+
+ To optimize, we only try to flush stores when we're at the start of
+ TB, or if we already saw movca.l in this TB and did not flush stores
+ yet. */
+ if (ctx->has_movcal)
+ {
+ int opcode = ctx->opcode & 0xf0ff;
+ if (opcode != 0x0093 /* ocbi */
+ && opcode != 0x00c3 /* movca.l */)
+ {
+ gen_helper_discard_movcal_backup ();
+ ctx->has_movcal = 0;
+ }
+ }
+
#if 0
fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode);
#endif
@@ -1545,7 +1578,13 @@ static void _decode_opc(DisasContext * ctx)
}
return;
case 0x00c3: /* movca.l R0,@Rm */
- tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
+ {
+ TCGv val = tcg_temp_new();
+ tcg_gen_qemu_ld32u(val, REG(B11_8), ctx->memidx);
+ gen_helper_movcal (REG(B11_8), val);
+ tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
+ }
+ ctx->has_movcal = 1;
return;
case 0x40a9:
/* MOVUA.L @Rm,R0 (Rm) -> R0
@@ -1594,9 +1633,7 @@ static void _decode_opc(DisasContext * ctx)
break;
case 0x0093: /* ocbi @Rn */
{
- TCGv dummy = tcg_temp_new();
- tcg_gen_qemu_ld32s(dummy, REG(B11_8), ctx->memidx);
- tcg_temp_free(dummy);
+ gen_helper_ocbi (REG(B11_8));
}
return;
case 0x00a3: /* ocbp @Rn */
@@ -1876,6 +1913,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
ctx.tb = tb;
ctx.singlestep_enabled = env->singlestep_enabled;
ctx.features = env->features;
+ ctx.has_movcal = (tb->flags & TB_FLAG_PENDING_MOVCA);
#ifdef DEBUG_DISAS
qemu_log_mask(CPU_LOG_TB_CPU,
next prev parent reply other threads:[~2009-04-01 13:36 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-12-11 18:34 [Qemu-devel] SH: Fix movca.l/ocbi emulation Vladimir Prus
2009-01-14 20:45 ` Edgar E. Iglesias
2009-03-31 20:58 ` Vladimir Prus
2009-04-01 12:44 ` Edgar E. Iglesias
2009-04-01 13:36 ` Vladimir Prus [this message]
2009-04-01 13:48 ` Edgar E. Iglesias
2009-01-18 16:38 ` Shin-ichiro KAWASAKI
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200904011736.43530.vladimir@codesourcery.com \
--to=vladimir@codesourcery.com \
--cc=edgar.iglesias@axis.com \
--cc=kawasaki@juno.dti.ne.jp \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).