All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: Mike Frysinger <vapier@gentoo.org>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 2/5] Blackfin: initial port
Date: Tue, 25 Jun 2013 14:23:57 -0700	[thread overview]
Message-ID: <51CA0A6D.8030803@twiddle.net> (raw)
In-Reply-To: <1371453383-11484-1-git-send-email-vapier@gentoo.org>

> diff --git a/target-bfin/bfin-sim.c b/target-bfin/bfin-sim.c

Why this separate file from translate.c?

> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <inttypes.h>

Certainly you shouldn't need these, since this isn't a separately
compiled object -- you're included from translate.c.

> +static void
> +unhandled_instruction(DisasContext *dc, const char *insn)
> +{
> +    fprintf(stderr, "unhandled insn: %s\n", insn);

Use LOG_UNIMP.

> +#define HOST_LONG_WORD_SIZE (sizeof(long) * 8)

You mean TCG_TARGET_REG_BITS?

> +static TCGv
> +get_allreg(DisasContext *dc, int grp, int reg)
> +{
> +    TCGv *ret = cpu_regs[(grp << 3) | reg];
> +    if (ret) {
> +       return *ret;
> +    }
> +    abort();
> +    illegal_instruction(dc);
> +}

Well, which is it?  abort or illegal_instruction.  And come to that, how is
abort any better than SEGV from dereferecing the null?  Certainly the later
will generate a faster translator...

> +decode_multfunc_tl(DisasContext *dc, int h0, int h1, int src0, int src1,
> +                   int mmod, int MM, TCGv psat)
> +{
> +    TCGv s0, s1, val;
> +
> +    s0 = tcg_temp_local_new();

You'll really want to avoid local temps and branches, if at all possible.  For
some of the more complex stuff that you're open-coding, you may be better off
with helper functions instead.

> +        l = gen_new_label();
> +        endl = gen_new_label();
> +
> +        tcg_gen_brcondi_tl(TCG_COND_NE, val, 0x40000000, l);
> +        if (mmod == M_W32) {
> +            tcg_gen_movi_tl(val, 0x7fffffff);
> +        } else {
> +            tcg_gen_movi_tl(val, 0x80000000);
> +        }
> +        tcg_gen_movi_tl(psat, 1);
> +        tcg_gen_br(endl);
> +
> +        gen_set_label(l);
> +        tcg_gen_shli_tl(val, val, 1);
> +
> +        gen_set_label(endl);

Certainly possible here with 2 movcond, or 1 movcond, 1 setcond + 1 or.

> +    l = gen_new_label();
> +    tcg_gen_brcondi_tl(TCG_COND_EQ, psat, 0, l);
> +    tcg_gen_ext32u_i64(val1, val1);
> +    gen_set_label(l);

movcond again.

> +static void
> +saturate_s32(TCGv_i64 val, TCGv overflow)

I shall now stop mentioning movcond.  I sense there are many locations to come.

> +    } else if (prgfunc == 11 && poprnd < 6) {
> +        /* TESTSET (Preg{poprnd}); */
> +        TCGv tmp = tcg_temp_new();
> +        tcg_gen_qemu_ld8u(tmp, cpu_preg[poprnd], dc->mem_idx);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_cc, tmp, 0);
> +        tcg_gen_ori_tl(tmp, tmp, 0x80);
> +        tcg_gen_qemu_st8(tmp, cpu_preg[poprnd], dc->mem_idx);
> +        tcg_temp_free(tmp);

I'll note that this is fine for system code, but for user code ought to be
atomic.  There are a bunch of really bad examples in the tree, and no real
good solutions atm.

> +    /* Can't push/pop reserved registers */
> +    /*if (reg_is_reserved(grp, reg))
> +        illegal_instruction(dc);*/

No commented out code like this.

> +    /* Everything here needs to be aligned, so check once */
> +    gen_align_check(dc, cpu_spreg, 4, false);

You ought not need to generate explicit alignment checks.  Yes, we don't do
that correctly for user-mode, but we do for system mode.

My hope is that user mode eventually has the option of using the system mode
page tables too -- there are just too many things that don't work correctly
when host and target page sizes don't match, or the host and target don't have
the same unaligned access characteristics.

> +        } else if (grp == 4 && (reg == 0 || reg == 2)) {
> +            /* Pop A#.X */
> +            tmp = tcg_temp_new();
> +            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
> +            tcg_gen_andi_tl(tmp, tmp, 0xff);
> +            tmp64 = tcg_temp_new_i64();
> +            tcg_gen_extu_i32_i64(tmp64, tmp);
> +            tcg_temp_free(tmp);
> +
> +            tcg_gen_andi_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], 0xffffffff);
> +            tcg_gen_shli_i64(tmp64, tmp64, 32);
> +            tcg_gen_or_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64);
> +            tcg_temp_free_i64(tmp64);

Drop the andi with 0xff and use

tcg_gen_deposit_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64, 32, 8)

> +        } else if (grp == 4 && (reg == 1 || reg == 3)) {
> +            /* Pop A#.W */
> +            tcg_gen_andi_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], 0xff00000000);
> +            tmp = tcg_temp_new();
> +            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
> +            tmp64 = tcg_temp_new_i64();
> +            tcg_gen_extu_i32_i64(tmp64, tmp);
> +            tcg_temp_free(tmp);
> +            tcg_gen_or_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64);
> +            tcg_temp_free_i64(tmp64);

And then this one becomes deposit(areg, areg, tmp64, 0, 32).

> +        } else if (grp == 4 && (reg == 0 || reg == 2)) {
> +            /* Push A#.X */
> +            tmp64 = tcg_temp_new_i64();
> +            tcg_gen_shri_i64(tmp64, cpu_areg[reg >> 1], 32);
> +            tmp = tcg_temp_new();
> +            tcg_gen_trunc_i64_i32(tmp, tmp64);
> +            tcg_temp_free_i64(tmp64);
> +            tcg_gen_andi_tl(tmp, tmp, 0xff);

Do we ever allow the high 24 bits to be non-zero?  Is this andi actually redundant?

> +    if (W == 1) {
> +        /* [--SP] = ({d}R7:imm{dr}, {p}P5:imm{pr}); */
> +        if (d) {
> +            for (i = dr; i < 8; i++) {
> +                tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
> +                tcg_gen_qemu_st32(cpu_dreg[i], cpu_spreg, dc->mem_idx);
> +            }
> +        }

What's the cpu exception effect of the second store causing a page fault?
Normally one needs to do the address increment in a temporary and only update
the real SP register at the end, so that the instruction can be restarted.

> +    /* CC = CC; is invalid.  */
> +    if (cbit == 5)
> +        illegal_instruction(dc);

Please handle all checkpatch.pl style errors.

> +    if (opc == 0) {
> +        /* CC = ! BITTST (Dreg{dst}, imm{uimm}); */
> +        tmp = tcg_temp_new();
> +        tcg_gen_movi_tl(tmp, 1 << uimm);
> +        tcg_gen_and_tl(tmp, tmp, cpu_dreg[dst]);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_cc, tmp, 0);
> +        tcg_temp_free(tmp);
> +    } else if (opc == 1) {
> +        /* CC = BITTST (Dreg{dst}, imm{uimm}); */
> +        tmp = tcg_temp_new();
> +        tcg_gen_movi_tl(tmp, 1 << uimm);
> +        tcg_gen_and_tl(tmp, tmp, cpu_dreg[dst]);
> +        tcg_gen_setcondi_tl(TCG_COND_NE, cpu_cc, tmp, 0);
> +        tcg_temp_free(tmp);

You're writing

	(x & (1 << I)) != 0

whereas the alternative

	(x >> I) & 1

does not require the setcond, and will be faster on most hosts.

> +    if (aop == 1 && W == 0 && idx == ptr) {
> +        /* Dreg_lo{reg} = W[Preg{ptr}]; */
> +        tmp = tcg_temp_local_new();
> +        tcg_gen_andi_tl(cpu_dreg[reg], cpu_dreg[reg], 0xffff0000);
> +        gen_aligned_qemu_ld16u(dc, tmp, cpu_preg[ptr]);
> +        tcg_gen_or_tl(cpu_dreg[reg], cpu_dreg[reg], tmp);
> +        tcg_temp_free(tmp);

Deposit again.  Lots of instances in this function.

> +        /* LINK imm{framesize}; */
> +        int size = uimm16s4(framesize);
> +        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
> +        tcg_gen_qemu_st32(cpu_rets, cpu_spreg, dc->mem_idx);
> +        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
> +        tcg_gen_qemu_st32(cpu_fpreg, cpu_spreg, dc->mem_idx);
> +        tcg_gen_mov_tl(cpu_fpreg, cpu_spreg);
> +        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, size);
> +    } else if (framesize == 0) {
> +        /* UNLINK; */
> +        /* Restore SP from FP.  */
> +        tcg_gen_mov_tl(cpu_spreg, cpu_fpreg);
> +        tcg_gen_qemu_ld32u(cpu_fpreg, cpu_spreg, dc->mem_idx);
> +        tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
> +        tcg_gen_qemu_ld32u(cpu_rets, cpu_spreg, dc->mem_idx);
> +        tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);

Similarly to push/pop multiple wrt intermediate SP.

> +    if ((aop == 0 || aop == 2) && aopcde == 9 && HL == 0 && s == 0) {
> +        int a = aop >> 1;
> +        /* Areg_lo{a} = Dreg_lo{src0}; */
> +        tcg_gen_andi_i64(cpu_areg[a], cpu_areg[a], ~0xffff);
> +        tmp64 = tcg_temp_new_i64();
> +        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
> +        tcg_gen_andi_i64(tmp64, tmp64, 0xffff);
> +        tcg_gen_or_i64(cpu_areg[a], cpu_areg[a], tmp64);
> +        tcg_temp_free_i64(tmp64);

More deposits in this function.  I'll stop mentioning them, but pretty much
every place you touch aregs can use this.

> +#include "linux-fixed-code.h"
> +
> +static uint32_t bfin_lduw_code(DisasContext *dc, target_ulong pc)
> +{
> +#ifdef CONFIG_USER_ONLY
> +    /* Intercept jump to the magic kernel page */
> +    if (((dc->env->personality & 0xff/*PER_MASK*/) == 0/*PER_LINUX*/) &&
> +        (pc & 0xFFFFFF00) == 0x400) {
> +        uint32_t off = pc - 0x400;
> +        if (off < sizeof(bfin_linux_fixed_code)) {
> +            return ((uint16_t)bfin_linux_fixed_code[off + 1] << 8) |
> +                   bfin_linux_fixed_code[off];
> +        }
> +    }
> +#endif

Surely this memory setup belongs in linux-user/.

> +/* Interpret a single Blackfin insn; breaks up parallel insns */
> +static void
> +interp_insn_bfin(DisasContext *dc)
> +{
> +    _interp_insn_bfin(dc, dc->pc);

I'd prefer a suffix like "1" rather than a prefix of "_".

> +typedef struct CPUBfinState {
> +    CPU_COMMON

COMMON should come last, or just about.
Certainly the cpu registers should come first, for most
efficient translation access on the host.

> +static inline void bfin_astat_write(CPUArchState *env, uint32_t astat)
> +{
> +    unsigned int i;
> +    for (i = 0; i < 32; ++i)
> +        env->astat[i] = !!(astat & (1 << i));

 = (astat >> i) & 1

> +typedef void (*hwloop_callback)(struct DisasContext *dc, int loop);
> +
> +typedef struct DisasContext {
> +    CPUArchState *env;
> +    struct TranslationBlock *tb;
> +    /* The current PC we're decoding (could be middle of parallel insn) */
> +    target_ulong pc;
> +    /* Length of current insn (2/4/8) */
> +    target_ulong insn_len;
> +
> +    /* For delayed ASTAT handling */
> +    enum astat_ops astat_op;
> +
> +    /* For hardware lop processing */
> +    hwloop_callback hwloop_callback;
> +    void *hwloop_data;
> +
> +    /* Was a DISALGNEXCPT used in this parallel insn ? */
> +    int disalgnexcpt;
> +
> +    int is_jmp;
> +    int mem_idx;
> +} DisasContext;

Really, this type should be private to translate.c.

> +static inline void cpu_get_tb_cpu_state(CPUArchState *env, target_ulong *pc,
> +                                        target_ulong *cs_base, int *flags)
> +{
> +    *pc = cpu_get_pc(env);
> +    *cs_base = 0;
> +    *flags = env->astat[ASTAT_RND_MOD];
> +}

You'll probably be better off with a bit that notes whether the loop registers
are active, or something, so that you don't have to always generate code that
handles them.

> +DEF_HELPER_3(raise_exception, void, env, i32, i32)

Lots of these can use better settings for flags.  Here, the only side effect is
to raise an exception, which leads to reading the globals.  So TCG_CALL_NO_WG.

> +DEF_HELPER_5(memalign, void, env, i32, i32, i32, i32)
> +
> +DEF_HELPER_4(dbga_l, void, env, i32, i32, i32)
> +DEF_HELPER_4(dbga_h, void, env, i32, i32, i32)

Likewise.

> +/* Count the number of bits set to 1 in the 32bit value */
> +uint32_t HELPER(ones)(uint32_t val)
> +{
> +    uint32_t i;
> +    uint32_t ret;
> +
> +    ret = 0;
> +    for (i = 0; i < 32; ++i)
> +        ret += !!(val & (1 << i));

ctpop32.

> +/* Count number of leading bits that match the sign bit */
> +uint32_t HELPER(signbits)(uint32_t val, uint32_t size)
...
> +/* Count number of leading bits that match the sign bit */
> +uint32_t HELPER(signbits_64)(uint64_t val, uint32_t size)

Surely we can make some use of clz here.  But I guess for now this is ok.

> +static void gen_goto_tb(DisasContext *dc, int tb_num, TCGv dest)
> +{
> +/*
> +    TranslationBlock *tb;
> +    tb = dc->tb;
> +
> +    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
> +        tcg_gen_goto_tb(tb_num);
> +        tcg_gen_mov_tl(cpu_pc, dest);
> +        tcg_gen_exit_tb((long)tb + tb_num);
> +    } else */{
> +        gen_astat_update(dc, false);
> +        tcg_gen_mov_tl(cpu_pc, dest);
> +        tcg_gen_exit_tb(0);
> +    }

Why the astat update here, when you have it on almost no other exits from the tb?

> +        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
> +            tcg_gen_debug_insn_start(dc->pc);

CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT


r~

  parent reply	other threads:[~2013-06-25 21:24 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-17  7:13 [Qemu-devel] [PATCH 0/5] Initial Blackfin support (linux-user only) Mike Frysinger
2013-06-17  7:13 ` [Qemu-devel] [PATCH 1/5] Blackfin: add disassembler support Mike Frysinger
2013-06-17  7:16 ` [Qemu-devel] [PATCH 2/5] Blackfin: initial port Mike Frysinger
2013-06-17  7:16   ` [Qemu-devel] [PATCH 3/5] Blackfin: add linux-user support Mike Frysinger
2013-06-17  7:16   ` [Qemu-devel] [PATCH 4/5] Blackfin: add test suite Mike Frysinger
2013-06-17  7:16   ` [Qemu-devel] [PATCH 5/5] linux-user: add support for Blackfin syscalls Mike Frysinger
2013-06-25 21:23   ` Richard Henderson [this message]
2013-06-25 23:14     ` [Qemu-devel] [PATCH 2/5] Blackfin: initial port Mike Frysinger
2013-06-28 13:47   ` Eric Blake
2013-06-28 14:24   ` Andreas Färber

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51CA0A6D.8030803@twiddle.net \
    --to=rth@twiddle.net \
    --cc=qemu-devel@nongnu.org \
    --cc=vapier@gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.