(this is mostly to get some ideas going rather than trying to get anything upstream... yet!)
This version of tcg_out_mov for i386's tcg-target.c filters out the redundant final mov in sequences such as:
mov %ebx, %edx
mov %ebx, [some index]
mov %edx, %ebx
I don't have benchmarks, but it does remove a few movs, and QEMU can still load and run a Linux kernel ;)
It'd be easier to do more optimizations if qemu recorded output at the assembly instruction level.
---
/*
 * Peephole state shared between successive tcg_out_mov() calls.
 *
 * lmovloc: code position that tcg_out_mov() recorded for the mov it last
 *          tracked; a static pointer starts out null, i.e. nothing tracked.
 * lmovret: the 'arg' operand of that mov (the operands are stored swapped,
 *          so a later mov in the opposite direction compares equal directly).
 * lmovarg: the 'ret' operand of that mov.
 *
 * Both register slots hold -1 until a mov has been tracked.
 */
static uint8_t *lmovloc;
static int lmovret = -1;
static int lmovarg = -1;
/*
 * Emit a register-to-register move "ret <- arg" (opcode 0x8b via
 * tcg_out_modrm), unless it is recognised as the redundant tail of a
 *
 *     mov  A <- B
 *     <one instruction whose opcode byte is 0x89>
 *     mov  B <- A
 *
 * sequence, in which case nothing is emitted and "removed" is logged.
 *
 * s:   code generation context; s->code_ptr is the current output position.
 * ret: destination register number.
 * arg: source register number.  A move with arg == ret is a no-op and
 *      leaves the tracking state untouched.
 *
 * The pattern is detected purely from the bytes already emitted: the
 * previous tracked mov must start exactly 5 or 8 bytes back, and the byte
 * two past its start (the first byte after a 2-byte mov) must be 0x89.
 * Presumably that corresponds to a store with an 8-bit or 32-bit
 * displacement, which does not modify either register -- TODO confirm that
 * no other 3- or 6-byte encoding starting with 0x89 can appear here.
 *
 * NOTE(review): the tracking state is static, so it survives across
 * whatever unit of code generation the caller uses, and nothing here knows
 * about branch targets that might land between the two movs; verify that
 * callers cannot hit either case.
 */
static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
{
    if (arg == ret) {
        return;
    }

    /* Check for a mov, mov->x, mov pattern */
    if (lmovloc) {
        /*
         * Keep the full pointer width: squeezing the difference into an
         * int could make an unrelated distance alias to 5 or 8.
         */
        const intptr_t gap = s->code_ptr - lmovloc;

        if ((gap == 5 || gap == 8) &&
            lmovloc[2] == 0x89 &&
            lmovret == ret && lmovarg == arg) {
            qemu_log("removed\n");
            /*
             * Nothing was emitted, so there is no 2-byte mov at code_ptr
             * to anchor the next check on, and whatever comes next may
             * clobber either register.  Forget the tracked mov rather than
             * risk matching against unrelated bytes later.
             */
            lmovloc = 0;
            lmovret = -1;
            lmovarg = -1;
            return;
        }
    }

    /* Write, remembering where this mov starts and its swapped operands. */
    lmovloc = s->code_ptr;
    tcg_out_modrm(s, 0x8b, ret, arg);
    lmovret = arg;
    lmovarg = ret;
}