qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC][PATCH] tcg: allocate memory to spill registers on startup
@ 2009-04-10  8:54 Aurelien Jarno
  2009-04-10 14:44 ` Paul Brook
  0 siblings, 1 reply; 7+ messages in thread
From: Aurelien Jarno @ 2009-04-10  8:54 UTC (permalink / raw)
  To: qemu-devel

Currently the memory used to spill registers is allocated on demand from
a frame, and deallocated at the beginning of each TB.

This patch changes that so that the memory is allocated only once at
startup. As we don't know what will be the usage of the registers, this
increase the size of cpu_env by 1.5 kB of memory on 32-bit hosts and
3 kB of memory on 64-bit hosts. Those values can probably be reduced
later as I doubt any target is actually using 512 TCG registers (256
looks a saner value). On the other hand, this makes sure we always have
enough available memory to spill registers.

There is a small speed gain between 0.2 and 0.4%, but the most
important is that it makes the code simpler and more readable. This also
allow saving variables into memory from tcg-target.c (for the yet to
come slow path optimization).

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 cpu-defs.h |    2 +-
 tcg/tcg.c  |   36 +++++++++++-------------------------
 tcg/tcg.h  |    5 -----
 3 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/cpu-defs.h b/cpu-defs.h
index b462a9f..9c2fcec 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -158,7 +158,7 @@ typedef struct CPUWatchpoint {
     TAILQ_ENTRY(CPUWatchpoint) entry;
 } CPUWatchpoint;
 
-#define CPU_TEMP_BUF_NLONGS 128
+#define CPU_TEMP_BUF_NLONGS 512
 #define CPU_COMMON                                                      \
     struct TranslationBlock *current_tb; /* currently executing TB  */  \
     /* soft mmu support */                                              \
diff --git a/tcg/tcg.c b/tcg/tcg.c
index c64043f..d2e720f 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -245,9 +245,17 @@ void tcg_context_init(TCGContext *s)
 void tcg_set_frame(TCGContext *s, int reg,
                    tcg_target_long start, tcg_target_long size)
 {
-    s->frame_start = start;
-    s->frame_end = start + size;
-    s->frame_reg = reg;
+    int i;
+
+    assert(size != TCG_MAX_TEMPS * sizeof(tcg_target_long));
+
+    for (i = 0 ; i < TCG_MAX_TEMPS ; i++) {
+        TCGTemp *ts;
+        ts = &s->temps[i];
+        ts->mem_reg = reg;
+        ts->mem_offset = start;
+        start += sizeof(tcg_target_long);
+    }
 }
 
 void tcg_func_start(TCGContext *s)
@@ -259,7 +267,6 @@ void tcg_func_start(TCGContext *s)
         s->first_free_temp[i] = -1;
     s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
     s->nb_labels = 0;
-    s->current_frame_offset = s->frame_start;
 
     gen_opc_ptr = gen_opc_buf;
     gen_opparam_ptr = gen_opparam_buf;
@@ -330,7 +337,6 @@ static inline int tcg_global_mem_new_internal(TCGType type, int reg,
         ts->base_type = type;
         ts->type = TCG_TYPE_I32;
         ts->fixed_reg = 0;
-        ts->mem_allocated = 1;
         ts->mem_reg = reg;
 #ifdef TCG_TARGET_WORDS_BIGENDIAN
         ts->mem_offset = offset + 4;
@@ -345,7 +351,6 @@ static inline int tcg_global_mem_new_internal(TCGType type, int reg,
         ts->base_type = type;
         ts->type = TCG_TYPE_I32;
         ts->fixed_reg = 0;
-        ts->mem_allocated = 1;
         ts->mem_reg = reg;
 #ifdef TCG_TARGET_WORDS_BIGENDIAN
         ts->mem_offset = offset;
@@ -365,7 +370,6 @@ static inline int tcg_global_mem_new_internal(TCGType type, int reg,
         ts->base_type = type;
         ts->type = type;
         ts->fixed_reg = 0;
-        ts->mem_allocated = 1;
         ts->mem_reg = reg;
         ts->mem_offset = offset;
         ts->name = name;
@@ -679,7 +683,6 @@ static void tcg_reg_alloc_start(TCGContext *s)
     for(i = s->nb_globals; i < s->nb_temps; i++) {
         ts = &s->temps[i];
         ts->val_type = TEMP_VAL_DEAD;
-        ts->mem_allocated = 0;
         ts->fixed_reg = 0;
     }
     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
@@ -1302,19 +1305,6 @@ static void check_regs(TCGContext *s)
 }
 #endif
 
-static void temp_allocate_frame(TCGContext *s, int temp)
-{
-    TCGTemp *ts;
-    ts = &s->temps[temp];
-    s->current_frame_offset = (s->current_frame_offset + sizeof(tcg_target_long) - 1) & ~(sizeof(tcg_target_long) - 1);
-    if (s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end)
-        tcg_abort();
-    ts->mem_offset = s->current_frame_offset;
-    ts->mem_reg = s->frame_reg;
-    ts->mem_allocated = 1;
-    s->current_frame_offset += sizeof(tcg_target_long);
-}
-
 /* free register 'reg' by spilling the corresponding temporary if necessary */
 static void tcg_reg_free(TCGContext *s, int reg)
 {
@@ -1326,8 +1316,6 @@ static void tcg_reg_free(TCGContext *s, int reg)
         ts = &s->temps[temp];
         assert(ts->val_type == TEMP_VAL_REG);
         if (!ts->mem_coherent) {
-            if (!ts->mem_allocated) 
-                temp_allocate_frame(s, temp);
             tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
         }
         ts->val_type = TEMP_VAL_MEM;
@@ -1381,8 +1369,6 @@ static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
         case TEMP_VAL_CONST:
             reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 
                                 allocated_regs);
-            if (!ts->mem_allocated) 
-                temp_allocate_frame(s, temp);
             tcg_out_movi(s, ts->type, reg, ts->val);
             tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
             ts->val_type = TEMP_VAL_MEM;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index bf95d7e..1fd3c25 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -217,7 +217,6 @@ typedef struct TCGTemp {
     tcg_target_long mem_offset;
     unsigned int fixed_reg:1;
     unsigned int mem_coherent:1;
-    unsigned int mem_allocated:1;
     unsigned int temp_local:1; /* If true, the temp is saved accross
                                   basic blocks. Otherwise, it is not
                                   preserved accross basic blocks. */
@@ -259,10 +258,6 @@ struct TCGContext {
        into account fixed registers */
     int reg_to_temp[TCG_TARGET_NB_REGS];
     TCGRegSet reserved_regs;
-    tcg_target_long current_frame_offset;
-    tcg_target_long frame_start;
-    tcg_target_long frame_end;
-    int frame_reg;
 
     uint8_t *code_ptr;
     TCGTemp static_temps[TCG_MAX_TEMPS];
-- 
1.6.1.3


-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] tcg: allocate memory to spill registers on startup
  2009-04-10  8:54 [Qemu-devel] [RFC][PATCH] tcg: allocate memory to spill registers on startup Aurelien Jarno
@ 2009-04-10 14:44 ` Paul Brook
  2009-04-10 14:56   ` Aurelien Jarno
  0 siblings, 1 reply; 7+ messages in thread
From: Paul Brook @ 2009-04-10 14:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

On Friday 10 April 2009, Aurelien Jarno wrote:
> +    assert(size != TCG_MAX_TEMPS * sizeof(tcg_target_long));

Shouldn't this be >= ?

Paul

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] tcg: allocate memory to spill registers on startup
  2009-04-10 14:44 ` Paul Brook
@ 2009-04-10 14:56   ` Aurelien Jarno
  2009-04-10 15:20     ` Paul Brook
  0 siblings, 1 reply; 7+ messages in thread
From: Aurelien Jarno @ 2009-04-10 14:56 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel

On Fri, Apr 10, 2009 at 03:44:55PM +0100, Paul Brook wrote:
> On Friday 10 April 2009, Aurelien Jarno wrote:
> > +    assert(size != TCG_MAX_TEMPS * sizeof(tcg_target_long));
> 
> Shouldn't this be >= ?
> 

When I wrote that, I thought that the two sizes have to match, but you
are right, the frame size can actually be bigger.

Any other comment?

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] tcg: allocate memory to spill registers on startup
  2009-04-10 14:56   ` Aurelien Jarno
@ 2009-04-10 15:20     ` Paul Brook
  2009-04-10 15:31       ` Aurelien Jarno
  0 siblings, 1 reply; 7+ messages in thread
From: Paul Brook @ 2009-04-10 15:20 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

On Friday 10 April 2009, Aurelien Jarno wrote:
> On Fri, Apr 10, 2009 at 03:44:55PM +0100, Paul Brook wrote:
> > On Friday 10 April 2009, Aurelien Jarno wrote:
> > > +    assert(size != TCG_MAX_TEMPS * sizeof(tcg_target_long));
> >
> > Shouldn't this be >= ?
>
> When I wrote that, I thought that the two sizes have to match, 

In that case I think the assert is completely wrong. You're asserting that the 
two values are not equal. IIRC there's bits in the TCG headers that disable 
asserts by default - I've been bitten by this before.

> but you are right, the frame size can actually be bigger.
> Any other comment?

What were you benchmarking? Translation speed or execution speed?
I'd guess that your new code speeds up the translator, but makes the resulting 
code less cache friendly.

Maybe if I see your subsequent optimization it might make more sense, though 
I'm suspicious of tcg-target doing register allocation/spilling itself.

Paul

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] tcg: allocate memory to spill registers on startup
  2009-04-10 15:20     ` Paul Brook
@ 2009-04-10 15:31       ` Aurelien Jarno
  2009-04-10 15:52         ` Paul Brook
  0 siblings, 1 reply; 7+ messages in thread
From: Aurelien Jarno @ 2009-04-10 15:31 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel

Paul Brook a écrit :
> On Friday 10 April 2009, Aurelien Jarno wrote:
>> On Fri, Apr 10, 2009 at 03:44:55PM +0100, Paul Brook wrote:
>>> On Friday 10 April 2009, Aurelien Jarno wrote:
>>>> +    assert(size != TCG_MAX_TEMPS * sizeof(tcg_target_long));
>>> Shouldn't this be >= ?
>> When I wrote that, I thought that the two sizes have to match, 
> 
> In that case I think the assert is completely wrong. You're asserting that the 
> two values are not equal. IIRC there's bits in the TCG headers that disable 
> asserts by default - I've been bitten by this before.

Correct.

>> but you are right, the frame size can actually be bigger.
>> Any other comment?
> 
> What were you benchmarking? Translation speed or execution speed?
> I'd guess that your new code speeds up the translator, but makes the resulting 
> code less cache friendly.

Execution speed, that is compilation of a small software in
qemu-system-mips64. I haven't try to do find which parts of QEMU goes
faster, but I agree with your hypothesis.

> Maybe if I see your subsequent optimization it might make more sense, though 
> I'm suspicious of tcg-target doing register allocation/spilling itself.

I am trying to fix the following comment from Fabrice (tcg.c)

/* XXX: for load/store we could do that only for the slow path
   (i.e. when a memory callback is called) */

The idea is not to do register allocation/spilling in tcg-target.c, but
to save registers containing TCG globals to memory in the slow path only
and without touching the TCGTemp structure. That's why it has to be done
in tcg-target.c

I currenty only have an hackish patch that does not fully work. I guess
the best is to resent the current patch along with the slow path
optimization one when it works.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] tcg: allocate memory to spill registers on startup
  2009-04-10 15:31       ` Aurelien Jarno
@ 2009-04-10 15:52         ` Paul Brook
  2009-04-10 16:06           ` Aurelien Jarno
  0 siblings, 1 reply; 7+ messages in thread
From: Paul Brook @ 2009-04-10 15:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

> I am trying to fix the following comment from Fabrice (tcg.c)
>
> /* XXX: for load/store we could do that only for the slow path
>    (i.e. when a memory callback is called) */
>
> The idea is not to do register allocation/spilling in tcg-target.c, but
> to save registers containing TCG globals to memory in the slow path only
> and without touching the TCGTemp structure. That's why it has to be done
> in tcg-target.c

Hmm, you don't actually have to allocate a slot, just you need to stash the 
values somewhere while you're making the slow call, then restore them 
afterwards. The bits that do need writing back to home locations (i.e. 
visible guest cpu state) already have memory locations.

Paul

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] tcg: allocate memory to spill registers on startup
  2009-04-10 15:52         ` Paul Brook
@ 2009-04-10 16:06           ` Aurelien Jarno
  0 siblings, 0 replies; 7+ messages in thread
From: Aurelien Jarno @ 2009-04-10 16:06 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel

Paul Brook a écrit :
>> I am trying to fix the following comment from Fabrice (tcg.c)
>>
>> /* XXX: for load/store we could do that only for the slow path
>>    (i.e. when a memory callback is called) */
>>
>> The idea is not to do register allocation/spilling in tcg-target.c, but
>> to save registers containing TCG globals to memory in the slow path only
>> and without touching the TCGTemp structure. That's why it has to be done
>> in tcg-target.c
> 
> Hmm, you don't actually have to allocate a slot, just you need to stash the 
> values somewhere while you're making the slow call, then restore them 
> afterwards. The bits that do need writing back to home locations (i.e. 
> visible guest cpu state) already have memory locations.

For TCG globals, that's true.

But the plan is also to reduce the list of clobbered registers when the
fast path is taken (e.g. only 2 registers are used by the load/store
code on x86_64). This means that the other clobbered registers according
to the ABI have to be saved/restored when taking the slow path. This
looks like the best location to save those registers.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-04-10 16:07 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-04-10  8:54 [Qemu-devel] [RFC][PATCH] tcg: allocate memory to spill registers on startup Aurelien Jarno
2009-04-10 14:44 ` Paul Brook
2009-04-10 14:56   ` Aurelien Jarno
2009-04-10 15:20     ` Paul Brook
2009-04-10 15:31       ` Aurelien Jarno
2009-04-10 15:52         ` Paul Brook
2009-04-10 16:06           ` Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).