[Qemu-devel] [RFC][PATCH] x86: CS limit checks

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [RFC][PATCH] x86: CS limit checks
@ 2008-07-17 11:57 Jan Kiszka
  2008-07-17 12:17 ` Paul Brook
  2008-07-17 14:06 ` [Qemu-devel] " Fabrice Bellard
  0 siblings, 2 replies; 12+ messages in thread
From: Jan Kiszka @ 2008-07-17 11:57 UTC (permalink / raw)
  To: qemu-devel

Here is a proposal for adding code segment limit checks to x86. This
patch should not need the -seg-checks switch as its tests are mostly
performed during translation time. Moreover, I tried to confine the
small additional overhead in the TB lookup procedure to x86 and Sparc.

Note that this patch depends on my debugging series, namely [1], as that
one reduces the x86-specific code passages for TB generation. Also note
that this patch is early and only lightly tested so far, not yet
intended for inclusion, but definitely for commenting on!

Jan

[1] http://permalink.gmane.org/gmane.comp.emulators.qemu/26993

---
 cpu-exec.c              |   23 ++++++++++++++---------
 exec-all.h              |   29 +++++++++++++++++++++++++++--
 exec.c                  |    8 +++++---
 target-i386/translate.c |   15 ++++++++++++---
 4 files changed, 58 insertions(+), 17 deletions(-)

Index: b/exec-all.h
===================================================================
--- a/exec-all.h
+++ b/exec-all.h
@@ -78,8 +78,8 @@ int cpu_restore_state_copy(struct Transl
 void cpu_resume_from_signal(CPUState *env1, void *puc);
 void cpu_io_recompile(CPUState *env, void *retaddr);
 TranslationBlock *tb_gen_code(CPUState *env, 
-                              target_ulong pc, target_ulong cs_base, int flags,
-                              int cflags);
+                              target_ulong pc, target_ulong cs_base,
+                              target_ulong cs_limit, int flags, int cflags);
 void cpu_exec_init(CPUState *env);
 int page_unprotect(target_ulong address, unsigned long pc, void *puc);
 void tb_invalidate_phys_page_range(target_phys_addr_t start, target_phys_addr_t end,
@@ -125,6 +125,7 @@ static inline int tlb_set_page(CPUState
 struct TranslationBlock {
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
     target_ulong cs_base; /* CS base for this block */
+    target_ulong cs_limit; /* CS limit for this block */
     uint64_t flags; /* flags defining in which context the code was generated */
     uint16_t size;      /* size of target code for this block (1 <=
                            size <= TARGET_PAGE_SIZE) */
@@ -157,6 +158,30 @@ struct TranslationBlock {
     uint32_t icount;
 };
 
+#if defined(TARGET_I386) || defined(TARGET_SPARC)
+static inline int tb_cs_base_equal(TranslationBlock *tb, target_ulong cs_base)
+{
+    return (tb->cs_base == cs_base);
+}
+#else
+static inline int tb_cs_base_equal(TranslationBlock *tb, target_ulong cs_base)
+{
+    return 1;
+}
+#endif
+
+#if defined(TARGET_I386)
+static inline int tb_cs_limit_equal(TranslationBlock *tb, target_ulong cs_limit)
+{
+    return (tb->cs_limit == cs_limit);
+}
+#else
+static inline int tb_cs_limit_equal(TranslationBlock *tb, target_ulong cs_limit)
+{
+    return 1;
+}
+#endif
+
 static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
 {
     target_ulong tmp;
Index: b/exec.c
===================================================================
--- a/exec.c
+++ b/exec.c
@@ -810,7 +810,7 @@ static void build_page_bitmap(PageDesc *
 
 TranslationBlock *tb_gen_code(CPUState *env,
                               target_ulong pc, target_ulong cs_base,
-                              int flags, int cflags)
+                              target_ulong cs_limit, int flags, int cflags)
 {
     TranslationBlock *tb;
     uint8_t *tc_ptr;
@@ -830,6 +830,7 @@ TranslationBlock *tb_gen_code(CPUState *
     tc_ptr = code_gen_ptr;
     tb->tc_ptr = tc_ptr;
     tb->cs_base = cs_base;
+    tb->cs_limit = cs_limit;
     tb->flags = flags;
     tb->cflags = cflags;
     cpu_gen_code(env, tb, &code_gen_size);
@@ -3133,7 +3134,7 @@ void cpu_io_recompile(CPUState *env, voi
 {
     TranslationBlock *tb;
     uint32_t n, cflags;
-    target_ulong pc, cs_base;
+    target_ulong pc, cs_base, cs_limit;
     uint64_t flags;
 
     tb = tb_find_pc((unsigned long)retaddr);
@@ -3173,11 +3174,12 @@ void cpu_io_recompile(CPUState *env, voi
     cflags = n | CF_LAST_IO;
     pc = tb->pc;
     cs_base = tb->cs_base;
+    cs_limit = tb->cs_limit;
     flags = tb->flags;
     tb_phys_invalidate(tb, -1);
     /* FIXME: In theory this could raise an exception.  In practice
        we have already translated the block once so it's probably ok.  */
-    tb_gen_code(env, pc, cs_base, flags, cflags);
+    tb_gen_code(env, pc, cs_base, cs_limit, flags, cflags);
     /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
        the first in the TB) then we end up generating a whole new TB and
        repeating the fault, which is horribly inefficient.
Index: b/cpu-exec.c
===================================================================
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -94,8 +94,8 @@ static void cpu_exec_nocache(int max_cyc
     if (max_cycles > CF_COUNT_MASK)
         max_cycles = CF_COUNT_MASK;
 
-    tb = tb_gen_code(env, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
-                     max_cycles);
+    tb = tb_gen_code(env, orig_tb->pc, orig_tb->cs_base, orig_tb->cs_limit,
+                     orig_tb->flags, max_cycles);
     env->current_tb = tb;
     /* execute the generated code */
     next_tb = tcg_qemu_tb_exec(tb->tc_ptr);
@@ -109,9 +109,8 @@ static void cpu_exec_nocache(int max_cyc
     tb_free(tb);
 }
 
-static TranslationBlock *tb_find_slow(target_ulong pc,
-                                      target_ulong cs_base,
-                                      uint64_t flags)
+static TranslationBlock *tb_find_slow(target_ulong pc, target_ulong cs_base,
+                                      target_ulong cs_limit, uint64_t flags)
 {
     TranslationBlock *tb, **ptb1;
     unsigned int h;
@@ -133,7 +132,8 @@ static TranslationBlock *tb_find_slow(ta
             goto not_found;
         if (tb->pc == pc &&
             tb->page_addr[0] == phys_page1 &&
-            tb->cs_base == cs_base &&
+            tb_cs_base_equal(tb, cs_base) &&
+            tb_cs_limit_equal(tb, cs_limit) &&
             tb->flags == flags) {
             /* check next page if needed */
             if (tb->page_addr[1] != -1) {
@@ -150,7 +150,7 @@ static TranslationBlock *tb_find_slow(ta
     }
  not_found:
    /* if no translated code available, then translate it now */
-    tb = tb_gen_code(env, pc, cs_base, flags, env->next_cflags);
+    tb = tb_gen_code(env, pc, cs_base, cs_limit, flags, env->next_cflags);
     env->next_cflags = 0;
 
  found:
@@ -163,6 +163,7 @@ static inline TranslationBlock *tb_find_
 {
     TranslationBlock *tb;
     target_ulong cs_base, pc;
+    target_ulong cs_limit = (target_ulong)-1;
     uint64_t flags;
 
     /* we record a subset of the CPU state. It will
@@ -172,6 +173,8 @@ static inline TranslationBlock *tb_find_
     flags = env->hflags;
     flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
     cs_base = env->segs[R_CS].base;
+    if ((env->hflags & (HF_PE_MASK | HF_CS64_MASK)) == HF_PE_MASK)
+        cs_limit = env->segs[R_CS].limit;
     pc = cs_base + env->eip;
 #elif defined(TARGET_ARM)
     flags = env->thumb | (env->vfp.vec_len << 1)
@@ -225,9 +228,11 @@ static inline TranslationBlock *tb_find_
 #error unsupported CPU
 #endif
     tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
-    if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
+    if (unlikely(!tb || tb->pc != pc ||
+                 !tb_cs_base_equal(tb, cs_base) ||
+                 !tb_cs_limit_equal(tb, cs_limit) ||
                  tb->flags != flags)) {
-        tb = tb_find_slow(pc, cs_base, flags);
+        tb = tb_find_slow(pc, cs_base, cs_limit, flags);
     }
     return tb;
 }
Index: b/target-i386/translate.c
===================================================================
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -81,6 +81,7 @@ typedef struct DisasContext {
                    static state change (stop translation) */
     /* current block context */
     target_ulong cs_base; /* base of CS segment */
+    target_ulong cs_limit; /* limit of the CS segment */
     int pe;     /* protected mode */
     int code32; /* 32 bit code segment */
 #ifdef TARGET_X86_64
@@ -3687,6 +3688,8 @@ static target_ulong disas_insn(DisasCont
     int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val, seg_reg;
     target_ulong next_eip, tval;
     int rex_w, rex_r;
+    uint16_t *gen_opc_start = gen_opc_ptr;
+    TCGArg *gen_opparam_start = gen_opparam_ptr;
 
     if (unlikely(loglevel & CPU_LOG_TB_OP))
         tcg_gen_debug_insn_start(pc_start);
@@ -7270,6 +7273,13 @@ static target_ulong disas_insn(DisasCont
     /* lock generation */
     if (s->prefix & PREFIX_LOCK)
         tcg_gen_helper_0_0(helper_unlock);
+    if (s->pc < s->cs_base || s->pc - s->cs_base > s->cs_limit) {
+        /* At least some of the opcode fetches violate the CS limit.
+           Overwrite the generated code with a GPF raising one. */
+        gen_opc_ptr = gen_opc_start;
+        gen_opparam_ptr = gen_opparam_start;
+        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+    }
     return s->pc;
  illegal_op:
     if (s->prefix & PREFIX_LOCK)
@@ -7316,13 +7326,11 @@ static inline int gen_intermediate_code_
     int j, lj, cflags;
     uint64_t flags;
     target_ulong pc_start;
-    target_ulong cs_base;
     int num_insns;
     int max_insns;
 
     /* generate intermediate code */
     pc_start = tb->pc;
-    cs_base = tb->cs_base;
     flags = tb->flags;
     cflags = tb->cflags;
 
@@ -7337,7 +7345,8 @@ static inline int gen_intermediate_code_
     dc->tf = (flags >> TF_SHIFT) & 1;
     dc->singlestep_enabled = env->singlestep_enabled;
     dc->cc_op = CC_OP_DYNAMIC;
-    dc->cs_base = cs_base;
+    dc->cs_base = tb->cs_base;
+    dc->cs_limit = tb->cs_limit;
     dc->tb = tb;
     dc->popl_esp_hack = 0;
     /* select memory access functions */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] x86: CS limit checks
  2008-07-17 11:57 [Qemu-devel] [RFC][PATCH] x86: CS limit checks Jan Kiszka
@ 2008-07-17 12:17 ` Paul Brook
  2008-07-17 13:14   ` Jan Kiszka
  2008-07-17 14:06 ` [Qemu-devel] " Fabrice Bellard
  1 sibling, 1 reply; 12+ messages in thread
From: Paul Brook @ 2008-07-17 12:17 UTC (permalink / raw)
  To: qemu-devel; +Cc: Jan Kiszka

On Thursday 17 July 2008, Jan Kiszka wrote:
> +    if (s->pc < s->cs_base || s->pc - s->cs_base > s->cs_limit) {
> +        /* At least some of the opcode fetches violate the CS limit.
> +           Overwrite the generated code with a GPF raising one. */
> +        gen_opc_ptr = gen_opc_start;
> +        gen_opparam_ptr = gen_opparam_start;
> +        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
> +    }

I'm fairly sure this is wrong. The TB may fault before it gets to the end of 
the segment. Likewise if the instruction spanning the limit happens to be an 
illegal op you will generate the wrong kind of exception.

Paul

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] x86: CS limit checks
  2008-07-17 12:17 ` Paul Brook
@ 2008-07-17 13:14   ` Jan Kiszka
  2008-07-17 13:37     ` Paul Brook
  0 siblings, 1 reply; 12+ messages in thread
From: Jan Kiszka @ 2008-07-17 13:14 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel

Paul Brook wrote:
> On Thursday 17 July 2008, Jan Kiszka wrote:
>> +    if (s->pc < s->cs_base || s->pc - s->cs_base > s->cs_limit) {
>> +        /* At least some of the opcode fetches violate the CS limit.
>> +           Overwrite the generated code with a GPF raising one. */
>> +        gen_opc_ptr = gen_opc_start;
>> +        gen_opparam_ptr = gen_opparam_start;
>> +        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>> +    }
> 
> I'm fairly sure this is wrong. The TB may fault before it gets to the end of 
> the segment. Likewise if the instruction spanning the limit happens to be an 
> illegal op you will generate the wrong kind of exception.

What a pity, it looked so easy. OK, will think about those aspects
again. BTW, what happens when the translator hits an unresolvable
address and faults?

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] x86: CS limit checks
  2008-07-17 13:14   ` Jan Kiszka
@ 2008-07-17 13:37     ` Paul Brook
  2008-07-17 16:10       ` Jan Kiszka
  0 siblings, 1 reply; 12+ messages in thread
From: Paul Brook @ 2008-07-17 13:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: Jan Kiszka

On Thursday 17 July 2008, Jan Kiszka wrote:
> Paul Brook wrote:
> > On Thursday 17 July 2008, Jan Kiszka wrote:
> >> +    if (s->pc < s->cs_base || s->pc - s->cs_base > s->cs_limit) {
> >> +        /* At least some of the opcode fetches violate the CS limit.
> >> +           Overwrite the generated code with a GPF raising one. */
> >> +        gen_opc_ptr = gen_opc_start;
> >> +        gen_opparam_ptr = gen_opparam_start;
> >> +        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
> >> +    }
> >
> > I'm fairly sure this is wrong. The TB may fault before it gets to the end
> > of the segment. Likewise if the instruction spanning the limit happens to
> > be an illegal op you will generate the wrong kind of exception.
>
> What a pity, it looked so easy. OK, will think about those aspects
> again. BTW, what happens when the translator hits an unresolvable
> address and faults?

Looks like that's also broken.  In practice I guess a page fault occuring 
early is usually less harmful than a GPF.

Paul

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] x86: CS limit checks
  2008-07-17 13:37     ` Paul Brook
@ 2008-07-17 16:10       ` Jan Kiszka
  2008-07-17 17:45         ` Paul Brook
  0 siblings, 1 reply; 12+ messages in thread
From: Jan Kiszka @ 2008-07-17 16:10 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel

Paul Brook wrote:
> On Thursday 17 July 2008, Jan Kiszka wrote:
>> Paul Brook wrote:
>>> On Thursday 17 July 2008, Jan Kiszka wrote:
>>>> +    if (s->pc < s->cs_base || s->pc - s->cs_base > s->cs_limit) {
>>>> +        /* At least some of the opcode fetches violate the CS limit.
>>>> +           Overwrite the generated code with a GPF raising one. */
>>>> +        gen_opc_ptr = gen_opc_start;
>>>> +        gen_opparam_ptr = gen_opparam_start;
>>>> +        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>>>> +    }
>>> I'm fairly sure this is wrong. The TB may fault before it gets to the end
>>> of the segment. Likewise if the instruction spanning the limit happens to
>>> be an illegal op you will generate the wrong kind of exception.
>> What a pity, it looked so easy. OK, will think about those aspects
>> again. BTW, what happens when the translator hits an unresolvable
>> address and faults?
> 
> Looks like that's also broken.  In practice I guess a page fault occuring 
> early is usually less harmful than a GPF.

To me it looks like as if the generator can so far raise a PF
prematurely when it steps on an invalid code address while building a
new TB. This probably has to fix the same way as the limit check is
realized: by injecting an exception (PF or GP) into the generated code
at the correct PC. Hmm, the PF-during-translation issue is probably not
just limited to x86...

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] x86: CS limit checks
  2008-07-17 16:10       ` Jan Kiszka
@ 2008-07-17 17:45         ` Paul Brook
  2008-07-17 19:24           ` Fabrice Bellard
  0 siblings, 1 reply; 12+ messages in thread
From: Paul Brook @ 2008-07-17 17:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: Jan Kiszka

> To me it looks like as if the generator can so far raise a PF
> prematurely when it steps on an invalid code address while building a
> new TB. This probably has to fix the same way as the limit check is
> realized: by injecting an exception (PF or GP) into the generated code
> at the correct PC. Hmm, the PF-during-translation issue is probably not
> just limited to x86...

Alpha, PPC, SPARC, SH and ARM avoid the problem by having fixed length word 
aligned instructions. Thumb-1 has special handling for the cross-boundary 
case (Instructions aren't really variable length, we just treat them that way 
as an optimisation).

Thumb-2, m68k, cris and x86 all look like they may incorrectly fetch code from 
the next page.

Paul

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] x86: CS limit checks
  2008-07-17 17:45         ` Paul Brook
@ 2008-07-17 19:24           ` Fabrice Bellard
  2008-07-17 21:30             ` [Qemu-devel] " Jan Kiszka
  0 siblings, 1 reply; 12+ messages in thread
From: Fabrice Bellard @ 2008-07-17 19:24 UTC (permalink / raw)
  To: qemu-devel

Paul Brook wrote:
>> To me it looks like as if the generator can so far raise a PF
>> prematurely when it steps on an invalid code address while building a
>> new TB. This probably has to fix the same way as the limit check is
>> realized: by injecting an exception (PF or GP) into the generated code
>> at the correct PC. Hmm, the PF-during-translation issue is probably not
>> just limited to x86...
> 
> Alpha, PPC, SPARC, SH and ARM avoid the problem by having fixed length word 
> aligned instructions. Thumb-1 has special handling for the cross-boundary 
> case (Instructions aren't really variable length, we just treat them that way 
> as an optimisation).
> 
> Thumb-2, m68k, cris and x86 all look like they may incorrectly fetch code from 
> the next page.

For x86 it is an expected behavior, not a bug. However, I agree that it 
would be safer to explicitely generate the exception. My plan has always 
been to suppress the ldx_code functions and to explicitly handle the PF 
and the cs_limit cases. Jocelyn Mayer submitted some time ago a patch to 
go in that direction.

Fabrice.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Qemu-devel] Re: [RFC][PATCH] x86: CS limit checks
  2008-07-17 19:24           ` Fabrice Bellard
@ 2008-07-17 21:30             ` Jan Kiszka
  0 siblings, 0 replies; 12+ messages in thread
From: Jan Kiszka @ 2008-07-17 21:30 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1441 bytes --]

Fabrice Bellard wrote:
> Paul Brook wrote:
>>> To me it looks like as if the generator can so far raise a PF
>>> prematurely when it steps on an invalid code address while building a
>>> new TB. This probably has to fix the same way as the limit check is
>>> realized: by injecting an exception (PF or GP) into the generated code
>>> at the correct PC. Hmm, the PF-during-translation issue is probably not
>>> just limited to x86...
>>
>> Alpha, PPC, SPARC, SH and ARM avoid the problem by having fixed length
>> word aligned instructions. Thumb-1 has special handling for the
>> cross-boundary case (Instructions aren't really variable length, we
>> just treat them that way as an optimisation).
>>
>> Thumb-2, m68k, cris and x86 all look like they may incorrectly fetch
>> code from the next page.
> 
> For x86 it is an expected behavior, not a bug. However, I agree that it
> would be safer to explicitely generate the exception. My plan has always
> been to suppress the ldx_code functions and to explicitly handle the PF
> and the cs_limit cases.

This path offers another nice possibility: the introduction of a
separate code TLB. I bet this will improve the hit rate on both the
existing (and then data-only) TLB as well as the new one only for code.

> Jocelyn Mayer submitted some time ago a patch to
> go in that direction.

Any links or keywords at hand? So far I failed to find this patch.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [RFC][PATCH] x86: CS limit checks
  2008-07-17 11:57 [Qemu-devel] [RFC][PATCH] x86: CS limit checks Jan Kiszka
  2008-07-17 12:17 ` Paul Brook
@ 2008-07-17 14:06 ` Fabrice Bellard
  2008-07-17 16:30   ` [Qemu-devel] " Jan Kiszka
  1 sibling, 1 reply; 12+ messages in thread
From: Fabrice Bellard @ 2008-07-17 14:06 UTC (permalink / raw)
  To: qemu-devel

Jan Kiszka wrote:
> Here is a proposal for adding code segment limit checks to x86. This
> patch should not need the -seg-checks switch as its tests are mostly
> performed during translation time. Moreover, I tried to confine the
> small additional overhead in the TB lookup procedure to x86 and Sparc.
> 
> Note that this patch depends on my debugging series, namely [1], as that
> one reduces the x86-specific code passages for TB generation. Also note
> that this patch is early and only lightly tested so far, not yet
> intended for inclusion, but definitely for commenting on!

Using more than 32 bits for cs_limit (and cs_base) in the TB is 
wasteful, so I strongly suggest to use a uint32_t type. In that case, 
cs_limit must be explicitely ignored in 64 bit code.

@@ -172,6 +173,8 @@ static inline TranslationBlock *tb_find_
      flags = env->hflags;
      flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
      cs_base = env->segs[R_CS].base;
+    if ((env->hflags & (HF_PE_MASK | HF_CS64_MASK)) == HF_PE_MASK)
+        cs_limit = env->segs[R_CS].limit;
      pc = cs_base + env->eip;

This test should be suppressed for performance reasons.

Generally speaking as I said in a private mail, I don't want an option 
-seg-checks: the segment limit and right checks must always be enabled, 
even if there is a small performance hit. The way to implement it is to 
store in the TB.flags for each segment whether the limit must be tested 
and whether the segment is RW.

Fabrice.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Qemu-devel] Re: [RFC][PATCH] x86: CS limit checks
  2008-07-17 14:06 ` [Qemu-devel] " Fabrice Bellard
@ 2008-07-17 16:30   ` Jan Kiszka
  2008-07-17 19:29     ` Fabrice Bellard
  2008-07-17 21:25     ` Jan Kiszka
  0 siblings, 2 replies; 12+ messages in thread
From: Jan Kiszka @ 2008-07-17 16:30 UTC (permalink / raw)
  To: qemu-devel

Fabrice Bellard wrote:
> Jan Kiszka wrote:
>> Here is a proposal for adding code segment limit checks to x86. This
>> patch should not need the -seg-checks switch as its tests are mostly
>> performed during translation time. Moreover, I tried to confine the
>> small additional overhead in the TB lookup procedure to x86 and Sparc.
>>
>> Note that this patch depends on my debugging series, namely [1], as that
>> one reduces the x86-specific code passages for TB generation. Also note
>> that this patch is early and only lightly tested so far, not yet
>> intended for inclusion, but definitely for commenting on!
> 
> Using more than 32 bits for cs_limit (and cs_base) in the TB is
> wasteful, so I strongly suggest to use a uint32_t type. In that case,
> cs_limit must be explicitely ignored in 64 bit code.
> 
> @@ -172,6 +173,8 @@ static inline TranslationBlock *tb_find_
>      flags = env->hflags;
>      flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
>      cs_base = env->segs[R_CS].base;
> +    if ((env->hflags & (HF_PE_MASK | HF_CS64_MASK)) == HF_PE_MASK)
> +        cs_limit = env->segs[R_CS].limit;
>      pc = cs_base + env->eip;
> 
> This test should be suppressed for performance reasons.

Yes, the test should be moved to the translator code. This will also
allow to reduce the width of cs_base/limit.

> 
> Generally speaking as I said in a private mail, I don't want an option
> -seg-checks: the segment limit and right checks must always be enabled,
> even if there is a small performance hit. The way to implement it is to
> store in the TB.flags for each segment whether the limit must be tested
> and whether the segment is RW.

Switching a segment selector is not yet a TB termination reason IIRC.
I'm not sure about the implication of such a change, e.g. if there are
relevant use case that have relaxed segment limits and attributes, but
perform a lot segment register reloads.

And then there is the open question how much performance can be gained
with compile-time optimization for those guests who do use segmentation.
The worst case is very roughly about 50% slowdown right now (/w vs. w/o
-seg-checks). As answered privately, some -no-seg-checks switch could
remain a useful optimization.

Jan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] Re: [RFC][PATCH] x86: CS limit checks
  2008-07-17 16:30   ` [Qemu-devel] " Jan Kiszka
@ 2008-07-17 19:29     ` Fabrice Bellard
  2008-07-17 21:25     ` Jan Kiszka
  1 sibling, 0 replies; 12+ messages in thread
From: Fabrice Bellard @ 2008-07-17 19:29 UTC (permalink / raw)
  To: qemu-devel

[...]
> And then there is the open question how much performance can be gained
> with compile-time optimization for those guests who do use segmentation.

The goal is to optimize the cases where segmentation is not used ! It is 
just an extension of the existing "HF_ADDSEG" optimization.

> The worst case is very roughly about 50% slowdown right now (/w vs. w/o
> -seg-checks). As answered privately, some -no-seg-checks switch could
> remain a useful optimization.

Not sure. I believe most recent OSes (i.e. those where speed really 
matters) have segment registers loaded so that no runtime checks are 
necessary.

Fabrice.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Qemu-devel] Re: [RFC][PATCH] x86: CS limit checks
  2008-07-17 16:30   ` [Qemu-devel] " Jan Kiszka
  2008-07-17 19:29     ` Fabrice Bellard
@ 2008-07-17 21:25     ` Jan Kiszka
  1 sibling, 0 replies; 12+ messages in thread
From: Jan Kiszka @ 2008-07-17 21:25 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1486 bytes --]

Jan Kiszka wrote:
> Fabrice Bellard wrote:
>> Jan Kiszka wrote:
>>> Here is a proposal for adding code segment limit checks to x86. This
>>> patch should not need the -seg-checks switch as its tests are mostly
>>> performed during translation time. Moreover, I tried to confine the
>>> small additional overhead in the TB lookup procedure to x86 and Sparc.
>>>
>>> Note that this patch depends on my debugging series, namely [1], as that
>>> one reduces the x86-specific code passages for TB generation. Also note
>>> that this patch is early and only lightly tested so far, not yet
>>> intended for inclusion, but definitely for commenting on!
>> Using more than 32 bits for cs_limit (and cs_base) in the TB is
>> wasteful, so I strongly suggest to use a uint32_t type. In that case,
>> cs_limit must be explicitely ignored in 64 bit code.
>>
>> @@ -172,6 +173,8 @@ static inline TranslationBlock *tb_find_
>>      flags = env->hflags;
>>      flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
>>      cs_base = env->segs[R_CS].base;
>> +    if ((env->hflags & (HF_PE_MASK | HF_CS64_MASK)) == HF_PE_MASK)
>> +        cs_limit = env->segs[R_CS].limit;
>>      pc = cs_base + env->eip;
>>
>> This test should be suppressed for performance reasons.
> 
> Yes, the test should be moved to the translator code. This will also
> allow to reduce the width of cs_base/limit.

cs_base must remain target_ulong - sparc relies on it for storing npc.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2008-07-17 21:30 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-17 11:57 [Qemu-devel] [RFC][PATCH] x86: CS limit checks Jan Kiszka
2008-07-17 12:17 ` Paul Brook
2008-07-17 13:14   ` Jan Kiszka
2008-07-17 13:37     ` Paul Brook
2008-07-17 16:10       ` Jan Kiszka
2008-07-17 17:45         ` Paul Brook
2008-07-17 19:24           ` Fabrice Bellard
2008-07-17 21:30             ` [Qemu-devel] " Jan Kiszka
2008-07-17 14:06 ` [Qemu-devel] " Fabrice Bellard
2008-07-17 16:30   ` [Qemu-devel] " Jan Kiszka
2008-07-17 19:29     ` Fabrice Bellard
2008-07-17 21:25     ` Jan Kiszka

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).