All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Jacobowitz <drow@false.org>
To: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] Huge TLB performance improvement
Date: Sun, 12 Nov 2006 11:56:35 -0500	[thread overview]
Message-ID: <20061112165635.GA13998@nevyn.them.org> (raw)
In-Reply-To: <20061112142938.GC4040@networkno.de>

On Sun, Nov 12, 2006 at 02:29:38PM +0000, Thiemo Seufer wrote:
> JFTR, increasing the TLB size from 16 to 64 entries made no performance
> difference whatsoever.

I suspect that's because we do about as much eviction.  Here's a
different approach.  Whenever an entry is evicted by tlbwr, the guest
can't predict which existing entry will be removed.  So, let's evict
none of them.  This takes the "evicted" entry and swaps it out to
a second set of TLB entries, avoiding the qemu internal TLB flush.

I'm trying for a complete as-if implementation, so tlbp only searches
the "real" entries (I don't know if it should cause a flush of the
shadowed entries, but things seem to work OK without it).  tlbwi and
tlbr both discard the shadowed entries.

This appears to cut single page flushes by 90%.

My best time for boot/runlevel-2/halt yesterday was 73 seconds.  This
runs at about 51 seconds.  apt-get update finishes in a reasonable
amount of time.  This is with all of the patches I've posted to the
list applied, including the improved tb_jmp_cache handling - we still
do a non-trivial number of single page cache flushes so I think it's
a good idea.

> The excessive flushing for mips happens because Qemu doesn't properly
> model the hardware's ASID handling.

We still do flushes at ASID switches, by the way, so it might be
possible to get further gains here.  But we're down to under ~ 15%
of CPU time for soft-mmu routines and tb management routines, which
is very good.  Then there's about 65% executing guest code and the rest
in translation, virtual hardware, and other overhead.

-- 
Daniel Jacobowitz
CodeSourcery

---
 target-mips/cpu.h       |    3 ++-
 target-mips/exec.h      |    1 +
 target-mips/helper.c    |    2 +-
 target-mips/mips-defs.h |    1 +
 target-mips/op_helper.c |   43 +++++++++++++++++++++++++++++++++++++------
 target-mips/translate.c |    1 +
 6 files changed, 43 insertions(+), 8 deletions(-)

Index: qemu/target-mips/cpu.h
===================================================================
--- qemu.orig/target-mips/cpu.h	2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/cpu.h	2006-11-12 11:34:24.000000000 -0500
@@ -94,7 +94,8 @@ struct CPUMIPSState {
 		
 #endif
 #if defined(MIPS_USES_R4K_TLB)
-    tlb_t tlb[MIPS_TLB_NB];
+    tlb_t tlb[MIPS_TLB_MAX];
+    uint32_t tlb_in_use;
 #endif
     uint32_t CP0_index;
     uint32_t CP0_random;
Index: qemu/target-mips/exec.h
===================================================================
--- qemu.orig/target-mips/exec.h	2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/exec.h	2006-11-12 11:34:24.000000000 -0500
@@ -115,5 +115,6 @@ uint32_t cpu_mips_get_count (CPUState *e
 void cpu_mips_store_count (CPUState *env, uint32_t value);
 void cpu_mips_store_compare (CPUState *env, uint32_t value);
 void cpu_mips_clock_init (CPUState *env);
+void cpu_mips_tlb_flush (CPUState *env, int flush_global);
 
 #endif /* !defined(__QEMU_MIPS_EXEC_H__) */
Index: qemu/target-mips/helper.c
===================================================================
--- qemu.orig/target-mips/helper.c	2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/helper.c	2006-11-12 11:34:24.000000000 -0500
@@ -46,7 +46,7 @@ static int map_address (CPUState *env, t
     tlb_t *tlb;
     int i, n;
 
-    for (i = 0; i < MIPS_TLB_NB; i++) {
+    for (i = 0; i < env->tlb_in_use; i++) {
         tlb = &env->tlb[i];
         /* Check ASID, virtual page number & size */
         if ((tlb->G == 1 || tlb->ASID == ASID) &&
Index: qemu/target-mips/mips-defs.h
===================================================================
--- qemu.orig/target-mips/mips-defs.h	2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/mips-defs.h	2006-11-12 11:34:24.000000000 -0500
@@ -22,6 +22,7 @@
 /* Uses MIPS R4Kc TLB model */
 #define MIPS_USES_R4K_TLB
 #define MIPS_TLB_NB 16
+#define MIPS_TLB_MAX 128
 /* basic FPU register support */
 #define MIPS_USES_FPU 1
 /* Define a implementation number of 1.
Index: qemu/target-mips/op_helper.c
===================================================================
--- qemu.orig/target-mips/op_helper.c	2006-11-12 11:34:02.000000000 -0500
+++ qemu/target-mips/op_helper.c	2006-11-12 11:42:44.000000000 -0500
@@ -367,7 +367,7 @@ void do_mtc0 (int reg, int sel)
         env->CP0_EntryHi = val;
 	/* If the ASID changes, flush qemu's TLB.  */
 	if ((old & 0xFF) != (val & 0xFF))
-	  tlb_flush (env, 1);
+	  cpu_mips_tlb_flush (env, 1);
         rn = "EntryHi";
         break;
     case 11:
@@ -569,7 +569,14 @@ void fpu_handle_exception(void)
 
 /* TLB management */
 #if defined(MIPS_USES_R4K_TLB)
-static void invalidate_tlb (int idx)
+void cpu_mips_tlb_flush (CPUState *env, int flush_global)
+{
+    /* Flush qemu's TLB and discard all shadowed entries.  */
+    tlb_flush (env, flush_global);
+    env->tlb_in_use = MIPS_TLB_NB;
+}
+
+static void invalidate_tlb (int idx, int use_extra)
 {
     tlb_t *tlb;
     target_ulong addr;
@@ -582,6 +589,15 @@ static void invalidate_tlb (int idx)
         return;
     }
 
+    if (use_extra && env->tlb_in_use < MIPS_TLB_MAX) {
+        /* For tlbwr, we can shadow the discarded entry into
+	   a new (fake) TLB entry, as long as the guest can not
+	   tell that it's there.  */
+        memcpy (&env->tlb[env->tlb_in_use], tlb, sizeof (*tlb));
+        env->tlb_in_use++;
+        return;
+    }
+
     if (tlb->V0) {
         tb_invalidate_page_range(tlb->PFN[0], tlb->end - tlb->VPN);
         addr = tlb->VPN;
@@ -600,6 +616,14 @@ static void invalidate_tlb (int idx)
     }
 }
 
+static void mips_tlb_flush_extra (CPUState *env)
+{
+    /* Discard all shadowed entries beyond the first MIPS_TLB_NB.  */
+    while (env->tlb_in_use > MIPS_TLB_NB) {
+        invalidate_tlb(--env->tlb_in_use, 0);
+    }
+}
+
 static void fill_tlb (int idx)
 {
     tlb_t *tlb;
@@ -626,9 +650,14 @@ static void fill_tlb (int idx)
 
 void do_tlbwi (void)
 {
+    /* Discard cached TLB entries.  We could avoid doing this if the
+       tlbwi is just upgrading access permissions on the current entry;
+       that might be a further win.  */
+    mips_tlb_flush_extra (env);
+
     /* Wildly undefined effects for CP0_index containing a too high value and
        MIPS_TLB_NB not being a power of two.  But so does real silicon.  */
-    invalidate_tlb(env->CP0_index & (MIPS_TLB_NB - 1));
+    invalidate_tlb(env->CP0_index & (MIPS_TLB_NB - 1), 0);
     fill_tlb(env->CP0_index & (MIPS_TLB_NB - 1));
 }
 
@@ -636,7 +665,7 @@ void do_tlbwr (void)
 {
     int r = cpu_mips_get_random(env);
 
-    invalidate_tlb(r);
+    invalidate_tlb(r, 1);
     fill_tlb(r);
 }
 
@@ -673,8 +702,10 @@ void do_tlbr (void)
     tlb = &env->tlb[env->CP0_index & (MIPS_TLB_NB - 1)];
 
     /* If this will change the current ASID, flush qemu's TLB.  */
-    if (ASID != tlb->ASID && tlb->G != 1)
-      tlb_flush (env, 1);
+    if (ASID != tlb->ASID)
+        cpu_mips_tlb_flush (env, 1);
+
+    mips_tlb_flush_extra(env);
 
     env->CP0_EntryHi = tlb->VPN | tlb->ASID;
     size = (tlb->end - tlb->VPN) >> 12;
Index: qemu/target-mips/translate.c
===================================================================
--- qemu.orig/target-mips/translate.c	2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/translate.c	2006-11-12 11:34:24.000000000 -0500
@@ -2450,6 +2450,7 @@ void cpu_reset (CPUMIPSState *env)
     env->PC = 0xBFC00000;
 #if defined (MIPS_USES_R4K_TLB)
     env->CP0_random = MIPS_TLB_NB - 1;
+    env->tlb_in_use = MIPS_TLB_NB;
 #endif
     env->CP0_Wired = 0;
     env->CP0_Config0 = MIPS_CONFIG0;

  parent reply	other threads:[~2006-11-12 16:56 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-03-06 14:59 [Qemu-devel] [PATCH] Huge TLB performance improvement Thiemo Seufer
2006-11-05 15:38 ` Daniel Jacobowitz
2006-11-12  1:10   ` Daniel Jacobowitz
2006-11-12 11:49     ` Laurent Desnogues
2006-11-12 13:52       ` Thiemo Seufer
2006-11-12 14:08       ` Paul Brook
2006-11-12 14:29         ` Thiemo Seufer
2006-11-12 14:44           ` Paul Brook
2006-11-12 15:07             ` Daniel Jacobowitz
2006-11-12 15:24               ` Daniel Jacobowitz
2006-11-12 15:26             ` Thiemo Seufer
2006-11-12 16:56           ` Daniel Jacobowitz [this message]
2006-11-12 17:49             ` Daniel Jacobowitz
2006-11-12 18:02             ` Dirk Behme
2006-11-12 22:13               ` Daniel Jacobowitz
2006-11-12 20:42     ` Paul Brook

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061112165635.GA13998@nevyn.them.org \
    --to=drow@false.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.