From: Pranith Kumar <bobby.prani@gmail.com>
To: alex.bennee@linaro.org, Paolo Bonzini <pbonzini@redhat.com>,
Peter Crosthwaite <crosthwaite.peter@gmail.com>,
Richard Henderson <rth@twiddle.net>,
Claudio Fontana <claudio.fontana@huawei.com>,
Andrzej Zaborowski <balrogg@gmail.com>,
Aurelien Jarno <aurelien@aurel32.net>,
Alexander Graf <agraf@suse.de>, Stefan Weil <sw@weilnetz.de>,
qemu-devel@nongnu.org (open list:Overall),
qemu-arm@nongnu.org (open list:AArch64 target)
Cc: qemu-devel@nongnu.org
Subject: [RFC v3 PATCH 5/5] tcg/softmmu: Increase size of TLB caches
Date: Tue, 29 Aug 2017 02:33:13 -0400 [thread overview]
Message-ID: <20170829063313.10237-5-bobby.prani@gmail.com> (raw)
In-Reply-To: <20170829063313.10237-1-bobby.prani@gmail.com>
This patch increases the number of entries cached in the TLB. I went
over a few architectures to see if increasing it is problematic. Only
armv6 seems to have a limitation that only 8 bits can be used for
indexing these entries. For other architectures, the number of TLB
entries is increased to a 4K-sized cache. The patch also doubles the
number of victim TLB entries.
Some statistics collected from a build benchmark for various cache
sizes is listed below:
| TLB bits\vTLB entires | 8 | 16 | 32 |
| 8 | 952.94(+0.0%) | 929.99(+2.4%) | 919.02(+3.6%) |
| 10 | 898.92(+5.6%) | 886.13(+7.0%) | 887.03(+6.9%) |
| 12 | 878.56(+7.8%) | 873.53(+8.3%)* | 875.34(+8.1%) |
The best combination for this workload came out to be 12 bits for the
TLB and a 16 entry vTLB cache.
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
include/exec/cpu-defs.h | 13 ++++---------
tcg/aarch64/tcg-target.h | 1 +
tcg/arm/tcg-target.h | 1 +
tcg/i386/tcg-target.h | 6 ++++++
tcg/ia64/tcg-target.h | 1 +
tcg/mips/tcg-target.h | 6 ++++++
tcg/ppc/tcg-target.h | 1 +
tcg/s390/tcg-target.h | 1 +
tcg/sparc/tcg-target.h | 1 +
tcg/tci/tcg-target.h | 2 ++
10 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index bc8e7f848d..33b0ac6fe0 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -57,8 +57,8 @@ typedef uint64_t target_ulong;
#endif
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
-/* use a fully associative victim tlb of 8 entries */
-#define CPU_VTLB_SIZE 8
+/* use a fully associative victim tlb of 16 entries */
+#define CPU_VTLB_SIZE 16
#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
#define CPU_TLB_ENTRY_BITS 4
@@ -87,14 +87,9 @@ typedef uint64_t target_ulong;
* could be something like 0xC000 (the offset of the last TLB table) plus
* 0x18 (the offset of the addend field in each TLB entry) plus the offset
* of tlb_table inside env (which is non-trivial but not huge).
+ * TODO: rewrite this comment
*/
-#define CPU_TLB_BITS \
- MIN(8, \
- TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \
- (NB_MMU_MODES <= 1 ? 0 : \
- NB_MMU_MODES <= 2 ? 1 : \
- NB_MMU_MODES <= 4 ? 2 : \
- NB_MMU_MODES <= 8 ? 3 : 4))
+#define CPU_TLB_BITS MIN(12, TCG_TARGET_TLB_MAX_INDEX_BITS)
#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index b41a248bee..9f4558cd83 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -15,6 +15,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
#undef TCG_TARGET_STACK_GROWSUP
typedef enum {
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index a38be15a39..ebe27991f3 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -60,6 +60,7 @@ extern int arm_arch;
#undef TCG_TARGET_STACK_GROWSUP
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 8
typedef enum {
TCG_REG_R0 = 0,
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 73a15f7e80..456d57115c 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -162,6 +162,12 @@ extern bool have_popcnt;
# define TCG_AREG0 TCG_REG_EBP
#endif
+#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 28
+#else
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 27
+#endif
+
static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
{
}
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 8f475fe742..35878e20c7 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -28,6 +28,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 16
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 21
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
typedef struct {
uint64_t lo __attribute__((aligned(16)));
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index e9558d15bc..0c7c5cf64c 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -39,6 +39,12 @@
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
#define TCG_TARGET_NB_REGS 32
+#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 12
+#else
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 11
+#endif
+
typedef enum {
TCG_REG_ZERO = 0,
TCG_REG_AT,
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 5a092b038a..82e10c9471 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -34,6 +34,7 @@
#define TCG_TARGET_NB_REGS 32
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
typedef enum {
TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index dc0e59193c..57f0e22532 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -27,6 +27,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 2
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
typedef enum TCGReg {
TCG_REG_R0 = 0,
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 4515c9ab48..378d218923 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -29,6 +29,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 12
#define TCG_TARGET_NB_REGS 32
typedef enum {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 06963288dc..456a4fc4e1 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -40,9 +40,11 @@
#ifndef TCG_TARGET_H
#define TCG_TARGET_H
+
#define TCG_TARGET_INTERPRETER 1
#define TCG_TARGET_INSN_UNIT_SIZE 1
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
#if UINTPTR_MAX == UINT32_MAX
# define TCG_TARGET_REG_BITS 32
--
2.13.0
WARNING: multiple messages have this Message-ID (diff)
From: Pranith Kumar <bobby.prani@gmail.com>
To: alex.bennee@linaro.org, Paolo Bonzini <pbonzini@redhat.com>,
Peter Crosthwaite <crosthwaite.peter@gmail.com>,
Richard Henderson <rth@twiddle.net>,
Claudio Fontana <claudio.fontana@huawei.com>,
Andrzej Zaborowski <balrogg@gmail.com>,
Aurelien Jarno <aurelien@aurel32.net>,
Alexander Graf <agraf@suse.de>, Stefan Weil <sw@weilnetz.de>,
"open list:Overall" <qemu-devel@nongnu.org>,
"open list:AArch64 target" <qemu-arm@nongnu.org>
Subject: [Qemu-devel] [RFC v3 PATCH 5/5] tcg/softmmu: Increase size of TLB caches
Date: Tue, 29 Aug 2017 02:33:13 -0400 [thread overview]
Message-ID: <20170829063313.10237-5-bobby.prani@gmail.com> (raw)
In-Reply-To: <20170829063313.10237-1-bobby.prani@gmail.com>
This patch increases the number of entries cached in the TLB. I went
over a few architectures to see if increasing it is problematic. Only
armv6 seems to have a limitation that only 8 bits can be used for
indexing these entries. For other architectures, the number of TLB
entries is increased to a 4K-sized cache. The patch also doubles the
number of victim TLB entries.
Some statistics collected from a build benchmark for various cache
sizes is listed below:
| TLB bits\vTLB entires | 8 | 16 | 32 |
| 8 | 952.94(+0.0%) | 929.99(+2.4%) | 919.02(+3.6%) |
| 10 | 898.92(+5.6%) | 886.13(+7.0%) | 887.03(+6.9%) |
| 12 | 878.56(+7.8%) | 873.53(+8.3%)* | 875.34(+8.1%) |
The best combination for this workload came out to be 12 bits for the
TLB and a 16 entry vTLB cache.
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
include/exec/cpu-defs.h | 13 ++++---------
tcg/aarch64/tcg-target.h | 1 +
tcg/arm/tcg-target.h | 1 +
tcg/i386/tcg-target.h | 6 ++++++
tcg/ia64/tcg-target.h | 1 +
tcg/mips/tcg-target.h | 6 ++++++
tcg/ppc/tcg-target.h | 1 +
tcg/s390/tcg-target.h | 1 +
tcg/sparc/tcg-target.h | 1 +
tcg/tci/tcg-target.h | 2 ++
10 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index bc8e7f848d..33b0ac6fe0 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -57,8 +57,8 @@ typedef uint64_t target_ulong;
#endif
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
-/* use a fully associative victim tlb of 8 entries */
-#define CPU_VTLB_SIZE 8
+/* use a fully associative victim tlb of 16 entries */
+#define CPU_VTLB_SIZE 16
#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
#define CPU_TLB_ENTRY_BITS 4
@@ -87,14 +87,9 @@ typedef uint64_t target_ulong;
* could be something like 0xC000 (the offset of the last TLB table) plus
* 0x18 (the offset of the addend field in each TLB entry) plus the offset
* of tlb_table inside env (which is non-trivial but not huge).
+ * TODO: rewrite this comment
*/
-#define CPU_TLB_BITS \
- MIN(8, \
- TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \
- (NB_MMU_MODES <= 1 ? 0 : \
- NB_MMU_MODES <= 2 ? 1 : \
- NB_MMU_MODES <= 4 ? 2 : \
- NB_MMU_MODES <= 8 ? 3 : 4))
+#define CPU_TLB_BITS MIN(12, TCG_TARGET_TLB_MAX_INDEX_BITS)
#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index b41a248bee..9f4558cd83 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -15,6 +15,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
#undef TCG_TARGET_STACK_GROWSUP
typedef enum {
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index a38be15a39..ebe27991f3 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -60,6 +60,7 @@ extern int arm_arch;
#undef TCG_TARGET_STACK_GROWSUP
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 8
typedef enum {
TCG_REG_R0 = 0,
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 73a15f7e80..456d57115c 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -162,6 +162,12 @@ extern bool have_popcnt;
# define TCG_AREG0 TCG_REG_EBP
#endif
+#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 28
+#else
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 27
+#endif
+
static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
{
}
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 8f475fe742..35878e20c7 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -28,6 +28,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 16
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 21
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
typedef struct {
uint64_t lo __attribute__((aligned(16)));
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index e9558d15bc..0c7c5cf64c 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -39,6 +39,12 @@
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
#define TCG_TARGET_NB_REGS 32
+#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 12
+#else
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 11
+#endif
+
typedef enum {
TCG_REG_ZERO = 0,
TCG_REG_AT,
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 5a092b038a..82e10c9471 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -34,6 +34,7 @@
#define TCG_TARGET_NB_REGS 32
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
typedef enum {
TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index dc0e59193c..57f0e22532 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -27,6 +27,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 2
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
typedef enum TCGReg {
TCG_REG_R0 = 0,
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 4515c9ab48..378d218923 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -29,6 +29,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 12
#define TCG_TARGET_NB_REGS 32
typedef enum {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 06963288dc..456a4fc4e1 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -40,9 +40,11 @@
#ifndef TCG_TARGET_H
#define TCG_TARGET_H
+
#define TCG_TARGET_INTERPRETER 1
#define TCG_TARGET_INSN_UNIT_SIZE 1
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
#if UINTPTR_MAX == UINT32_MAX
# define TCG_TARGET_REG_BITS 32
--
2.13.0
next prev parent reply other threads:[~2017-08-29 6:33 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-29 6:33 [PATCH 1/5] target/arm: Remove stale comment Pranith Kumar
2017-08-29 6:33 ` [Qemu-devel] " Pranith Kumar
2017-08-29 6:33 ` [Qemu-devel] [RFC v3 PATCH 2/5] cpus-common: Cache allocated work items Pranith Kumar
2017-09-05 12:28 ` Alex Bennée
2017-08-29 6:33 ` [RFC v3 PATCH 3/5] mttcg: Add tcg target default memory ordering Pranith Kumar
2017-08-29 6:33 ` [Qemu-devel] " Pranith Kumar
2017-08-29 14:51 ` Richard Henderson
2017-08-29 14:51 ` [Qemu-devel] " Richard Henderson
2017-08-29 6:33 ` [Qemu-devel] [RFC v3 PATCH 4/5] mttcg: Implement implicit ordering semantics Pranith Kumar
2017-08-29 14:53 ` Richard Henderson
2017-09-02 1:44 ` Emilio G. Cota
2017-08-29 6:33 ` Pranith Kumar [this message]
2017-08-29 6:33 ` [Qemu-devel] [RFC v3 PATCH 5/5] tcg/softmmu: Increase size of TLB caches Pranith Kumar
2017-08-29 15:01 ` Richard Henderson
2017-08-29 15:01 ` [Qemu-devel] " Richard Henderson
2017-08-29 16:23 ` Pranith Kumar
2017-08-29 16:23 ` [Qemu-devel] " Pranith Kumar
2017-08-29 15:03 ` Richard Henderson
2017-08-29 15:03 ` [Qemu-devel] " Richard Henderson
2017-09-05 12:02 ` [PATCH 1/5] target/arm: Remove stale comment Alex Bennée
2017-09-05 12:02 ` [Qemu-devel] " Alex Bennée
2017-09-06 0:35 ` Pranith Kumar
2017-09-06 0:35 ` [Qemu-devel] " Pranith Kumar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170829063313.10237-5-bobby.prani@gmail.com \
--to=bobby.prani@gmail.com \
--cc=agraf@suse.de \
--cc=alex.bennee@linaro.org \
--cc=aurelien@aurel32.net \
--cc=balrogg@gmail.com \
--cc=claudio.fontana@huawei.com \
--cc=crosthwaite.peter@gmail.com \
--cc=pbonzini@redhat.com \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
--cc=sw@weilnetz.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.