* [Qemu-devel] [RFC v2 PATCH] tcg/softmmu: Increase size of TLB caches
@ 2017-08-24 15:58 Pranith Kumar
2017-08-24 16:02 ` Pranith Kumar
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Pranith Kumar @ 2017-08-24 15:58 UTC (permalink / raw)
To: alex.bennee, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
Claudio Fontana, Andrzej Zaborowski, Aurelien Jarno,
Alexander Graf, Stefan Weil, open list:Overall,
open list:AArch64 target
This patch increases the number of entries cached in the TLB. I went
over a few architectures to see if increasing it is problematic. Only
armv6 seems to have a limitation that only 8 bits can be used for
indexing these entries. For other architectures, the number of TLB
entries is raised to as many as 4K (12 index bits). The patch also doubles the
number of victim TLB entries.
A few statistics collected from a build benchmark for various cache
sizes are below:
| TLB bits\vTLB entries | 8 | 16 | 32 |
| 8 | 952.94(+0.0%) | 929.99(+2.4%) | 919.02(+3.6%) |
| 10 | 898.92(+5.6%) | 886.13(+7.0%) | 887.03(+6.9%) |
| 12 | 878.56(+7.8%) | 873.53(+8.3%)* | 875.34(+8.1%) |
The best combination for this workload came out to be 12 bits for the
TLB and a 16 entry vTLB cache.
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
include/exec/cpu-defs.h | 6 +++---
tcg/aarch64/tcg-target.h | 1 +
tcg/arm/tcg-target.h | 1 +
tcg/i386/tcg-target.h | 2 ++
tcg/ia64/tcg-target.h | 1 +
tcg/mips/tcg-target.h | 1 +
tcg/ppc/tcg-target.h | 1 +
tcg/s390/tcg-target.h | 1 +
tcg/sparc/tcg-target.h | 1 +
tcg/tci/tcg-target.h | 1 +
10 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index bc8e7f848d..a5e1ad6cea 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -57,8 +57,8 @@ typedef uint64_t target_ulong;
#endif
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
-/* use a fully associative victim tlb of 8 entries */
-#define CPU_VTLB_SIZE 8
+/* use a fully associative victim tlb of 16 entries */
+#define CPU_VTLB_SIZE 16
#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
#define CPU_TLB_ENTRY_BITS 4
@@ -89,7 +89,7 @@ typedef uint64_t target_ulong;
* of tlb_table inside env (which is non-trivial but not huge).
*/
#define CPU_TLB_BITS \
- MIN(8, \
+ MIN(CPU_TLB_BITS_MAX, \
TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \
(NB_MMU_MODES <= 1 ? 0 : \
NB_MMU_MODES <= 2 ? 1 : \
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 55a46ac825..f428e09c98 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -15,6 +15,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
+#define CPU_TLB_BITS_MAX 12
#undef TCG_TARGET_STACK_GROWSUP
typedef enum {
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 5ef1086710..69414be393 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -60,6 +60,7 @@ extern int arm_arch;
#undef TCG_TARGET_STACK_GROWSUP
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define CPU_TLB_BITS_MAX 8
typedef enum {
TCG_REG_R0 = 0,
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 73a15f7e80..35c27a977b 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -162,6 +162,8 @@ extern bool have_popcnt;
# define TCG_AREG0 TCG_REG_EBP
#endif
+#define CPU_TLB_BITS_MAX 12
+
static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
{
}
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 901bb7575d..fd713f7adf 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -28,6 +28,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 16
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 21
+#define CPU_TLB_BITS_MAX 8
typedef struct {
uint64_t lo __attribute__((aligned(16)));
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index d75cb63ed3..fd9046b7ad 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -37,6 +37,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define CPU_TLB_BITS_MAX 12
#define TCG_TARGET_NB_REGS 32
typedef enum {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 5f4a40a5b4..f5071f706d 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -34,6 +34,7 @@
#define TCG_TARGET_NB_REGS 32
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define CPU_TLB_BITS_MAX 8
typedef enum {
TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 957f0c0afe..218be322ad 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -27,6 +27,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 2
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
+#define CPU_TLB_BITS_MAX 12
typedef enum TCGReg {
TCG_REG_R0 = 0,
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 854a0afd70..9fd59a64f2 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -29,6 +29,7 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define CPU_TLB_BITS_MAX 12
#define TCG_TARGET_NB_REGS 32
typedef enum {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 06963288dc..3d39d479ea 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -43,6 +43,7 @@
#define TCG_TARGET_INTERPRETER 1
#define TCG_TARGET_INSN_UNIT_SIZE 1
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define CPU_TLB_BITS_MAX 8
#if UINTPTR_MAX == UINT32_MAX
# define TCG_TARGET_REG_BITS 32
--
2.13.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [RFC v2 PATCH] tcg/softmmu: Increase size of TLB caches
2017-08-24 15:58 [Qemu-devel] [RFC v2 PATCH] tcg/softmmu: Increase size of TLB caches Pranith Kumar
@ 2017-08-24 16:02 ` Pranith Kumar
2017-08-28 18:08 ` Richard Henderson
2017-08-28 20:45 ` Richard Henderson
2 siblings, 0 replies; 4+ messages in thread
From: Pranith Kumar @ 2017-08-24 16:02 UTC (permalink / raw)
To: Alex Bennée, Paolo Bonzini, Peter Crosthwaite,
Richard Henderson, Andrzej Zaborowski, Aurelien Jarno,
Alexander Graf, Stefan Weil, open list:Overall,
open list:AArch64 target
On Thu, Aug 24, 2017 at 11:58 AM, Pranith Kumar <bobby.prani@gmail.com> wrote:
> This patch increases the number of entries cached in the TLB. I went
> over a few architectures to see if increasing it is problematic. Only
> armv6 seems to have a limitation that only 8 bits can be used for
> indexing these entries. For other architectures, the number of TLB
> entries is increased to a 4K-sized cache. The patch also doubles the
> number of victim TLB entries.
>
> A few statistics collected from a build benchmark for various cache
> sizes is below:
>
> | TLB bits\vTLB entries | 8 | 16 | 32 |
> | 8 | 952.94(+0.0%) | 929.99(+2.4%) | 919.02(+3.6%) |
> | 10 | 898.92(+5.6%) | 886.13(+7.0%) | 887.03(+6.9%) |
> | 12 | 878.56(+7.8%) | 873.53(+8.3%)* | 875.34(+8.1%) |
>
> The best combination for this workload came out to be 12 bits for the
> TLB and a 16 entry vTLB cache.
You can find the raw data here: http://paste.ubuntu.com/25383585/
Thanks,
--
Pranith
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [RFC v2 PATCH] tcg/softmmu: Increase size of TLB caches
2017-08-24 15:58 [Qemu-devel] [RFC v2 PATCH] tcg/softmmu: Increase size of TLB caches Pranith Kumar
2017-08-24 16:02 ` Pranith Kumar
@ 2017-08-28 18:08 ` Richard Henderson
2017-08-28 20:45 ` Richard Henderson
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2017-08-28 18:08 UTC (permalink / raw)
To: Pranith Kumar, alex.bennee, Paolo Bonzini, Peter Crosthwaite,
Claudio Fontana, Andrzej Zaborowski, Aurelien Jarno,
Alexander Graf, Stefan Weil, open list:Overall,
open list:AArch64 target
On 08/24/2017 08:58 AM, Pranith Kumar wrote:
> | TLB bits\vTLB entries | 8 | 16 | 32 |
> | 8 | 952.94(+0.0%) | 929.99(+2.4%) | 919.02(+3.6%) |
> | 10 | 898.92(+5.6%) | 886.13(+7.0%) | 887.03(+6.9%) |
> | 12 | 878.56(+7.8%) | 873.53(+8.3%)* | 875.34(+8.1%) |
Thanks for collecting this.
> @@ -89,7 +89,7 @@ typedef uint64_t target_ulong;
> * of tlb_table inside env (which is non-trivial but not huge).
> */
> #define CPU_TLB_BITS \
> - MIN(8, \
> + MIN(CPU_TLB_BITS_MAX, \
> TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \
> (NB_MMU_MODES <= 1 ? 0 : \
> NB_MMU_MODES <= 2 ? 1 : \
> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
> index 55a46ac825..f428e09c98 100644
> --- a/tcg/aarch64/tcg-target.h
> +++ b/tcg/aarch64/tcg-target.h
> @@ -15,6 +15,7 @@
>
> #define TCG_TARGET_INSN_UNIT_SIZE 4
> #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
> +#define CPU_TLB_BITS_MAX 12
I'd rather the definition in tcg-target.h reflect the actual maximum and limit
that to what we want (12) within cpu-defs.h. So, here maybe
#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
etc.
r~
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [RFC v2 PATCH] tcg/softmmu: Increase size of TLB caches
2017-08-24 15:58 [Qemu-devel] [RFC v2 PATCH] tcg/softmmu: Increase size of TLB caches Pranith Kumar
2017-08-24 16:02 ` Pranith Kumar
2017-08-28 18:08 ` Richard Henderson
@ 2017-08-28 20:45 ` Richard Henderson
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2017-08-28 20:45 UTC (permalink / raw)
To: Pranith Kumar, alex.bennee, Paolo Bonzini, Peter Crosthwaite,
Claudio Fontana, Andrzej Zaborowski, Aurelien Jarno,
Alexander Graf, Stefan Weil, open list:Overall,
open list:AArch64 target
On 08/24/2017 08:58 AM, Pranith Kumar wrote:
> +#define CPU_TLB_BITS_MAX 12
Following up on our IRC conversation, host maximums are:
aarch64: unlimited (32)
arm: 8 (patch exists to increase to 32 for armv7)
i386: 32 - CPU_TLB_ENTRY_BITS
ia64: unlimited (32)
mips: 16 - CPU_TLB_ENTRY_BITS
ppc: unlimited (32)
s390: unlimited (32)
sparc: 12
tci: unlimited (32)
r~
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-08-28 20:45 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-24 15:58 [Qemu-devel] [RFC v2 PATCH] tcg/softmmu: Increase size of TLB caches Pranith Kumar
2017-08-24 16:02 ` Pranith Kumar
2017-08-28 18:08 ` Richard Henderson
2017-08-28 20:45 ` Richard Henderson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).