- * [PATCH 1/4] target/ppc: Add new hflags to support BHRB
  2023-09-12 19:21 [PATCH 0/4] Add BHRB Facility Support Glenn Miles
@ 2023-09-12 20:23 ` Glenn Miles
  2023-09-15  0:39   ` Nicholas Piggin
  2023-09-12 20:24 ` [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB Glenn Miles
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Glenn Miles @ 2023-09-12 20:23 UTC (permalink / raw)
  To: qemu-devel
  Cc: Glenn Miles, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, Nicholas Piggin,
	open list:PowerPC TCG CPUs
This commit is preparatory to the addition of Branch History
Rolling Buffer (BHRB) functionality, which is being provided
today starting with the P8 processor.
BHRB uses several SPR register fields to control whether or not
a branch instruction's address (and sometimes target address)
should be recorded.  Checking each of these fields with each
branch instruction using jitted code would lead to a significant
decrease in performance.
Therefore, it was decided that BHRB configuration bits that are
not expected to change frequently should have their state stored in
hflags so that the amount of checking done by jitted code can
be reduced.
This commit contains the changes for storing the state of the
following register fields as hflags:
	MMCR0[FCP] - Determines if BHRB recording is frozen in the
                     problem state
	MMCR0[FCPC] - A modifier for MMCR0[FCP]
	MMCRA[BHRBRD] - Disables all BHRB recording for a thread
Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
---
 target/ppc/cpu.h                 |  9 +++++++++
 target/ppc/cpu_init.c            |  4 ++--
 target/ppc/helper.h              |  1 +
 target/ppc/helper_regs.c         | 12 ++++++++++++
 target/ppc/machine.c             |  2 +-
 target/ppc/power8-pmu-regs.c.inc |  5 +++++
 target/ppc/power8-pmu.c          | 15 +++++++++++----
 target/ppc/power8-pmu.h          |  4 ++--
 target/ppc/spr_common.h          |  1 +
 target/ppc/translate.c           |  6 ++++++
 10 files changed, 50 insertions(+), 9 deletions(-)
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 25fac9577a..20ae1466a5 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -439,6 +439,9 @@ FIELD(MSR, LE, MSR_LE, 1)
 #define MMCR0_FC56   PPC_BIT(59)         /* PMC Freeze Counters 5-6 bit */
 #define MMCR0_PMC1CE PPC_BIT(48)         /* MMCR0 PMC1 Condition Enabled */
 #define MMCR0_PMCjCE PPC_BIT(49)         /* MMCR0 PMCj Condition Enabled */
+#define MMCR0_BHRBA  PPC_BIT_NR(42)      /* BHRB Available */
+#define MMCR0_FCP    PPC_BIT(34)         /* Freeze Counters/BHRB if PR=1 */
+#define MMCR0_FCPC   PPC_BIT(51)         /* Condition for FCP bit */
 /* MMCR0 userspace r/w mask */
 #define MMCR0_UREG_MASK (MMCR0_FC | MMCR0_PMAO | MMCR0_PMAE)
 /* MMCR2 userspace r/w mask */
@@ -451,6 +454,9 @@ FIELD(MSR, LE, MSR_LE, 1)
 #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \
                          MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
 
+#define MMCRA_BHRBRD    PPC_BIT(26)            /* BHRB Recording Disable */
+
+
 #define MMCR1_EVT_SIZE 8
 /* extract64() does a right shift before extracting */
 #define MMCR1_PMC1SEL_START 32
@@ -703,6 +709,9 @@ enum {
     HFLAGS_PMCJCE = 17, /* MMCR0 PMCjCE bit */
     HFLAGS_PMC_OTHER = 18, /* PMC other than PMC5-6 is enabled */
     HFLAGS_INSN_CNT = 19, /* PMU instruction count enabled */
+    HFLAGS_FCPC = 20,   /* MMCR0 FCPC bit */
+    HFLAGS_FCP = 21,    /* MMCR0 FCP bit */
+    HFLAGS_BHRBRD = 22, /* MMCRA BHRBRD bit */
     HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
     HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
 
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 02b7aad9b0..568f9c3b88 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5152,7 +5152,7 @@ static void register_book3s_pmu_sup_sprs(CPUPPCState *env)
                      KVM_REG_PPC_MMCR1, 0x00000000);
     spr_register_kvm(env, SPR_POWER_MMCRA, "MMCRA",
                      SPR_NOACCESS, SPR_NOACCESS,
-                     &spr_read_generic, &spr_write_generic,
+                     &spr_read_generic, &spr_write_MMCRA,
                      KVM_REG_PPC_MMCRA, 0x00000000);
     spr_register_kvm(env, SPR_POWER_PMC1, "PMC1",
                      SPR_NOACCESS, SPR_NOACCESS,
@@ -7152,7 +7152,7 @@ static void ppc_cpu_reset_hold(Object *obj)
         if (env->mmu_model != POWERPC_MMU_REAL) {
             ppc_tlb_invalidate_all(env);
         }
-        pmu_mmcr01_updated(env);
+        pmu_mmcr01a_updated(env);
     }
 
     /* clean any pending stop state */
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index abec6fe341..1a3d9a7e57 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -27,6 +27,7 @@ DEF_HELPER_2(store_lpcr, void, env, tl)
 DEF_HELPER_2(store_pcr, void, env, tl)
 DEF_HELPER_2(store_mmcr0, void, env, tl)
 DEF_HELPER_2(store_mmcr1, void, env, tl)
+DEF_HELPER_2(store_mmcrA, void, env, tl)
 DEF_HELPER_3(store_pmc, void, env, i32, i64)
 DEF_HELPER_2(read_pmc, tl, env, i32)
 DEF_HELPER_2(insns_inc, void, env, i32)
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index f380342d4d..4ff054063d 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -61,6 +61,15 @@ static uint32_t hreg_compute_pmu_hflags_value(CPUPPCState *env)
     if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCjCE) {
         hflags |= 1 << HFLAGS_PMCJCE;
     }
+    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FCP) {
+        hflags |= 1 << HFLAGS_FCP;
+    }
+    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FCPC) {
+        hflags |= 1 << HFLAGS_FCPC;
+    }
+    if (env->spr[SPR_POWER_MMCRA] & MMCRA_BHRBRD) {
+        hflags |= 1 << HFLAGS_BHRBRD;
+    }
 
 #ifndef CONFIG_USER_ONLY
     if (env->pmc_ins_cnt) {
@@ -85,6 +94,9 @@ static uint32_t hreg_compute_pmu_hflags_mask(CPUPPCState *env)
     hflags_mask |= 1 << HFLAGS_PMCJCE;
     hflags_mask |= 1 << HFLAGS_INSN_CNT;
     hflags_mask |= 1 << HFLAGS_PMC_OTHER;
+    hflags_mask |= 1 << HFLAGS_FCP;
+    hflags_mask |= 1 << HFLAGS_FCPC;
+    hflags_mask |= 1 << HFLAGS_BHRBRD;
 #endif
     return hflags_mask;
 }
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index 1270a1f7fc..b195fb4dc8 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -313,7 +313,7 @@ static int cpu_post_load(void *opaque, int version_id)
     post_load_update_msr(env);
 
     if (tcg_enabled()) {
-        pmu_mmcr01_updated(env);
+        pmu_mmcr01a_updated(env);
     }
 
     return 0;
diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
index c82feedaff..cab488918a 100644
--- a/target/ppc/power8-pmu-regs.c.inc
+++ b/target/ppc/power8-pmu-regs.c.inc
@@ -175,6 +175,11 @@ void spr_write_MMCR2_ureg(DisasContext *ctx, int sprn, int gprn)
     gen_store_spr(SPR_POWER_MMCR2, masked_gprn);
 }
 
+void spr_write_MMCRA(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_mmcrA(cpu_env, cpu_gpr[gprn]);
+}
+
 void spr_read_PMC(DisasContext *ctx, int gprn, int sprn)
 {
     TCGv_i32 t_sprn = tcg_constant_i32(sprn);
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index cbc5889d91..6f5d4e1256 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -82,7 +82,7 @@ static void pmu_update_summaries(CPUPPCState *env)
     env->pmc_cyc_cnt = cyc_cnt;
 }
 
-void pmu_mmcr01_updated(CPUPPCState *env)
+void pmu_mmcr01a_updated(CPUPPCState *env)
 {
     PowerPCCPU *cpu = env_archcpu(env);
 
@@ -260,7 +260,7 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
 
     env->spr[SPR_POWER_MMCR0] = value;
 
-    pmu_mmcr01_updated(env);
+    pmu_mmcr01a_updated(env);
 
     /* Update cycle overflow timers with the current MMCR0 state */
     pmu_update_overflow_timers(env);
@@ -272,7 +272,14 @@ void helper_store_mmcr1(CPUPPCState *env, uint64_t value)
 
     env->spr[SPR_POWER_MMCR1] = value;
 
-    pmu_mmcr01_updated(env);
+    pmu_mmcr01a_updated(env);
+}
+
+void helper_store_mmcrA(CPUPPCState *env, uint64_t value)
+{
+    env->spr[SPR_POWER_MMCRA] = value;
+
+    pmu_mmcr01a_updated(env);
 }
 
 target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn)
@@ -301,7 +308,7 @@ static void perfm_alert(PowerPCCPU *cpu)
         env->spr[SPR_POWER_MMCR0] |= MMCR0_FC;
 
         /* Changing MMCR0_FC requires summaries and hflags update */
-        pmu_mmcr01_updated(env);
+        pmu_mmcr01a_updated(env);
 
         /*
          * Delete all pending timers if we need to freeze
diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h
index 775e640053..87fa8c9334 100644
--- a/target/ppc/power8-pmu.h
+++ b/target/ppc/power8-pmu.h
@@ -18,10 +18,10 @@
 #define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL
 
 void cpu_ppc_pmu_init(CPUPPCState *env);
-void pmu_mmcr01_updated(CPUPPCState *env);
+void pmu_mmcr01a_updated(CPUPPCState *env);
 #else
 static inline void cpu_ppc_pmu_init(CPUPPCState *env) { }
-static inline void pmu_mmcr01_updated(CPUPPCState *env) { }
+static inline void pmu_mmcr01a_updated(CPUPPCState *env) { }
 #endif
 
 #endif
diff --git a/target/ppc/spr_common.h b/target/ppc/spr_common.h
index 5995070eaf..3c499c1ebd 100644
--- a/target/ppc/spr_common.h
+++ b/target/ppc/spr_common.h
@@ -85,6 +85,7 @@ void spr_write_generic32(DisasContext *ctx, int sprn, int gprn);
 void spr_core_write_generic(DisasContext *ctx, int sprn, int gprn);
 void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn);
 void spr_write_MMCR1(DisasContext *ctx, int sprn, int gprn);
+void spr_write_MMCRA(DisasContext *ctx, int sprn, int gprn);
 void spr_write_PMC(DisasContext *ctx, int sprn, int gprn);
 void spr_write_CTRL(DisasContext *ctx, int sprn, int gprn);
 void spr_read_xer(DisasContext *ctx, int gprn, int sprn);
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 7111b34030..d93fbd4574 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -190,6 +190,9 @@ struct DisasContext {
     bool mmcr0_pmcjce;
     bool pmc_other;
     bool pmu_insn_cnt;
+    bool mmcr0_fcpc;
+    bool mmcr0_fcp;
+    bool mmcra_bhrbrd;
     ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
     int singlestep_enabled;
     uint32_t flags;
@@ -7326,6 +7329,9 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->mmcr0_pmcjce = (hflags >> HFLAGS_PMCJCE) & 1;
     ctx->pmc_other = (hflags >> HFLAGS_PMC_OTHER) & 1;
     ctx->pmu_insn_cnt = (hflags >> HFLAGS_INSN_CNT) & 1;
+    ctx->mmcr0_fcpc = (hflags >> HFLAGS_FCPC) & 1;
+    ctx->mmcr0_fcp = (hflags >> HFLAGS_FCP) & 1;
+    ctx->mmcra_bhrbrd = (hflags >> HFLAGS_BHRBRD) & 1;
 
     ctx->singlestep_enabled = 0;
     if ((hflags >> HFLAGS_SE) & 1) {
-- 
2.31.1
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * Re: [PATCH 1/4] target/ppc: Add new hflags to support BHRB
  2023-09-12 20:23 ` [PATCH 1/4] target/ppc: Add new hflags to support BHRB Glenn Miles
@ 2023-09-15  0:39   ` Nicholas Piggin
  2023-09-19 21:19     ` Glenn Miles
  0 siblings, 1 reply; 18+ messages in thread
From: Nicholas Piggin @ 2023-09-15  0:39 UTC (permalink / raw)
  To: Glenn Miles, qemu-devel
  Cc: Daniel Henrique Barboza, Cédric Le Goater, David Gibson,
	Greg Kurz, open list:PowerPC TCG CPUs
On Wed Sep 13, 2023 at 6:23 AM AEST, Glenn Miles wrote:
> This commit is preparatory to the addition of Branch History
> Rolling Buffer (BHRB) functionality, which is being provided
> today starting with the P8 processor.
>
> BHRB uses several SPR register fields to control whether or not
> a branch instruction's address (and sometimes target address)
> should be recorded.  Checking each of these fields with each
> branch instruction using jitted code would lead to a significant
> decrease in performance.
>
> Therefore, it was decided that BHRB configuration bits that are
> not expected to change frequently should have their state stored in
> hflags so that the amount of checking done by jitted code can
> be reduced.
>
> This commit contains the changes for storing the state of the
> following register fields as hflags:
>
> 	MMCR0[FCP] - Determines if BHRB recording is frozen in the
>                      problem state
>
> 	MMCR0[FCPC] - A modifier for MMCR0[FCP]
>
> 	MMCRA[BHRBRD] - Disables all BHRB recording for a thread
>
> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
> ---
>  target/ppc/cpu.h                 |  9 +++++++++
>  target/ppc/cpu_init.c            |  4 ++--
>  target/ppc/helper.h              |  1 +
>  target/ppc/helper_regs.c         | 12 ++++++++++++
>  target/ppc/machine.c             |  2 +-
>  target/ppc/power8-pmu-regs.c.inc |  5 +++++
>  target/ppc/power8-pmu.c          | 15 +++++++++++----
>  target/ppc/power8-pmu.h          |  4 ++--
>  target/ppc/spr_common.h          |  1 +
>  target/ppc/translate.c           |  6 ++++++
>  10 files changed, 50 insertions(+), 9 deletions(-)
>
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index 25fac9577a..20ae1466a5 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -439,6 +439,9 @@ FIELD(MSR, LE, MSR_LE, 1)
>  #define MMCR0_FC56   PPC_BIT(59)         /* PMC Freeze Counters 5-6 bit */
>  #define MMCR0_PMC1CE PPC_BIT(48)         /* MMCR0 PMC1 Condition Enabled */
>  #define MMCR0_PMCjCE PPC_BIT(49)         /* MMCR0 PMCj Condition Enabled */
> +#define MMCR0_BHRBA  PPC_BIT_NR(42)      /* BHRB Available */
It's confusing to use NR for this. Either call it MMCR0_BHRBA_NR or have
the facility check in patch 3 take the bit value. I'd move it to patch 3
too.
> +#define MMCR0_FCP    PPC_BIT(34)         /* Freeze Counters/BHRB if PR=1 */
> +#define MMCR0_FCPC   PPC_BIT(51)         /* Condition for FCP bit */
>  /* MMCR0 userspace r/w mask */
>  #define MMCR0_UREG_MASK (MMCR0_FC | MMCR0_PMAO | MMCR0_PMAE)
>  /* MMCR2 userspace r/w mask */
> @@ -451,6 +454,9 @@ FIELD(MSR, LE, MSR_LE, 1)
>  #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \
>                           MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
>  
> +#define MMCRA_BHRBRD    PPC_BIT(26)            /* BHRB Recording Disable */
> +
> +
>  #define MMCR1_EVT_SIZE 8
>  /* extract64() does a right shift before extracting */
>  #define MMCR1_PMC1SEL_START 32
> @@ -703,6 +709,9 @@ enum {
>      HFLAGS_PMCJCE = 17, /* MMCR0 PMCjCE bit */
>      HFLAGS_PMC_OTHER = 18, /* PMC other than PMC5-6 is enabled */
>      HFLAGS_INSN_CNT = 19, /* PMU instruction count enabled */
> +    HFLAGS_FCPC = 20,   /* MMCR0 FCPC bit */
> +    HFLAGS_FCP = 21,    /* MMCR0 FCP bit */
> +    HFLAGS_BHRBRD = 22, /* MMCRA BHRBRD bit */
>      HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
>      HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
hflags are an interesting tradeoff. You can specialise some code but
at the cost of duplicating your jit footprint, which is often the
most costly thing. The ideal hflag is one where code is not shared
between flag set/clear like PR and HV. Rarely used features is another
good one, that BHRB falls into.
But, we do want flags that carry stronger or more direct semantics
wrt code generation because you want to avoid redundant hflags values
that result in the same code generation. I might have missed something
but AFAIKS BHRB_ENABLED could be a combination of this logic (from
later patch):
+    /* ISA 3.1 adds the PMCRA[BRHBRD] and problem state checks */
+    if ((ctx->insns_flags2 & PPC2_ISA310) && (ctx->mmcra_bhrbrd || !ctx->pr)) {
+        return;
+    }
+
+    /* Check for BHRB "frozen" conditions */
+    if (ctx->mmcr0_fcpc) {
+        if (ctx->mmcr0_fcp) {
+            if ((ctx->hv) && (ctx->pr)) {
+                return;
+            }
+        } else if (!(ctx->hv) && (ctx->pr)) {
+            return;
+        }
+    } else if ((ctx->mmcr0_fcp) && (ctx->pr)) {
+        return;
+    }
Otherwise the patch looks good to me.
Thanks,
Nick
^ permalink raw reply	[flat|nested] 18+ messages in thread
- * Re: [PATCH 1/4] target/ppc: Add new hflags to support BHRB
  2023-09-15  0:39   ` Nicholas Piggin
@ 2023-09-19 21:19     ` Glenn Miles
  0 siblings, 0 replies; 18+ messages in thread
From: Glenn Miles @ 2023-09-19 21:19 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: qemu-devel, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, open list:PowerPC TCG CPUs
On 2023-09-14 19:39, Nicholas Piggin wrote:
> On Wed Sep 13, 2023 at 6:23 AM AEST, Glenn Miles wrote:
>> This commit is preparatory to the addition of Branch History
>> Rolling Buffer (BHRB) functionality, which is being provided
>> today starting with the P8 processor.
>> 
>> BHRB uses several SPR register fields to control whether or not
>> a branch instruction's address (and sometimes target address)
>> should be recorded.  Checking each of these fields with each
>> branch instruction using jitted code would lead to a significant
>> decrease in performance.
>> 
>> Therefore, it was decided that BHRB configuration bits that are
>> not expected to change frequently should have their state stored in
>> hflags so that the amount of checking done by jitted code can
>> be reduced.
>> 
>> This commit contains the changes for storing the state of the
>> following register fields as hflags:
>> 
>> 	MMCR0[FCP] - Determines if BHRB recording is frozen in the
>>                      problem state
>> 
>> 	MMCR0[FCPC] - A modifier for MMCR0[FCP]
>> 
>> 	MMCRA[BHRBRD] - Disables all BHRB recording for a thread
>> 
>> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
>> ---
>>  target/ppc/cpu.h                 |  9 +++++++++
>>  target/ppc/cpu_init.c            |  4 ++--
>>  target/ppc/helper.h              |  1 +
>>  target/ppc/helper_regs.c         | 12 ++++++++++++
>>  target/ppc/machine.c             |  2 +-
>>  target/ppc/power8-pmu-regs.c.inc |  5 +++++
>>  target/ppc/power8-pmu.c          | 15 +++++++++++----
>>  target/ppc/power8-pmu.h          |  4 ++--
>>  target/ppc/spr_common.h          |  1 +
>>  target/ppc/translate.c           |  6 ++++++
>>  10 files changed, 50 insertions(+), 9 deletions(-)
>> 
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index 25fac9577a..20ae1466a5 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -439,6 +439,9 @@ FIELD(MSR, LE, MSR_LE, 1)
>>  #define MMCR0_FC56   PPC_BIT(59)         /* PMC Freeze Counters 5-6 
>> bit */
>>  #define MMCR0_PMC1CE PPC_BIT(48)         /* MMCR0 PMC1 Condition 
>> Enabled */
>>  #define MMCR0_PMCjCE PPC_BIT(49)         /* MMCR0 PMCj Condition 
>> Enabled */
>> +#define MMCR0_BHRBA  PPC_BIT_NR(42)      /* BHRB Available */
> 
> It's confusing to use NR for this. Either call it MMCR0_BHRBA_NR or 
> have
> the facility check in patch 3 take the bit value. I'd move it to patch 
> 3
> too.
> 
Ok, adding NR suffix.
>> +#define MMCR0_FCP    PPC_BIT(34)         /* Freeze Counters/BHRB if 
>> PR=1 */
>> +#define MMCR0_FCPC   PPC_BIT(51)         /* Condition for FCP bit */
>>  /* MMCR0 userspace r/w mask */
>>  #define MMCR0_UREG_MASK (MMCR0_FC | MMCR0_PMAO | MMCR0_PMAE)
>>  /* MMCR2 userspace r/w mask */
>> @@ -451,6 +454,9 @@ FIELD(MSR, LE, MSR_LE, 1)
>>  #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \
>>                           MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
>> 
>> +#define MMCRA_BHRBRD    PPC_BIT(26)            /* BHRB Recording 
>> Disable */
>> +
>> +
>>  #define MMCR1_EVT_SIZE 8
>>  /* extract64() does a right shift before extracting */
>>  #define MMCR1_PMC1SEL_START 32
>> @@ -703,6 +709,9 @@ enum {
>>      HFLAGS_PMCJCE = 17, /* MMCR0 PMCjCE bit */
>>      HFLAGS_PMC_OTHER = 18, /* PMC other than PMC5-6 is enabled */
>>      HFLAGS_INSN_CNT = 19, /* PMU instruction count enabled */
>> +    HFLAGS_FCPC = 20,   /* MMCR0 FCPC bit */
>> +    HFLAGS_FCP = 21,    /* MMCR0 FCP bit */
>> +    HFLAGS_BHRBRD = 22, /* MMCRA BHRBRD bit */
>>      HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
>>      HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
> 
> hflags are an interesting tradeoff. You can specialise some code but
> at the cost of duplicating your jit footprint, which is often the
> most costly thing. The ideal hflag is one where code is not shared
> between flag set/clear like PR and HV. Rarely used features is another
> good one, that BHRB falls into.
> 
> But, we do want flags that carry stronger or more direct semantics
> wrt code generation because you want to avoid redundant hflags values
> that result in the same code generation. I might have missed something
> but AFAIKS BHRB_ENABLED could be a combination of this logic (from
> later patch):
> 
> +    /* ISA 3.1 adds the PMCRA[BRHBRD] and problem state checks */
> +    if ((ctx->insns_flags2 & PPC2_ISA310) && (ctx->mmcra_bhrbrd || 
> !ctx->pr)) {
> +        return;
> +    }
> +
> +    /* Check for BHRB "frozen" conditions */
> +    if (ctx->mmcr0_fcpc) {
> +        if (ctx->mmcr0_fcp) {
> +            if ((ctx->hv) && (ctx->pr)) {
> +                return;
> +            }
> +        } else if (!(ctx->hv) && (ctx->pr)) {
> +            return;
> +        }
> +    } else if ((ctx->mmcr0_fcp) && (ctx->pr)) {
> +        return;
> +    }
> 
Ok, Combining above logic into a single hflag.
> Otherwise the patch looks good to me.
> 
> Thanks,
> Nick
^ permalink raw reply	[flat|nested] 18+ messages in thread
 
 
- * [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB
  2023-09-12 19:21 [PATCH 0/4] Add BHRB Facility Support Glenn Miles
  2023-09-12 20:23 ` [PATCH 1/4] target/ppc: Add new hflags to support BHRB Glenn Miles
@ 2023-09-12 20:24 ` Glenn Miles
  2023-09-15  1:02   ` Nicholas Piggin
  2023-09-12 20:24 ` [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions Glenn Miles
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Glenn Miles @ 2023-09-12 20:24 UTC (permalink / raw)
  To: qemu-devel
  Cc: Glenn Miles, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, Nicholas Piggin,
	open list:PowerPC TCG CPUs
This commit continues adding support for the Branch History
Rolling Buffer (BHRB) as is provided starting with the P8
processor and continuing with its successors.  This commit
is limited to the recording and filtering of taken branches.
The following changes were made:
  - Added a BHRB buffer for storing branch instruction and
    target addresses for taken branches
  - Renamed gen_update_cfar to gen_update_branch_history and
    added a 'target' parameter to hold the branch target
    address and 'inst_type' parameter to use for filtering
  - Added a combination of jit-time and run-time checks to
    gen_update_branch_history for determining if a branch
    should be recorded
  - Added TCG code to gen_update_branch_history that stores
    data to the BHRB and updates the BHRB offset.
  - Added BHRB resource initialization and reset functions
  - Enabled functionality for P8, P9 and P10 processors.
Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
---
 target/ppc/cpu.h                       |  18 +++-
 target/ppc/cpu_init.c                  |  41 ++++++++-
 target/ppc/helper_regs.c               |  32 +++++++
 target/ppc/helper_regs.h               |   1 +
 target/ppc/power8-pmu.c                |   2 +
 target/ppc/power8-pmu.h                |   7 ++
 target/ppc/translate.c                 | 114 +++++++++++++++++++++++--
 target/ppc/translate/branch-impl.c.inc |   2 +-
 8 files changed, 205 insertions(+), 12 deletions(-)
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 20ae1466a5..bda1afb700 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -454,8 +454,9 @@ FIELD(MSR, LE, MSR_LE, 1)
 #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \
                          MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
 
-#define MMCRA_BHRBRD    PPC_BIT(26)            /* BHRB Recording Disable */
-
+#define MMCRA_BHRBRD    PPC_BIT(26)         /* BHRB Recording Disable */
+#define MMCRA_IFM_MASK  PPC_BITMASK(32, 33) /* BHRB Instruction Filtering */
+#define MMCRA_IFM_SHIFT PPC_BIT_NR(33)
 
 #define MMCR1_EVT_SIZE 8
 /* extract64() does a right shift before extracting */
@@ -682,6 +683,8 @@ enum {
     POWERPC_FLAG_SMT      = 0x00400000,
     /* Using "LPAR per core" mode  (as opposed to per-thread)                */
     POWERPC_FLAG_SMT_1LPAR = 0x00800000,
+    /* Has BHRB */
+    POWERPC_FLAG_BHRB      = 0x01000000,
 };
 
 /*
@@ -1110,6 +1113,9 @@ DEXCR_ASPECT(PHIE, 6)
 #define PPC_CPU_OPCODES_LEN          0x40
 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
 
+#define BHRB_MAX_NUM_ENTRIES_LOG2 (5)
+#define BHRB_MAX_NUM_ENTRIES      (1 << BHRB_MAX_NUM_ENTRIES_LOG2)
+
 struct CPUArchState {
     /* Most commonly used resources during translated code execution first */
     target_ulong gpr[32];  /* general purpose registers */
@@ -1196,6 +1202,14 @@ struct CPUArchState {
     int dcache_line_size;
     int icache_line_size;
 
+    /* Branch History Rolling Buffer (BHRB) resources */
+    target_ulong bhrb_num_entries;
+    target_ulong bhrb_base;
+    target_ulong bhrb_filter;
+    target_ulong bhrb_offset;
+    target_ulong bhrb_offset_mask;
+    uint64_t bhrb[BHRB_MAX_NUM_ENTRIES];
+
     /* These resources are used during exception processing */
     /* CPU model definition */
     target_ulong msr_mask;
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 568f9c3b88..19d7505a73 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6100,6 +6100,28 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
     pcc->l1_icache_size = 0x8000;
 }
 
+static void bhrb_init_state(CPUPPCState *env, target_long num_entries_log2)
+{
+    if (env->flags & POWERPC_FLAG_BHRB) {
+        if (num_entries_log2 > BHRB_MAX_NUM_ENTRIES_LOG2) {
+            num_entries_log2 = BHRB_MAX_NUM_ENTRIES_LOG2;
+        }
+        env->bhrb_num_entries = 1 << num_entries_log2;
+        env->bhrb_base = (target_long)&env->bhrb[0];
+        env->bhrb_offset_mask = (env->bhrb_num_entries * sizeof(uint64_t)) - 1;
+    }
+}
+
+static void bhrb_reset_state(CPUPPCState *env)
+{
+    if (env->flags & POWERPC_FLAG_BHRB) {
+        env->bhrb_offset = 0;
+        env->bhrb_filter = 0;
+        memset(env->bhrb, 0, sizeof(env->bhrb));
+    }
+}
+
+#define POWER8_BHRB_ENTRIES_LOG2 5
 static void init_proc_POWER8(CPUPPCState *env)
 {
     /* Common Registers */
@@ -6141,6 +6163,8 @@ static void init_proc_POWER8(CPUPPCState *env)
     env->dcache_line_size = 128;
     env->icache_line_size = 128;
 
+    bhrb_init_state(env, POWER8_BHRB_ENTRIES_LOG2);
+
     /* Allocate hardware IRQ controller */
     init_excp_POWER8(env);
     ppcPOWER7_irq_init(env_archcpu(env));
@@ -6241,7 +6265,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
     pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
                  POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
                  POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
-                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
+                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM |
+                 POWERPC_FLAG_BHRB;
     pcc->l1_dcache_size = 0x8000;
     pcc->l1_icache_size = 0x8000;
 }
@@ -6265,6 +6290,7 @@ static struct ppc_radix_page_info POWER9_radix_page_info = {
 };
 #endif /* CONFIG_USER_ONLY */
 
+#define POWER9_BHRB_ENTRIES_LOG2 5
 static void init_proc_POWER9(CPUPPCState *env)
 {
     /* Common Registers */
@@ -6315,6 +6341,8 @@ static void init_proc_POWER9(CPUPPCState *env)
     env->dcache_line_size = 128;
     env->icache_line_size = 128;
 
+    bhrb_init_state(env, POWER9_BHRB_ENTRIES_LOG2);
+
     /* Allocate hardware IRQ controller */
     init_excp_POWER9(env);
     ppcPOWER9_irq_init(env_archcpu(env));
@@ -6434,7 +6462,8 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
     pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
                  POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
                  POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
-                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV;
+                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV |
+                 POWERPC_FLAG_BHRB;
     pcc->l1_dcache_size = 0x8000;
     pcc->l1_icache_size = 0x8000;
 }
@@ -6458,6 +6487,7 @@ static struct ppc_radix_page_info POWER10_radix_page_info = {
 };
 #endif /* !CONFIG_USER_ONLY */
 
+#define POWER10_BHRB_ENTRIES_LOG2 5
 static void init_proc_POWER10(CPUPPCState *env)
 {
     /* Common Registers */
@@ -6505,6 +6535,8 @@ static void init_proc_POWER10(CPUPPCState *env)
     env->dcache_line_size = 128;
     env->icache_line_size = 128;
 
+    bhrb_init_state(env, POWER10_BHRB_ENTRIES_LOG2);
+
     /* Allocate hardware IRQ controller */
     init_excp_POWER10(env);
     ppcPOWER9_irq_init(env_archcpu(env));
@@ -6610,7 +6642,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
     pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
                  POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
                  POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
-                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV;
+                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV |
+                 POWERPC_FLAG_BHRB;
     pcc->l1_dcache_size = 0x8000;
     pcc->l1_icache_size = 0x8000;
 }
@@ -7178,6 +7211,8 @@ static void ppc_cpu_reset_hold(Object *obj)
         }
         env->spr[i] = spr->default_value;
     }
+
+    bhrb_reset_state(env);
 }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index 4ff054063d..68b27196d9 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -752,3 +752,35 @@ void register_usprgh_sprs(CPUPPCState *env)
                  &spr_read_ureg, SPR_NOACCESS,
                  0x00000000);
 }
+
+void hreg_bhrb_filter_update(CPUPPCState *env)
+{
+    target_long ifm;
+
+    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
+        /* disable recording to BHRB */
+        env->bhrb_filter = BHRB_TYPE_NORECORD;
+        return;
+    }
+
+    ifm = (env->spr[SPR_POWER_MMCRA] & MMCRA_IFM_MASK) >> MMCRA_IFM_SHIFT;
+    switch (ifm) {
+    case 0:
+        /* record all branches */
+        env->bhrb_filter = -1;
+        break;
+    case 1:
+        /* only record calls (LK = 1) */
+        env->bhrb_filter = BHRB_TYPE_CALL;
+        break;
+    case 2:
+        /* only record indirect branches */
+        env->bhrb_filter = BHRB_TYPE_INDIRECT;
+        break;
+    case 3:
+        /* only record conditional branches */
+        env->bhrb_filter = BHRB_TYPE_COND;
+        break;
+    }
+}
+
diff --git a/target/ppc/helper_regs.h b/target/ppc/helper_regs.h
index 8196c1346d..ab6aba1c24 100644
--- a/target/ppc/helper_regs.h
+++ b/target/ppc/helper_regs.h
@@ -25,6 +25,7 @@ void hreg_compute_hflags(CPUPPCState *env);
 void hreg_update_pmu_hflags(CPUPPCState *env);
 void cpu_interrupt_exittb(CPUState *cs);
 int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv);
+void hreg_bhrb_filter_update(CPUPPCState *env);
 
 #ifdef CONFIG_USER_ONLY
 static inline void check_tlb_flush(CPUPPCState *env, bool global) { }
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index 6f5d4e1256..18149a15b2 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -261,6 +261,7 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
     env->spr[SPR_POWER_MMCR0] = value;
 
     pmu_mmcr01a_updated(env);
+    hreg_bhrb_filter_update(env);
 
     /* Update cycle overflow timers with the current MMCR0 state */
     pmu_update_overflow_timers(env);
@@ -280,6 +281,7 @@ void helper_store_mmcrA(CPUPPCState *env, uint64_t value)
     env->spr[SPR_POWER_MMCRA] = value;
 
     pmu_mmcr01a_updated(env);
+    hreg_bhrb_filter_update(env);
 }
 
 target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn)
diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h
index 87fa8c9334..a887094045 100644
--- a/target/ppc/power8-pmu.h
+++ b/target/ppc/power8-pmu.h
@@ -17,6 +17,13 @@
 
 #define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL
 
+#define BHRB_TYPE_NORECORD      0x00
+#define BHRB_TYPE_CALL          0x01
+#define BHRB_TYPE_INDIRECT      0x02
+#define BHRB_TYPE_COND          0x04
+#define BHRB_TYPE_OTHER         0x08
+#define BHRB_TYPE_XL_FORM       0x10
+
 void cpu_ppc_pmu_init(CPUPPCState *env);
 void pmu_mmcr01a_updated(CPUPPCState *env);
 #else
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index d93fbd4574..7824475f54 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -177,6 +177,7 @@ struct DisasContext {
 #if defined(TARGET_PPC64)
     bool sf_mode;
     bool has_cfar;
+    bool has_bhrb;
 #endif
     bool fpu_enabled;
     bool altivec_enabled;
@@ -4104,12 +4105,100 @@ static void gen_rvwinkle(DisasContext *ctx)
 }
 #endif /* #if defined(TARGET_PPC64) */
 
-static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
+
+static inline void gen_update_branch_history(DisasContext *ctx,
+                                             target_ulong nip,
+                                             TCGv target,
+                                             target_long inst_type)
 {
 #if defined(TARGET_PPC64)
+    TCGv t0;
+    TCGv t1;
+    TCGv t2;
+    TCGv t3;
+    TCGLabel *no_update;
+
     if (ctx->has_cfar) {
         tcg_gen_movi_tl(cpu_cfar, nip);
     }
+
+    if (!ctx->has_bhrb || inst_type == BHRB_TYPE_NORECORD) {
+        return;
+    }
+
+    /* ISA 3.1 adds the PMCRA[BRHBRD] and problem state checks */
+    if ((ctx->insns_flags2 & PPC2_ISA310) && (ctx->mmcra_bhrbrd || !ctx->pr)) {
+        return;
+    }
+
+    /* Check for BHRB "frozen" conditions */
+    if (ctx->mmcr0_fcpc) {
+        if (ctx->mmcr0_fcp) {
+            if ((ctx->hv) && (ctx->pr)) {
+                return;
+            }
+        } else if (!(ctx->hv) && (ctx->pr)) {
+            return;
+        }
+    } else if ((ctx->mmcr0_fcp) && (ctx->pr)) {
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+    no_update = gen_new_label();
+
+    /* check for bhrb filtering */
+    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_filter));
+    tcg_gen_andi_tl(t0, t0, inst_type);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, no_update);
+
+    /* load bhrb base address into t2 */
+    tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUPPCState, bhrb_base));
+
+    /* load current bhrb_offset into t0 */
+    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
+
+    /* load a BHRB offset mask into t3 */
+    tcg_gen_ld_tl(t3, cpu_env, offsetof(CPUPPCState, bhrb_offset_mask));
+
+    /* add t2 to t0 to get address of bhrb entry */
+    tcg_gen_add_tl(t1, t0, t2);
+
+    /* store nip into bhrb at bhrb_offset */
+    tcg_gen_st_i64(tcg_constant_i64(nip), (TCGv_ptr)t1, 0);
+
+    /* add 8 to current bhrb_offset */
+    tcg_gen_addi_tl(t0, t0, 8);
+
+    /* apply offset mask */
+    tcg_gen_and_tl(t0, t0, t3);
+
+    if (inst_type & BHRB_TYPE_XL_FORM) {
+        /* Also record the target address for XL-Form branches */
+
+        /* add t2 to t0 to get address of bhrb entry */
+        tcg_gen_add_tl(t1, t0, t2);
+
+        /* Set the 'T' bit for target entries */
+        tcg_gen_ori_tl(t2, target, 0x2);
+
+        /* Store target address in bhrb */
+        tcg_gen_st_tl(t2, (TCGv_ptr)t1, 0);
+
+        /* add 8 to current bhrb_offset */
+        tcg_gen_addi_tl(t0, t0, 8);
+
+        /* apply offset mask */
+        tcg_gen_and_tl(t0, t0, t3);
+    }
+
+    /* save updated bhrb_offset for next time */
+    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
+
+    gen_set_label(no_update);
 #endif
 }
 
@@ -4239,8 +4328,10 @@ static void gen_b(DisasContext *ctx)
     }
     if (LK(ctx->opcode)) {
         gen_setlr(ctx, ctx->base.pc_next);
+        gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_CALL);
+    } else {
+        gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_OTHER);
     }
-    gen_update_cfar(ctx, ctx->cia);
     gen_goto_tb(ctx, 0, target);
     ctx->base.is_jmp = DISAS_NORETURN;
 }
@@ -4255,6 +4346,7 @@ static void gen_bcond(DisasContext *ctx, int type)
     uint32_t bo = BO(ctx->opcode);
     TCGLabel *l1;
     TCGv target;
+    target_long bhrb_type = BHRB_TYPE_OTHER;
 
     if (type == BCOND_LR || type == BCOND_CTR || type == BCOND_TAR) {
         target = tcg_temp_new();
@@ -4265,11 +4357,16 @@ static void gen_bcond(DisasContext *ctx, int type)
         } else {
             tcg_gen_mov_tl(target, cpu_lr);
         }
+        if (!LK(ctx->opcode)) {
+            bhrb_type |= BHRB_TYPE_INDIRECT;
+        }
+        bhrb_type |= BHRB_TYPE_XL_FORM;
     } else {
         target = NULL;
     }
     if (LK(ctx->opcode)) {
         gen_setlr(ctx, ctx->base.pc_next);
+        bhrb_type |= BHRB_TYPE_CALL;
     }
     l1 = gen_new_label();
     if ((bo & 0x4) == 0) {
@@ -4320,6 +4417,7 @@ static void gen_bcond(DisasContext *ctx, int type)
                 tcg_gen_brcondi_tl(TCG_COND_EQ, temp, 0, l1);
             }
         }
+        bhrb_type |= BHRB_TYPE_COND;
     }
     if ((bo & 0x10) == 0) {
         /* Test CR */
@@ -4334,8 +4432,11 @@ static void gen_bcond(DisasContext *ctx, int type)
             tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
             tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
         }
+        bhrb_type |= BHRB_TYPE_COND;
     }
-    gen_update_cfar(ctx, ctx->cia);
+
+    gen_update_branch_history(ctx, ctx->cia, target, bhrb_type);
+
     if (type == BCOND_IM) {
         target_ulong li = (target_long)((int16_t)(BD(ctx->opcode)));
         if (likely(AA(ctx->opcode) == 0)) {
@@ -4451,7 +4552,7 @@ static void gen_rfi(DisasContext *ctx)
     /* Restore CPU state */
     CHK_SV(ctx);
     translator_io_start(&ctx->base);
-    gen_update_cfar(ctx, ctx->cia);
+    gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
     gen_helper_rfi(cpu_env);
     ctx->base.is_jmp = DISAS_EXIT;
 #endif
@@ -4466,7 +4567,7 @@ static void gen_rfid(DisasContext *ctx)
     /* Restore CPU state */
     CHK_SV(ctx);
     translator_io_start(&ctx->base);
-    gen_update_cfar(ctx, ctx->cia);
+    gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
     gen_helper_rfid(cpu_env);
     ctx->base.is_jmp = DISAS_EXIT;
 #endif
@@ -4481,7 +4582,7 @@ static void gen_rfscv(DisasContext *ctx)
     /* Restore CPU state */
     CHK_SV(ctx);
     translator_io_start(&ctx->base);
-    gen_update_cfar(ctx, ctx->cia);
+    gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
     gen_helper_rfscv(cpu_env);
     ctx->base.is_jmp = DISAS_EXIT;
 #endif
@@ -7313,6 +7414,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
 #if defined(TARGET_PPC64)
     ctx->sf_mode = (hflags >> HFLAGS_64) & 1;
     ctx->has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
+    ctx->has_bhrb = !!(env->flags & POWERPC_FLAG_BHRB);
 #endif
     ctx->lazy_tlb_flush = env->mmu_model == POWERPC_MMU_32B
         || env->mmu_model & POWERPC_MMU_64;
diff --git a/target/ppc/translate/branch-impl.c.inc b/target/ppc/translate/branch-impl.c.inc
index f9931b9d73..a76ec6f77e 100644
--- a/target/ppc/translate/branch-impl.c.inc
+++ b/target/ppc/translate/branch-impl.c.inc
@@ -17,7 +17,7 @@ static bool trans_RFEBB(DisasContext *ctx, arg_XL_s *arg)
     REQUIRE_INSNS_FLAGS2(ctx, ISA207S);
 
     translator_io_start(&ctx->base);
-    gen_update_cfar(ctx, ctx->cia);
+    gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
     gen_helper_rfebb(cpu_env, cpu_gpr[arg->s]);
 
     ctx->base.is_jmp = DISAS_CHAIN;
-- 
2.31.1
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * Re: [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB
  2023-09-12 20:24 ` [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB Glenn Miles
@ 2023-09-15  1:02   ` Nicholas Piggin
  2023-09-19 21:35     ` Glenn Miles
  0 siblings, 1 reply; 18+ messages in thread
From: Nicholas Piggin @ 2023-09-15  1:02 UTC (permalink / raw)
  To: Glenn Miles, qemu-devel
  Cc: Daniel Henrique Barboza, Cédric Le Goater, David Gibson,
	Greg Kurz, open list:PowerPC TCG CPUs
On Wed Sep 13, 2023 at 6:24 AM AEST, Glenn Miles wrote:
> This commit continues adding support for the Branch History
> Rolling Buffer (BHRB) as is provided starting with the P8
> processor and continuing with its successors.  This commit
> is limited to the recording and filtering of taken branches.
>
> The following changes were made:
>
>   - Added a BHRB buffer for storing branch instruction and
>     target addresses for taken branches
>   - Renamed gen_update_cfar to gen_update_branch_history and
>     added a 'target' parameter to hold the branch target
>     address and 'inst_type' parameter to use for filtering
>   - Added a combination of jit-time and run-time checks to
>     gen_update_branch_history for determining if a branch
>     should be recorded
>   - Added TCG code to gen_update_branch_history that stores
>     data to the BHRB and updates the BHRB offset.
>   - Added BHRB resource initialization and reset functions
>   - Enabled functionality for P8, P9 and P10 processors.
>
> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
> ---
>  target/ppc/cpu.h                       |  18 +++-
>  target/ppc/cpu_init.c                  |  41 ++++++++-
>  target/ppc/helper_regs.c               |  32 +++++++
>  target/ppc/helper_regs.h               |   1 +
>  target/ppc/power8-pmu.c                |   2 +
>  target/ppc/power8-pmu.h                |   7 ++
>  target/ppc/translate.c                 | 114 +++++++++++++++++++++++--
>  target/ppc/translate/branch-impl.c.inc |   2 +-
>  8 files changed, 205 insertions(+), 12 deletions(-)
>
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index 20ae1466a5..bda1afb700 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -454,8 +454,9 @@ FIELD(MSR, LE, MSR_LE, 1)
>  #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \
>                           MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
>  
> -#define MMCRA_BHRBRD    PPC_BIT(26)            /* BHRB Recording Disable */
> -
> +#define MMCRA_BHRBRD    PPC_BIT(26)         /* BHRB Recording Disable */
Fold this tidying into patch 1.
> +#define MMCRA_IFM_MASK  PPC_BITMASK(32, 33) /* BHRB Instruction Filtering */
> +#define MMCRA_IFM_SHIFT PPC_BIT_NR(33)
>  
>  #define MMCR1_EVT_SIZE 8
>  /* extract64() does a right shift before extracting */
> @@ -682,6 +683,8 @@ enum {
>      POWERPC_FLAG_SMT      = 0x00400000,
>      /* Using "LPAR per core" mode  (as opposed to per-thread)                */
>      POWERPC_FLAG_SMT_1LPAR = 0x00800000,
> +    /* Has BHRB */
> +    POWERPC_FLAG_BHRB      = 0x01000000,
>  };
Interesting question of which patch to add different flags. I'm
strongly in add when you add code that uses them like this one,
but it's a matter of taste and not always practical to be an
absolute rule. I don't mind too much what others do, but maybe
this and the pcc->flags init should go in patch 1 since that's adding
flags that aren't yet used?
>  
>  /*
> @@ -1110,6 +1113,9 @@ DEXCR_ASPECT(PHIE, 6)
>  #define PPC_CPU_OPCODES_LEN          0x40
>  #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
>  
> +#define BHRB_MAX_NUM_ENTRIES_LOG2 (5)
> +#define BHRB_MAX_NUM_ENTRIES      (1 << BHRB_MAX_NUM_ENTRIES_LOG2)
> +
>  struct CPUArchState {
>      /* Most commonly used resources during translated code execution first */
>      target_ulong gpr[32];  /* general purpose registers */
> @@ -1196,6 +1202,14 @@ struct CPUArchState {
>      int dcache_line_size;
>      int icache_line_size;
>  
> +    /* Branch History Rolling Buffer (BHRB) resources */
> +    target_ulong bhrb_num_entries;
> +    target_ulong bhrb_base;
> +    target_ulong bhrb_filter;
> +    target_ulong bhrb_offset;
> +    target_ulong bhrb_offset_mask;
> +    uint64_t bhrb[BHRB_MAX_NUM_ENTRIES];
Put these under ifdef TARGET_PPC64?
> +
>      /* These resources are used during exception processing */
>      /* CPU model definition */
>      target_ulong msr_mask;
> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
> index 568f9c3b88..19d7505a73 100644
> --- a/target/ppc/cpu_init.c
> +++ b/target/ppc/cpu_init.c
> @@ -6100,6 +6100,28 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>      pcc->l1_icache_size = 0x8000;
>  }
>  
> +static void bhrb_init_state(CPUPPCState *env, target_long num_entries_log2)
> +{
> +    if (env->flags & POWERPC_FLAG_BHRB) {
> +        if (num_entries_log2 > BHRB_MAX_NUM_ENTRIES_LOG2) {
> +            num_entries_log2 = BHRB_MAX_NUM_ENTRIES_LOG2;
> +        }
> +        env->bhrb_num_entries = 1 << num_entries_log2;
> +        env->bhrb_base = (target_long)&env->bhrb[0];
> +        env->bhrb_offset_mask = (env->bhrb_num_entries * sizeof(uint64_t)) - 1;
> +    }
> +}
> +
> +static void bhrb_reset_state(CPUPPCState *env)
> +{
> +    if (env->flags & POWERPC_FLAG_BHRB) {
> +        env->bhrb_offset = 0;
> +        env->bhrb_filter = 0;
> +        memset(env->bhrb, 0, sizeof(env->bhrb));
> +    }
> +}
> +
> +#define POWER8_BHRB_ENTRIES_LOG2 5
>  static void init_proc_POWER8(CPUPPCState *env)
>  {
>      /* Common Registers */
> @@ -6141,6 +6163,8 @@ static void init_proc_POWER8(CPUPPCState *env)
>      env->dcache_line_size = 128;
>      env->icache_line_size = 128;
>  
> +    bhrb_init_state(env, POWER8_BHRB_ENTRIES_LOG2);
> +
>      /* Allocate hardware IRQ controller */
>      init_excp_POWER8(env);
>      ppcPOWER7_irq_init(env_archcpu(env));
> @@ -6241,7 +6265,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>      pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
>                   POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
>                   POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
> -                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
> +                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM |
> +                 POWERPC_FLAG_BHRB;
>      pcc->l1_dcache_size = 0x8000;
>      pcc->l1_icache_size = 0x8000;
>  }
> @@ -6265,6 +6290,7 @@ static struct ppc_radix_page_info POWER9_radix_page_info = {
>  };
>  #endif /* CONFIG_USER_ONLY */
>  
> +#define POWER9_BHRB_ENTRIES_LOG2 5
>  static void init_proc_POWER9(CPUPPCState *env)
>  {
>      /* Common Registers */
> @@ -6315,6 +6341,8 @@ static void init_proc_POWER9(CPUPPCState *env)
>      env->dcache_line_size = 128;
>      env->icache_line_size = 128;
>  
> +    bhrb_init_state(env, POWER9_BHRB_ENTRIES_LOG2);
> +
>      /* Allocate hardware IRQ controller */
>      init_excp_POWER9(env);
>      ppcPOWER9_irq_init(env_archcpu(env));
> @@ -6434,7 +6462,8 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
>      pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
>                   POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
>                   POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
> -                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV;
> +                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV |
> +                 POWERPC_FLAG_BHRB;
>      pcc->l1_dcache_size = 0x8000;
>      pcc->l1_icache_size = 0x8000;
>  }
> @@ -6458,6 +6487,7 @@ static struct ppc_radix_page_info POWER10_radix_page_info = {
>  };
>  #endif /* !CONFIG_USER_ONLY */
>  
> +#define POWER10_BHRB_ENTRIES_LOG2 5
>  static void init_proc_POWER10(CPUPPCState *env)
>  {
>      /* Common Registers */
> @@ -6505,6 +6535,8 @@ static void init_proc_POWER10(CPUPPCState *env)
>      env->dcache_line_size = 128;
>      env->icache_line_size = 128;
>  
> +    bhrb_init_state(env, POWER10_BHRB_ENTRIES_LOG2);
> +
>      /* Allocate hardware IRQ controller */
>      init_excp_POWER10(env);
>      ppcPOWER9_irq_init(env_archcpu(env));
> @@ -6610,7 +6642,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
>      pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
>                   POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
>                   POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
> -                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV;
> +                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV |
> +                 POWERPC_FLAG_BHRB;
>      pcc->l1_dcache_size = 0x8000;
>      pcc->l1_icache_size = 0x8000;
>  }
> @@ -7178,6 +7211,8 @@ static void ppc_cpu_reset_hold(Object *obj)
>          }
>          env->spr[i] = spr->default_value;
>      }
> +
> +    bhrb_reset_state(env);
>  }
>  
>  #ifndef CONFIG_USER_ONLY
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 4ff054063d..68b27196d9 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -752,3 +752,35 @@ void register_usprgh_sprs(CPUPPCState *env)
>                   &spr_read_ureg, SPR_NOACCESS,
>                   0x00000000);
>  }
> +
> +void hreg_bhrb_filter_update(CPUPPCState *env)
> +{
> +    target_long ifm;
> +
> +    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
> +        /* disable recording to BHRB */
> +        env->bhrb_filter = BHRB_TYPE_NORECORD;
> +        return;
> +    }
> +
> +    ifm = (env->spr[SPR_POWER_MMCRA] & MMCRA_IFM_MASK) >> MMCRA_IFM_SHIFT;
> +    switch (ifm) {
> +    case 0:
> +        /* record all branches */
> +        env->bhrb_filter = -1;
> +        break;
> +    case 1:
> +        /* only record calls (LK = 1) */
> +        env->bhrb_filter = BHRB_TYPE_CALL;
> +        break;
> +    case 2:
> +        /* only record indirect branches */
> +        env->bhrb_filter = BHRB_TYPE_INDIRECT;
> +        break;
> +    case 3:
> +        /* only record conditional branches */
> +        env->bhrb_filter = BHRB_TYPE_COND;
> +        break;
> +    }
> +}
> +
> diff --git a/target/ppc/helper_regs.h b/target/ppc/helper_regs.h
> index 8196c1346d..ab6aba1c24 100644
> --- a/target/ppc/helper_regs.h
> +++ b/target/ppc/helper_regs.h
> @@ -25,6 +25,7 @@ void hreg_compute_hflags(CPUPPCState *env);
>  void hreg_update_pmu_hflags(CPUPPCState *env);
>  void cpu_interrupt_exittb(CPUState *cs);
>  int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv);
> +void hreg_bhrb_filter_update(CPUPPCState *env);
>  
>  #ifdef CONFIG_USER_ONLY
>  static inline void check_tlb_flush(CPUPPCState *env, bool global) { }
> diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
> index 6f5d4e1256..18149a15b2 100644
> --- a/target/ppc/power8-pmu.c
> +++ b/target/ppc/power8-pmu.c
> @@ -261,6 +261,7 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
>      env->spr[SPR_POWER_MMCR0] = value;
>  
>      pmu_mmcr01a_updated(env);
> +    hreg_bhrb_filter_update(env);
>  
>      /* Update cycle overflow timers with the current MMCR0 state */
>      pmu_update_overflow_timers(env);
> @@ -280,6 +281,7 @@ void helper_store_mmcrA(CPUPPCState *env, uint64_t value)
>      env->spr[SPR_POWER_MMCRA] = value;
>  
>      pmu_mmcr01a_updated(env);
> +    hreg_bhrb_filter_update(env);
>  }
>  
Can these just be moved into pmu_mmcr01a_updated? AFAIKS they only
depend on MMCR0/A.
>  target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn)
> diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h
> index 87fa8c9334..a887094045 100644
> --- a/target/ppc/power8-pmu.h
> +++ b/target/ppc/power8-pmu.h
> @@ -17,6 +17,13 @@
>  
>  #define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL
>  
> +#define BHRB_TYPE_NORECORD      0x00
> +#define BHRB_TYPE_CALL          0x01
> +#define BHRB_TYPE_INDIRECT      0x02
> +#define BHRB_TYPE_COND          0x04
> +#define BHRB_TYPE_OTHER         0x08
> +#define BHRB_TYPE_XL_FORM       0x10
> +
>  void cpu_ppc_pmu_init(CPUPPCState *env);
>  void pmu_mmcr01a_updated(CPUPPCState *env);
>  #else
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index d93fbd4574..7824475f54 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -177,6 +177,7 @@ struct DisasContext {
>  #if defined(TARGET_PPC64)
>      bool sf_mode;
>      bool has_cfar;
> +    bool has_bhrb;
>  #endif
>      bool fpu_enabled;
>      bool altivec_enabled;
> @@ -4104,12 +4105,100 @@ static void gen_rvwinkle(DisasContext *ctx)
>  }
>  #endif /* #if defined(TARGET_PPC64) */
>  
> -static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
> +
> +static inline void gen_update_branch_history(DisasContext *ctx,
> +                                             target_ulong nip,
> +                                             TCGv target,
> +                                             target_long inst_type)
>  {
>  #if defined(TARGET_PPC64)
> +    TCGv t0;
> +    TCGv t1;
> +    TCGv t2;
> +    TCGv t3;
> +    TCGLabel *no_update;
> +
>      if (ctx->has_cfar) {
>          tcg_gen_movi_tl(cpu_cfar, nip);
>      }
> +
> +    if (!ctx->has_bhrb || inst_type == BHRB_TYPE_NORECORD) {
> +        return;
> +    }
> +
> +    /* ISA 3.1 adds the PMCRA[BRHBRD] and problem state checks */
> +    if ((ctx->insns_flags2 & PPC2_ISA310) && (ctx->mmcra_bhrbrd || !ctx->pr)) {
> +        return;
> +    }
> +
> +    /* Check for BHRB "frozen" conditions */
> +    if (ctx->mmcr0_fcpc) {
> +        if (ctx->mmcr0_fcp) {
> +            if ((ctx->hv) && (ctx->pr)) {
> +                return;
> +            }
> +        } else if (!(ctx->hv) && (ctx->pr)) {
> +            return;
> +        }
> +    } else if ((ctx->mmcr0_fcp) && (ctx->pr)) {
> +        return;
> +    }
> +
> +    t0 = tcg_temp_new();
> +    t1 = tcg_temp_new();
> +    t2 = tcg_temp_new();
> +    t3 = tcg_temp_new();
> +    no_update = gen_new_label();
> +
> +    /* check for bhrb filtering */
> +    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_filter));
> +    tcg_gen_andi_tl(t0, t0, inst_type);
> +    tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, no_update);
> +
> +    /* load bhrb base address into t2 */
> +    tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUPPCState, bhrb_base));
> +
> +    /* load current bhrb_offset into t0 */
> +    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
> +
> +    /* load a BHRB offset mask into t3 */
> +    tcg_gen_ld_tl(t3, cpu_env, offsetof(CPUPPCState, bhrb_offset_mask));
> +
> +    /* add t2 to t0 to get address of bhrb entry */
> +    tcg_gen_add_tl(t1, t0, t2);
> +
> +    /* store nip into bhrb at bhrb_offset */
> +    tcg_gen_st_i64(tcg_constant_i64(nip), (TCGv_ptr)t1, 0);
> +
> +    /* add 8 to current bhrb_offset */
> +    tcg_gen_addi_tl(t0, t0, 8);
> +
> +    /* apply offset mask */
> +    tcg_gen_and_tl(t0, t0, t3);
For some cases where the value remains live for a time and not reused,
maybe use a more descriptive name? I know existing code is pretty lazy
with names (and I share some blame). t0,t2,t3 could be base, offset,
mask respectively, and t1 could be tmp that is used for filter and
address generation. Just a suggestion?
> +
> +    if (inst_type & BHRB_TYPE_XL_FORM) {
> +        /* Also record the target address for XL-Form branches */
> +
> +        /* add t2 to t0 to get address of bhrb entry */
> +        tcg_gen_add_tl(t1, t0, t2);
> +
> +        /* Set the 'T' bit for target entries */
> +        tcg_gen_ori_tl(t2, target, 0x2);
> +
> +        /* Store target address in bhrb */
> +        tcg_gen_st_tl(t2, (TCGv_ptr)t1, 0);
> +
> +        /* add 8 to current bhrb_offset */
> +        tcg_gen_addi_tl(t0, t0, 8);
> +
> +        /* apply offset mask */
> +        tcg_gen_and_tl(t0, t0, t3);
> +    }
I would say this is bordering on warranting a new function since it
mostly duplicates previous block. You could have it take base, mask,
offset and value to store, and have it return new offset.
> +
> +    /* save updated bhrb_offset for next time */
> +    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
> +
> +    gen_set_label(no_update);
>  #endif
>  }
It all looks pretty good otherwise. I do worry about POWER8/9 which
do not have BHRB disable bit. How much do they slow down I wonder?
Thanks,
Nick
^ permalink raw reply	[flat|nested] 18+ messages in thread
- * Re: [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB
  2023-09-15  1:02   ` Nicholas Piggin
@ 2023-09-19 21:35     ` Glenn Miles
  2023-09-21 15:45       ` Glenn Miles
  0 siblings, 1 reply; 18+ messages in thread
From: Glenn Miles @ 2023-09-19 21:35 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: qemu-devel, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, open list:PowerPC TCG CPUs
On 2023-09-14 20:02, Nicholas Piggin wrote:
> On Wed Sep 13, 2023 at 6:24 AM AEST, Glenn Miles wrote:
>> This commit continues adding support for the Branch History
>> Rolling Buffer (BHRB) as is provided starting with the P8
>> processor and continuing with its successors.  This commit
>> is limited to the recording and filtering of taken branches.
>> 
>> The following changes were made:
>> 
>>   - Added a BHRB buffer for storing branch instruction and
>>     target addresses for taken branches
>>   - Renamed gen_update_cfar to gen_update_branch_history and
>>     added a 'target' parameter to hold the branch target
>>     address and 'inst_type' parameter to use for filtering
>>   - Added a combination of jit-time and run-time checks to
>>     gen_update_branch_history for determining if a branch
>>     should be recorded
>>   - Added TCG code to gen_update_branch_history that stores
>>     data to the BHRB and updates the BHRB offset.
>>   - Added BHRB resource initialization and reset functions
>>   - Enabled functionality for P8, P9 and P10 processors.
>> 
>> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
>> ---
>>  target/ppc/cpu.h                       |  18 +++-
>>  target/ppc/cpu_init.c                  |  41 ++++++++-
>>  target/ppc/helper_regs.c               |  32 +++++++
>>  target/ppc/helper_regs.h               |   1 +
>>  target/ppc/power8-pmu.c                |   2 +
>>  target/ppc/power8-pmu.h                |   7 ++
>>  target/ppc/translate.c                 | 114 
>> +++++++++++++++++++++++--
>>  target/ppc/translate/branch-impl.c.inc |   2 +-
>>  8 files changed, 205 insertions(+), 12 deletions(-)
>> 
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index 20ae1466a5..bda1afb700 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -454,8 +454,9 @@ FIELD(MSR, LE, MSR_LE, 1)
>>  #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \
>>                           MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
>> 
>> -#define MMCRA_BHRBRD    PPC_BIT(26)            /* BHRB Recording 
>> Disable */
>> -
>> +#define MMCRA_BHRBRD    PPC_BIT(26)         /* BHRB Recording Disable 
>> */
> 
> Fold this tidying into patch 1.
Done.
> 
>> +#define MMCRA_IFM_MASK  PPC_BITMASK(32, 33) /* BHRB Instruction 
>> Filtering */
>> +#define MMCRA_IFM_SHIFT PPC_BIT_NR(33)
>> 
>>  #define MMCR1_EVT_SIZE 8
>>  /* extract64() does a right shift before extracting */
>> @@ -682,6 +683,8 @@ enum {
>>      POWERPC_FLAG_SMT      = 0x00400000,
>>      /* Using "LPAR per core" mode  (as opposed to per-thread)         
>>        */
>>      POWERPC_FLAG_SMT_1LPAR = 0x00800000,
>> +    /* Has BHRB */
>> +    POWERPC_FLAG_BHRB      = 0x01000000,
>>  };
> 
> Interesting question of which patch to add different flags. I'm
> strongly in add when you add code that uses them like this one,
> but it's a matter of taste and not always practical to be an
> absolute rule. I don't mind too much what others do, but maybe
> this and the pcc->flags init should go in patch 1 since that's adding
> flags that aren't yet used?
> 
I think I prefer keeping it in patch 2 since patch 1 is more about
the hflags, which seems unrelated to this flag.
>> 
>>  /*
>> @@ -1110,6 +1113,9 @@ DEXCR_ASPECT(PHIE, 6)
>>  #define PPC_CPU_OPCODES_LEN          0x40
>>  #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
>> 
>> +#define BHRB_MAX_NUM_ENTRIES_LOG2 (5)
>> +#define BHRB_MAX_NUM_ENTRIES      (1 << BHRB_MAX_NUM_ENTRIES_LOG2)
>> +
>>  struct CPUArchState {
>>      /* Most commonly used resources during translated code execution 
>> first */
>>      target_ulong gpr[32];  /* general purpose registers */
>> @@ -1196,6 +1202,14 @@ struct CPUArchState {
>>      int dcache_line_size;
>>      int icache_line_size;
>> 
>> +    /* Branch History Rolling Buffer (BHRB) resources */
>> +    target_ulong bhrb_num_entries;
>> +    target_ulong bhrb_base;
>> +    target_ulong bhrb_filter;
>> +    target_ulong bhrb_offset;
>> +    target_ulong bhrb_offset_mask;
>> +    uint64_t bhrb[BHRB_MAX_NUM_ENTRIES];
> 
> Put these under ifdef TARGET_PPC64?
> 
Ok.
>> +
>>      /* These resources are used during exception processing */
>>      /* CPU model definition */
>>      target_ulong msr_mask;
>> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
>> index 568f9c3b88..19d7505a73 100644
>> --- a/target/ppc/cpu_init.c
>> +++ b/target/ppc/cpu_init.c
>> @@ -6100,6 +6100,28 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void 
>> *data)
>>      pcc->l1_icache_size = 0x8000;
>>  }
>> 
>> +static void bhrb_init_state(CPUPPCState *env, target_long 
>> num_entries_log2)
>> +{
>> +    if (env->flags & POWERPC_FLAG_BHRB) {
>> +        if (num_entries_log2 > BHRB_MAX_NUM_ENTRIES_LOG2) {
>> +            num_entries_log2 = BHRB_MAX_NUM_ENTRIES_LOG2;
>> +        }
>> +        env->bhrb_num_entries = 1 << num_entries_log2;
>> +        env->bhrb_base = (target_long)&env->bhrb[0];
>> +        env->bhrb_offset_mask = (env->bhrb_num_entries * 
>> sizeof(uint64_t)) - 1;
>> +    }
>> +}
>> +
>> +static void bhrb_reset_state(CPUPPCState *env)
>> +{
>> +    if (env->flags & POWERPC_FLAG_BHRB) {
>> +        env->bhrb_offset = 0;
>> +        env->bhrb_filter = 0;
>> +        memset(env->bhrb, 0, sizeof(env->bhrb));
>> +    }
>> +}
>> +
>> +#define POWER8_BHRB_ENTRIES_LOG2 5
>>  static void init_proc_POWER8(CPUPPCState *env)
>>  {
>>      /* Common Registers */
>> @@ -6141,6 +6163,8 @@ static void init_proc_POWER8(CPUPPCState *env)
>>      env->dcache_line_size = 128;
>>      env->icache_line_size = 128;
>> 
>> +    bhrb_init_state(env, POWER8_BHRB_ENTRIES_LOG2);
>> +
>>      /* Allocate hardware IRQ controller */
>>      init_excp_POWER8(env);
>>      ppcPOWER7_irq_init(env_archcpu(env));
>> @@ -6241,7 +6265,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void 
>> *data)
>>      pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
>>                   POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
>>                   POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
>> -                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
>> +                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM |
>> +                 POWERPC_FLAG_BHRB;
>>      pcc->l1_dcache_size = 0x8000;
>>      pcc->l1_icache_size = 0x8000;
>>  }
>> @@ -6265,6 +6290,7 @@ static struct ppc_radix_page_info 
>> POWER9_radix_page_info = {
>>  };
>>  #endif /* CONFIG_USER_ONLY */
>> 
>> +#define POWER9_BHRB_ENTRIES_LOG2 5
>>  static void init_proc_POWER9(CPUPPCState *env)
>>  {
>>      /* Common Registers */
>> @@ -6315,6 +6341,8 @@ static void init_proc_POWER9(CPUPPCState *env)
>>      env->dcache_line_size = 128;
>>      env->icache_line_size = 128;
>> 
>> +    bhrb_init_state(env, POWER9_BHRB_ENTRIES_LOG2);
>> +
>>      /* Allocate hardware IRQ controller */
>>      init_excp_POWER9(env);
>>      ppcPOWER9_irq_init(env_archcpu(env));
>> @@ -6434,7 +6462,8 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void 
>> *data)
>>      pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
>>                   POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
>>                   POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
>> -                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | 
>> POWERPC_FLAG_SCV;
>> +                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | 
>> POWERPC_FLAG_SCV |
>> +                 POWERPC_FLAG_BHRB;
>>      pcc->l1_dcache_size = 0x8000;
>>      pcc->l1_icache_size = 0x8000;
>>  }
>> @@ -6458,6 +6487,7 @@ static struct ppc_radix_page_info 
>> POWER10_radix_page_info = {
>>  };
>>  #endif /* !CONFIG_USER_ONLY */
>> 
>> +#define POWER10_BHRB_ENTRIES_LOG2 5
>>  static void init_proc_POWER10(CPUPPCState *env)
>>  {
>>      /* Common Registers */
>> @@ -6505,6 +6535,8 @@ static void init_proc_POWER10(CPUPPCState *env)
>>      env->dcache_line_size = 128;
>>      env->icache_line_size = 128;
>> 
>> +    bhrb_init_state(env, POWER10_BHRB_ENTRIES_LOG2);
>> +
>>      /* Allocate hardware IRQ controller */
>>      init_excp_POWER10(env);
>>      ppcPOWER9_irq_init(env_archcpu(env));
>> @@ -6610,7 +6642,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void 
>> *data)
>>      pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
>>                   POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
>>                   POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
>> -                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | 
>> POWERPC_FLAG_SCV;
>> +                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM | 
>> POWERPC_FLAG_SCV |
>> +                 POWERPC_FLAG_BHRB;
>>      pcc->l1_dcache_size = 0x8000;
>>      pcc->l1_icache_size = 0x8000;
>>  }
>> @@ -7178,6 +7211,8 @@ static void ppc_cpu_reset_hold(Object *obj)
>>          }
>>          env->spr[i] = spr->default_value;
>>      }
>> +
>> +    bhrb_reset_state(env);
>>  }
>> 
>>  #ifndef CONFIG_USER_ONLY
>> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
>> index 4ff054063d..68b27196d9 100644
>> --- a/target/ppc/helper_regs.c
>> +++ b/target/ppc/helper_regs.c
>> @@ -752,3 +752,35 @@ void register_usprgh_sprs(CPUPPCState *env)
>>                   &spr_read_ureg, SPR_NOACCESS,
>>                   0x00000000);
>>  }
>> +
>> +void hreg_bhrb_filter_update(CPUPPCState *env)
>> +{
>> +    target_long ifm;
>> +
>> +    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
>> +        /* disable recording to BHRB */
>> +        env->bhrb_filter = BHRB_TYPE_NORECORD;
>> +        return;
>> +    }
>> +
>> +    ifm = (env->spr[SPR_POWER_MMCRA] & MMCRA_IFM_MASK) >> 
>> MMCRA_IFM_SHIFT;
>> +    switch (ifm) {
>> +    case 0:
>> +        /* record all branches */
>> +        env->bhrb_filter = -1;
>> +        break;
>> +    case 1:
>> +        /* only record calls (LK = 1) */
>> +        env->bhrb_filter = BHRB_TYPE_CALL;
>> +        break;
>> +    case 2:
>> +        /* only record indirect branches */
>> +        env->bhrb_filter = BHRB_TYPE_INDIRECT;
>> +        break;
>> +    case 3:
>> +        /* only record conditional branches */
>> +        env->bhrb_filter = BHRB_TYPE_COND;
>> +        break;
>> +    }
>> +}
>> +
>> diff --git a/target/ppc/helper_regs.h b/target/ppc/helper_regs.h
>> index 8196c1346d..ab6aba1c24 100644
>> --- a/target/ppc/helper_regs.h
>> +++ b/target/ppc/helper_regs.h
>> @@ -25,6 +25,7 @@ void hreg_compute_hflags(CPUPPCState *env);
>>  void hreg_update_pmu_hflags(CPUPPCState *env);
>>  void cpu_interrupt_exittb(CPUState *cs);
>>  int hreg_store_msr(CPUPPCState *env, target_ulong value, int 
>> alter_hv);
>> +void hreg_bhrb_filter_update(CPUPPCState *env);
>> 
>>  #ifdef CONFIG_USER_ONLY
>>  static inline void check_tlb_flush(CPUPPCState *env, bool global) { }
>> diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
>> index 6f5d4e1256..18149a15b2 100644
>> --- a/target/ppc/power8-pmu.c
>> +++ b/target/ppc/power8-pmu.c
>> @@ -261,6 +261,7 @@ void helper_store_mmcr0(CPUPPCState *env, 
>> target_ulong value)
>>      env->spr[SPR_POWER_MMCR0] = value;
>> 
>>      pmu_mmcr01a_updated(env);
>> +    hreg_bhrb_filter_update(env);
>> 
>>      /* Update cycle overflow timers with the current MMCR0 state */
>>      pmu_update_overflow_timers(env);
>> @@ -280,6 +281,7 @@ void helper_store_mmcrA(CPUPPCState *env, uint64_t 
>> value)
>>      env->spr[SPR_POWER_MMCRA] = value;
>> 
>>      pmu_mmcr01a_updated(env);
>> +    hreg_bhrb_filter_update(env);
>>  }
>> 
> 
> Can these just be moved into pmu_mmcr01a_updated? AFAIKS they only
> depend on MMCR0/A.
> 
Done.
>>  target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn)
>> diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h
>> index 87fa8c9334..a887094045 100644
>> --- a/target/ppc/power8-pmu.h
>> +++ b/target/ppc/power8-pmu.h
>> @@ -17,6 +17,13 @@
>> 
>>  #define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL
>> 
>> +#define BHRB_TYPE_NORECORD      0x00
>> +#define BHRB_TYPE_CALL          0x01
>> +#define BHRB_TYPE_INDIRECT      0x02
>> +#define BHRB_TYPE_COND          0x04
>> +#define BHRB_TYPE_OTHER         0x08
>> +#define BHRB_TYPE_XL_FORM       0x10
>> +
>>  void cpu_ppc_pmu_init(CPUPPCState *env);
>>  void pmu_mmcr01a_updated(CPUPPCState *env);
>>  #else
>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>> index d93fbd4574..7824475f54 100644
>> --- a/target/ppc/translate.c
>> +++ b/target/ppc/translate.c
>> @@ -177,6 +177,7 @@ struct DisasContext {
>>  #if defined(TARGET_PPC64)
>>      bool sf_mode;
>>      bool has_cfar;
>> +    bool has_bhrb;
>>  #endif
>>      bool fpu_enabled;
>>      bool altivec_enabled;
>> @@ -4104,12 +4105,100 @@ static void gen_rvwinkle(DisasContext *ctx)
>>  }
>>  #endif /* #if defined(TARGET_PPC64) */
>> 
>> -static inline void gen_update_cfar(DisasContext *ctx, target_ulong 
>> nip)
>> +
>> +static inline void gen_update_branch_history(DisasContext *ctx,
>> +                                             target_ulong nip,
>> +                                             TCGv target,
>> +                                             target_long inst_type)
>>  {
>>  #if defined(TARGET_PPC64)
>> +    TCGv t0;
>> +    TCGv t1;
>> +    TCGv t2;
>> +    TCGv t3;
>> +    TCGLabel *no_update;
>> +
>>      if (ctx->has_cfar) {
>>          tcg_gen_movi_tl(cpu_cfar, nip);
>>      }
>> +
>> +    if (!ctx->has_bhrb || inst_type == BHRB_TYPE_NORECORD) {
>> +        return;
>> +    }
>> +
>> +    /* ISA 3.1 adds the PMCRA[BRHBRD] and problem state checks */
>> +    if ((ctx->insns_flags2 & PPC2_ISA310) && (ctx->mmcra_bhrbrd || 
>> !ctx->pr)) {
>> +        return;
>> +    }
>> +
>> +    /* Check for BHRB "frozen" conditions */
>> +    if (ctx->mmcr0_fcpc) {
>> +        if (ctx->mmcr0_fcp) {
>> +            if ((ctx->hv) && (ctx->pr)) {
>> +                return;
>> +            }
>> +        } else if (!(ctx->hv) && (ctx->pr)) {
>> +            return;
>> +        }
>> +    } else if ((ctx->mmcr0_fcp) && (ctx->pr)) {
>> +        return;
>> +    }
>> +
>> +    t0 = tcg_temp_new();
>> +    t1 = tcg_temp_new();
>> +    t2 = tcg_temp_new();
>> +    t3 = tcg_temp_new();
>> +    no_update = gen_new_label();
>> +
>> +    /* check for bhrb filtering */
>> +    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_filter));
>> +    tcg_gen_andi_tl(t0, t0, inst_type);
>> +    tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, no_update);
>> +
>> +    /* load bhrb base address into t2 */
>> +    tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUPPCState, bhrb_base));
>> +
>> +    /* load current bhrb_offset into t0 */
>> +    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
>> +
>> +    /* load a BHRB offset mask into t3 */
>> +    tcg_gen_ld_tl(t3, cpu_env, offsetof(CPUPPCState, 
>> bhrb_offset_mask));
>> +
>> +    /* add t2 to t0 to get address of bhrb entry */
>> +    tcg_gen_add_tl(t1, t0, t2);
>> +
>> +    /* store nip into bhrb at bhrb_offset */
>> +    tcg_gen_st_i64(tcg_constant_i64(nip), (TCGv_ptr)t1, 0);
>> +
>> +    /* add 8 to current bhrb_offset */
>> +    tcg_gen_addi_tl(t0, t0, 8);
>> +
>> +    /* apply offset mask */
>> +    tcg_gen_and_tl(t0, t0, t3);
> 
> For some cases where the value remains live for a time and not reused,
> maybe use a more descriptive name? I know existing code is pretty lazy
> with names (and I share some blame). t0,t2,t3 could be base, offset,
> mask respectively, and t1 could be tmp that is used for filter and
> address generation. Just a suggestion?
> 
Yeah, I agree.  That is normally what I would have done, but changed
my mind after seeing how everyone else seemed to be doing it :)
Changing for readability.
> 
>> +
>> +    if (inst_type & BHRB_TYPE_XL_FORM) {
>> +        /* Also record the target address for XL-Form branches */
>> +
>> +        /* add t2 to t0 to get address of bhrb entry */
>> +        tcg_gen_add_tl(t1, t0, t2);
>> +
>> +        /* Set the 'T' bit for target entries */
>> +        tcg_gen_ori_tl(t2, target, 0x2);
>> +
>> +        /* Store target address in bhrb */
>> +        tcg_gen_st_tl(t2, (TCGv_ptr)t1, 0);
>> +
>> +        /* add 8 to current bhrb_offset */
>> +        tcg_gen_addi_tl(t0, t0, 8);
>> +
>> +        /* apply offset mask */
>> +        tcg_gen_and_tl(t0, t0, t3);
>> +    }
> 
> I would say this is bordering on warranting a new function since it
> mostly duplicates previous block. You could have it take base, mask,
> offset and value to store, and have it return new offset.
> 
I agree.  Will make the changes.
>> +
>> +    /* save updated bhrb_offset for next time */
>> +    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
>> +
>> +    gen_set_label(no_update);
>>  #endif
>>  }
> 
> It all looks pretty good otherwise. I do worry about POWER8/9 which
> do not have BHRB disable bit. How much do they slow down I wonder?
That is a good question!  I'll see if I can get some linux boot times
with and without the changes on P9.
> 
> Thanks,
> Nick
Thanks for taking the time to review my code!
Glenn
^ permalink raw reply	[flat|nested] 18+ messages in thread
- * Re: [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB
  2023-09-19 21:35     ` Glenn Miles
@ 2023-09-21 15:45       ` Glenn Miles
  0 siblings, 0 replies; 18+ messages in thread
From: Glenn Miles @ 2023-09-21 15:45 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: qemu-devel, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, open list:PowerPC TCG CPUs
>> It all looks pretty good otherwise. I do worry about POWER8/9 which
>> do not have BHRB disable bit. How much do they slow down I wonder?
> 
> That is a good question!  I'll see if I can get some linux boot times
> with and without the changes on P9.
> 
I ran some tests booting ubuntu 20.04 on the powrnv9 model and found
that it took about 5% longer to boot to the login prompt with the
changes than without.  As discussed offline, I'll remove this support
from P8/P9 as the 5% reduction in performance is not worth the added
functionality.  We could perhaps add it back in the future with a
command line option that has it disabled by default on P8/P9.
P10 and on has it disabled for non problem state branches, so we
should be good there.
Thanks,
Glenn
^ permalink raw reply	[flat|nested] 18+ messages in thread 
 
 
 
- * [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions
  2023-09-12 19:21 [PATCH 0/4] Add BHRB Facility Support Glenn Miles
  2023-09-12 20:23 ` [PATCH 1/4] target/ppc: Add new hflags to support BHRB Glenn Miles
  2023-09-12 20:24 ` [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB Glenn Miles
@ 2023-09-12 20:24 ` Glenn Miles
  2023-09-15  1:13   ` Nicholas Piggin
  2023-09-12 20:25 ` [PATCH 4/4] target/ppc: Add migration support for BHRB Glenn Miles
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Glenn Miles @ 2023-09-12 20:24 UTC (permalink / raw)
  To: qemu-devel
  Cc: Glenn Miles, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, Nicholas Piggin,
	open list:PowerPC TCG CPUs
Add support for the clrbhrb and mfbhrbe instructions.
Since neither instruction is believed to be critical to
performance, both instructions were implemented using helper
functions.
Access to both instructions is controlled by bits in the
HFSCR (for privileged state) and MMCR0 (for problem state).
A new function, helper_mmcr0_facility_check, was added for
checking MMCR0[BHRBA] and raising a facility_unavailable exception
if required.
Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
---
 target/ppc/cpu.h         |  1 +
 target/ppc/helper.h      |  4 ++++
 target/ppc/misc_helper.c | 43 ++++++++++++++++++++++++++++++++++++++++
 target/ppc/translate.c   | 13 ++++++++++++
 4 files changed, 61 insertions(+)
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index bda1afb700..ee81ede4ee 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -541,6 +541,7 @@ FIELD(MSR, LE, MSR_LE, 1)
 
 /* HFSCR bits */
 #define HFSCR_MSGP     PPC_BIT(53) /* Privileged Message Send Facilities */
+#define HFSCR_BHRB     PPC_BIT(59) /* BHRB Instructions */
 #define HFSCR_IC_MSGP  0xA
 
 #define DBCR0_ICMP (1 << 27)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 1a3d9a7e57..bbc32ff114 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -816,3 +816,7 @@ DEF_HELPER_4(DSCLIQ, void, env, fprp, fprp, i32)
 
 DEF_HELPER_1(tbegin, void, env)
 DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
+
+DEF_HELPER_1(clrbhrb, void, env)
+DEF_HELPER_FLAGS_2(mfbhrbe, TCG_CALL_NO_WG, i64, env, i32)
+
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 692d058665..45abe04f66 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -139,6 +139,17 @@ void helper_fscr_facility_check(CPUPPCState *env, uint32_t bit,
 #endif
 }
 
+static void helper_mmcr0_facility_check(CPUPPCState *env, uint32_t bit,
+                                 uint32_t sprn, uint32_t cause)
+{
+#ifdef TARGET_PPC64
+    if (FIELD_EX64(env->msr, MSR, PR) &&
+        !(env->spr[SPR_POWER_MMCR0] & (1ULL << bit))) {
+        raise_fu_exception(env, bit, sprn, cause, GETPC());
+    }
+#endif
+}
+
 void helper_msr_facility_check(CPUPPCState *env, uint32_t bit,
                                uint32_t sprn, uint32_t cause)
 {
@@ -351,3 +362,35 @@ void helper_fixup_thrm(CPUPPCState *env)
         env->spr[i] = v;
     }
 }
+
+void helper_clrbhrb(CPUPPCState *env)
+{
+    helper_hfscr_facility_check(env, HFSCR_BHRB, "clrbhrb", FSCR_IC_BHRB);
+
+    helper_mmcr0_facility_check(env, MMCR0_BHRBA, 0, FSCR_IC_BHRB);
+
+    memset(env->bhrb, 0, sizeof(env->bhrb));
+}
+
+uint64_t helper_mfbhrbe(CPUPPCState *env, uint32_t bhrbe)
+{
+    unsigned int index;
+
+    helper_hfscr_facility_check(env, HFSCR_BHRB, "mfbhrbe", FSCR_IC_BHRB);
+
+    helper_mmcr0_facility_check(env, MMCR0_BHRBA, 0, FSCR_IC_BHRB);
+
+    if ((bhrbe >= env->bhrb_num_entries) ||
+       (env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
+        return 0;
+    }
+
+    /*
+     * Note: bhrb_offset is the byte offset for writing the
+     * next entry (over the oldest entry), which is why we
+     * must offset bhrbe by 1 to get to the 0th entry.
+     */
+    index = ((env->bhrb_offset / sizeof(uint64_t)) - (bhrbe + 1)) %
+            env->bhrb_num_entries;
+    return env->bhrb[index];
+}
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 7824475f54..b330871793 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6549,12 +6549,25 @@ static void gen_brh(DisasContext *ctx)
 }
 #endif
 
+static void gen_clrbhrb(DisasContext *ctx)
+{
+    gen_helper_clrbhrb(cpu_env);
+}
+
+static void gen_mfbhrbe(DisasContext *ctx)
+{
+    TCGv_i32 bhrbe = tcg_constant_i32(_SPR(ctx->opcode));
+    gen_helper_mfbhrbe(cpu_gpr[rD(ctx->opcode)], cpu_env, bhrbe);
+}
+
 static opcode_t opcodes[] = {
 #if defined(TARGET_PPC64)
 GEN_HANDLER_E(brd, 0x1F, 0x1B, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA310),
 GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA310),
 GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA310),
 #endif
+GEN_HANDLER_E(clrbhrb, 0x1F, 0x0E, 0x0D, 0x3FFF801, PPC_NONE, PPC2_ISA207S),
+GEN_HANDLER_E(mfbhrbe, 0x1F, 0x0E, 0x09, 0x0000001, PPC_NONE, PPC2_ISA207S),
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
 #if defined(TARGET_PPC64)
 GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
-- 
2.31.1
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * Re: [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions
  2023-09-12 20:24 ` [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions Glenn Miles
@ 2023-09-15  1:13   ` Nicholas Piggin
  2023-09-19 21:40     ` Glenn Miles
  0 siblings, 1 reply; 18+ messages in thread
From: Nicholas Piggin @ 2023-09-15  1:13 UTC (permalink / raw)
  To: Glenn Miles, qemu-devel
  Cc: Daniel Henrique Barboza, Cédric Le Goater, David Gibson,
	Greg Kurz, open list:PowerPC TCG CPUs
On Wed Sep 13, 2023 at 6:24 AM AEST, Glenn Miles wrote:
> Add support for the clrbhrb and mfbhrbe instructions.
>
> Since neither instruction is believed to be critical to
> performance, both instructions were implemented using helper
> functions.
>
> Access to both instructions is controlled by bits in the
> HFSCR (for privileged state) and MMCR0 (for problem state).
> A new function, helper_mmcr0_facility_check, was added for
> checking MMCR0[BHRBA] and raising a facility_unavailable exception
> if required.
>
> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
> ---
>  target/ppc/cpu.h         |  1 +
>  target/ppc/helper.h      |  4 ++++
>  target/ppc/misc_helper.c | 43 ++++++++++++++++++++++++++++++++++++++++
>  target/ppc/translate.c   | 13 ++++++++++++
>  4 files changed, 61 insertions(+)
>
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index bda1afb700..ee81ede4ee 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -541,6 +541,7 @@ FIELD(MSR, LE, MSR_LE, 1)
>  
>  /* HFSCR bits */
>  #define HFSCR_MSGP     PPC_BIT(53) /* Privileged Message Send Facilities */
> +#define HFSCR_BHRB     PPC_BIT(59) /* BHRB Instructions */
>  #define HFSCR_IC_MSGP  0xA
>  
>  #define DBCR0_ICMP (1 << 27)
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index 1a3d9a7e57..bbc32ff114 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -816,3 +816,7 @@ DEF_HELPER_4(DSCLIQ, void, env, fprp, fprp, i32)
>  
>  DEF_HELPER_1(tbegin, void, env)
>  DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
> +
> +DEF_HELPER_1(clrbhrb, void, env)
> +DEF_HELPER_FLAGS_2(mfbhrbe, TCG_CALL_NO_WG, i64, env, i32)
> +
> diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
> index 692d058665..45abe04f66 100644
> --- a/target/ppc/misc_helper.c
> +++ b/target/ppc/misc_helper.c
> @@ -139,6 +139,17 @@ void helper_fscr_facility_check(CPUPPCState *env, uint32_t bit,
>  #endif
>  }
>  
> +static void helper_mmcr0_facility_check(CPUPPCState *env, uint32_t bit,
> +                                 uint32_t sprn, uint32_t cause)
> +{
> +#ifdef TARGET_PPC64
> +    if (FIELD_EX64(env->msr, MSR, PR) &&
> +        !(env->spr[SPR_POWER_MMCR0] & (1ULL << bit))) {
> +        raise_fu_exception(env, bit, sprn, cause, GETPC());
> +    }
> +#endif
> +}
> +
>  void helper_msr_facility_check(CPUPPCState *env, uint32_t bit,
>                                 uint32_t sprn, uint32_t cause)
>  {
> @@ -351,3 +362,35 @@ void helper_fixup_thrm(CPUPPCState *env)
>          env->spr[i] = v;
>      }
>  }
> +
> +void helper_clrbhrb(CPUPPCState *env)
> +{
> +    helper_hfscr_facility_check(env, HFSCR_BHRB, "clrbhrb", FSCR_IC_BHRB);
> +
> +    helper_mmcr0_facility_check(env, MMCR0_BHRBA, 0, FSCR_IC_BHRB);
Repeating the comment about MMCR0_BHRBA and PPC_BIT_NR discrepancy here
for posterity.
> +
> +    memset(env->bhrb, 0, sizeof(env->bhrb));
> +}
> +
> +uint64_t helper_mfbhrbe(CPUPPCState *env, uint32_t bhrbe)
> +{
> +    unsigned int index;
> +
> +    helper_hfscr_facility_check(env, HFSCR_BHRB, "mfbhrbe", FSCR_IC_BHRB);
> +
> +    helper_mmcr0_facility_check(env, MMCR0_BHRBA, 0, FSCR_IC_BHRB);
> +
> +    if ((bhrbe >= env->bhrb_num_entries) ||
> +       (env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
Nitpick, but multi line statment starts again inside the first
parenthesis after a keyword like this.
> +        return 0;
> +    }
> +
> +    /*
> +     * Note: bhrb_offset is the byte offset for writing the
> +     * next entry (over the oldest entry), which is why we
> +     * must offset bhrbe by 1 to get to the 0th entry.
> +     */
> +    index = ((env->bhrb_offset / sizeof(uint64_t)) - (bhrbe + 1)) %
> +            env->bhrb_num_entries;
> +    return env->bhrb[index];
> +}
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 7824475f54..b330871793 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -6549,12 +6549,25 @@ static void gen_brh(DisasContext *ctx)
>  }
>  #endif
>  
> +static void gen_clrbhrb(DisasContext *ctx)
> +{
> +    gen_helper_clrbhrb(cpu_env);
> +}
> +
> +static void gen_mfbhrbe(DisasContext *ctx)
> +{
> +    TCGv_i32 bhrbe = tcg_constant_i32(_SPR(ctx->opcode));
> +    gen_helper_mfbhrbe(cpu_gpr[rD(ctx->opcode)], cpu_env, bhrbe);
> +}
> +
>  static opcode_t opcodes[] = {
>  #if defined(TARGET_PPC64)
>  GEN_HANDLER_E(brd, 0x1F, 0x1B, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA310),
>  GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA310),
>  GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA310),
>  #endif
> +GEN_HANDLER_E(clrbhrb, 0x1F, 0x0E, 0x0D, 0x3FFF801, PPC_NONE, PPC2_ISA207S),
> +GEN_HANDLER_E(mfbhrbe, 0x1F, 0x0E, 0x09, 0x0000001, PPC_NONE, PPC2_ISA207S),
How much of a pain would it be to add it as decodetree? If there is an
addition a family of existing instrutions here it makes sense to add it
here, for new family would be nice to use decodetree.
I think they're only supported in 64-bit ISA so it could be ifdef
TARGET_PPC64.
Thanks,
Nick
>  GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
>  #if defined(TARGET_PPC64)
>  GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
^ permalink raw reply	[flat|nested] 18+ messages in thread
- * Re: [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions
  2023-09-15  1:13   ` Nicholas Piggin
@ 2023-09-19 21:40     ` Glenn Miles
  0 siblings, 0 replies; 18+ messages in thread
From: Glenn Miles @ 2023-09-19 21:40 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: qemu-devel, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, open list:PowerPC TCG CPUs
On 2023-09-14 20:13, Nicholas Piggin wrote:
> On Wed Sep 13, 2023 at 6:24 AM AEST, Glenn Miles wrote:
>> Add support for the clrbhrb and mfbhrbe instructions.
>> 
>> Since neither instruction is believed to be critical to
>> performance, both instructions were implemented using helper
>> functions.
>> 
>> Access to both instructions is controlled by bits in the
>> HFSCR (for privileged state) and MMCR0 (for problem state).
>> A new function, helper_mmcr0_facility_check, was added for
>> checking MMCR0[BHRBA] and raising a facility_unavailable exception
>> if required.
>> 
>> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
>> ---
>>  target/ppc/cpu.h         |  1 +
>>  target/ppc/helper.h      |  4 ++++
>>  target/ppc/misc_helper.c | 43 
>> ++++++++++++++++++++++++++++++++++++++++
>>  target/ppc/translate.c   | 13 ++++++++++++
>>  4 files changed, 61 insertions(+)
>> 
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index bda1afb700..ee81ede4ee 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -541,6 +541,7 @@ FIELD(MSR, LE, MSR_LE, 1)
>> 
>>  /* HFSCR bits */
>>  #define HFSCR_MSGP     PPC_BIT(53) /* Privileged Message Send 
>> Facilities */
>> +#define HFSCR_BHRB     PPC_BIT(59) /* BHRB Instructions */
>>  #define HFSCR_IC_MSGP  0xA
>> 
>>  #define DBCR0_ICMP (1 << 27)
>> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
>> index 1a3d9a7e57..bbc32ff114 100644
>> --- a/target/ppc/helper.h
>> +++ b/target/ppc/helper.h
>> @@ -816,3 +816,7 @@ DEF_HELPER_4(DSCLIQ, void, env, fprp, fprp, i32)
>> 
>>  DEF_HELPER_1(tbegin, void, env)
>>  DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
>> +
>> +DEF_HELPER_1(clrbhrb, void, env)
>> +DEF_HELPER_FLAGS_2(mfbhrbe, TCG_CALL_NO_WG, i64, env, i32)
>> +
>> diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
>> index 692d058665..45abe04f66 100644
>> --- a/target/ppc/misc_helper.c
>> +++ b/target/ppc/misc_helper.c
>> @@ -139,6 +139,17 @@ void helper_fscr_facility_check(CPUPPCState *env, 
>> uint32_t bit,
>>  #endif
>>  }
>> 
>> +static void helper_mmcr0_facility_check(CPUPPCState *env, uint32_t 
>> bit,
>> +                                 uint32_t sprn, uint32_t cause)
>> +{
>> +#ifdef TARGET_PPC64
>> +    if (FIELD_EX64(env->msr, MSR, PR) &&
>> +        !(env->spr[SPR_POWER_MMCR0] & (1ULL << bit))) {
>> +        raise_fu_exception(env, bit, sprn, cause, GETPC());
>> +    }
>> +#endif
>> +}
>> +
>>  void helper_msr_facility_check(CPUPPCState *env, uint32_t bit,
>>                                 uint32_t sprn, uint32_t cause)
>>  {
>> @@ -351,3 +362,35 @@ void helper_fixup_thrm(CPUPPCState *env)
>>          env->spr[i] = v;
>>      }
>>  }
>> +
>> +void helper_clrbhrb(CPUPPCState *env)
>> +{
>> +    helper_hfscr_facility_check(env, HFSCR_BHRB, "clrbhrb", 
>> FSCR_IC_BHRB);
>> +
>> +    helper_mmcr0_facility_check(env, MMCR0_BHRBA, 0, FSCR_IC_BHRB);
> 
> Repeating the comment about MMCR0_BHRBA and PPC_BIT_NR discrepancy here
> for posterity.
> 
Added NR suffix.
>> +
>> +    memset(env->bhrb, 0, sizeof(env->bhrb));
>> +}
>> +
>> +uint64_t helper_mfbhrbe(CPUPPCState *env, uint32_t bhrbe)
>> +{
>> +    unsigned int index;
>> +
>> +    helper_hfscr_facility_check(env, HFSCR_BHRB, "mfbhrbe", 
>> FSCR_IC_BHRB);
>> +
>> +    helper_mmcr0_facility_check(env, MMCR0_BHRBA, 0, FSCR_IC_BHRB);
>> +
>> +    if ((bhrbe >= env->bhrb_num_entries) ||
>> +       (env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
> 
> Nitpick, but multi line statment starts again inside the first
> parenthesis after a keyword like this.
> 
Fixed.
>> +        return 0;
>> +    }
>> +
>> +    /*
>> +     * Note: bhrb_offset is the byte offset for writing the
>> +     * next entry (over the oldest entry), which is why we
>> +     * must offset bhrbe by 1 to get to the 0th entry.
>> +     */
>> +    index = ((env->bhrb_offset / sizeof(uint64_t)) - (bhrbe + 1)) %
>> +            env->bhrb_num_entries;
>> +    return env->bhrb[index];
>> +}
>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>> index 7824475f54..b330871793 100644
>> --- a/target/ppc/translate.c
>> +++ b/target/ppc/translate.c
>> @@ -6549,12 +6549,25 @@ static void gen_brh(DisasContext *ctx)
>>  }
>>  #endif
>> 
>> +static void gen_clrbhrb(DisasContext *ctx)
>> +{
>> +    gen_helper_clrbhrb(cpu_env);
>> +}
>> +
>> +static void gen_mfbhrbe(DisasContext *ctx)
>> +{
>> +    TCGv_i32 bhrbe = tcg_constant_i32(_SPR(ctx->opcode));
>> +    gen_helper_mfbhrbe(cpu_gpr[rD(ctx->opcode)], cpu_env, bhrbe);
>> +}
>> +
>>  static opcode_t opcodes[] = {
>>  #if defined(TARGET_PPC64)
>>  GEN_HANDLER_E(brd, 0x1F, 0x1B, 0x05, 0x0000F801, PPC_NONE, 
>> PPC2_ISA310),
>>  GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, 
>> PPC2_ISA310),
>>  GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, 
>> PPC2_ISA310),
>>  #endif
>> +GEN_HANDLER_E(clrbhrb, 0x1F, 0x0E, 0x0D, 0x3FFF801, PPC_NONE, 
>> PPC2_ISA207S),
>> +GEN_HANDLER_E(mfbhrbe, 0x1F, 0x0E, 0x09, 0x0000001, PPC_NONE, 
>> PPC2_ISA207S),
> 
> How much of a pain would it be to add it as decodetree? If there is an
> addition a family of existing instrutions here it makes sense to add it
> here, for new family would be nice to use decodetree.
> 
> I think they're only supported in 64-bit ISA so it could be ifdef
> TARGET_PPC64.
> 
Ok, switched to using decodetree.
> Thanks,
> Nick
> 
Thanks for the review!
Glenn
>>  GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
>>  #if defined(TARGET_PPC64)
>>  GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, 
>> PPC2_ISA300),
^ permalink raw reply	[flat|nested] 18+ messages in thread
 
 
- * [PATCH 4/4] target/ppc: Add migration support for BHRB
  2023-09-12 19:21 [PATCH 0/4] Add BHRB Facility Support Glenn Miles
                   ` (2 preceding siblings ...)
  2023-09-12 20:24 ` [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions Glenn Miles
@ 2023-09-12 20:25 ` Glenn Miles
  2023-09-15  1:20   ` Nicholas Piggin
  2023-09-12 20:57 ` [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions Glenn Miles
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Glenn Miles @ 2023-09-12 20:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Glenn Miles, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, Nicholas Piggin,
	open list:PowerPC TCG CPUs
Adds migration support for Branch History Rolling
Buffer (BHRB) internal state.
Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
---
 target/ppc/machine.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index b195fb4dc8..89146969c8 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -314,6 +314,7 @@ static int cpu_post_load(void *opaque, int version_id)
 
     if (tcg_enabled()) {
         pmu_mmcr01a_updated(env);
+        hreg_bhrb_filter_update(env);
     }
 
     return 0;
@@ -670,6 +671,27 @@ static const VMStateDescription vmstate_compat = {
     }
 };
 
+#ifdef TARGET_PPC64
+static bool bhrb_needed(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    return (cpu->env.flags & POWERPC_FLAG_BHRB) != 0;
+}
+
+static const VMStateDescription vmstate_bhrb = {
+    .name = "cpu/bhrb",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = bhrb_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINTTL(env.bhrb_num_entries, PowerPCCPU),
+        VMSTATE_UINTTL(env.bhrb_offset, PowerPCCPU),
+        VMSTATE_UINT64_ARRAY(env.bhrb, PowerPCCPU, BHRB_MAX_NUM_ENTRIES),
+        VMSTATE_END_OF_LIST()
+    }
+};
+#endif
+
 const VMStateDescription vmstate_ppc_cpu = {
     .name = "cpu",
     .version_id = 5,
@@ -716,6 +738,7 @@ const VMStateDescription vmstate_ppc_cpu = {
 #ifdef TARGET_PPC64
         &vmstate_tm,
         &vmstate_slb,
+        &vmstate_bhrb,
 #endif /* TARGET_PPC64 */
         &vmstate_tlb6xx,
         &vmstate_tlbemb,
-- 
2.31.1
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * Re: [PATCH 4/4] target/ppc: Add migration support for BHRB
  2023-09-12 20:25 ` [PATCH 4/4] target/ppc: Add migration support for BHRB Glenn Miles
@ 2023-09-15  1:20   ` Nicholas Piggin
  2023-09-19 22:01     ` Glenn Miles
  0 siblings, 1 reply; 18+ messages in thread
From: Nicholas Piggin @ 2023-09-15  1:20 UTC (permalink / raw)
  To: Glenn Miles, qemu-devel
  Cc: Daniel Henrique Barboza, Cédric Le Goater, David Gibson,
	Greg Kurz, open list:PowerPC TCG CPUs
On Wed Sep 13, 2023 at 6:25 AM AEST, Glenn Miles wrote:
> Adds migration support for Branch History Rolling
> Buffer (BHRB) internal state.
>
> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
> ---
>  target/ppc/machine.c | 23 +++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
>
> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
> index b195fb4dc8..89146969c8 100644
> --- a/target/ppc/machine.c
> +++ b/target/ppc/machine.c
> @@ -314,6 +314,7 @@ static int cpu_post_load(void *opaque, int version_id)
>  
>      if (tcg_enabled()) {
>          pmu_mmcr01a_updated(env);
> +        hreg_bhrb_filter_update(env);
>      }
>  
>      return 0;
> @@ -670,6 +671,27 @@ static const VMStateDescription vmstate_compat = {
>      }
>  };
>  
> +#ifdef TARGET_PPC64
> +static bool bhrb_needed(void *opaque)
> +{
> +    PowerPCCPU *cpu = opaque;
> +    return (cpu->env.flags & POWERPC_FLAG_BHRB) != 0;
> +}
> +
> +static const VMStateDescription vmstate_bhrb = {
> +    .name = "cpu/bhrb",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = bhrb_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINTTL(env.bhrb_num_entries, PowerPCCPU),
Maybe don't need bhrb_num_entries since target machine should have the
same?
> +        VMSTATE_UINTTL(env.bhrb_offset, PowerPCCPU),
> +        VMSTATE_UINT64_ARRAY(env.bhrb, PowerPCCPU, BHRB_MAX_NUM_ENTRIES),
Is it possible to migrate only bhrb_num_entries items? Wants a VARRAY
AFAIKS but there is no VARRAY_UINT64?
Since all sizes are the same 32 now, would it be possible to turn it
into a VARRAY sometime later if supposing a new CPU changed to a
different size, and would the wire format for the VARRAY still be
compatible with this fixed size array, or does a VARRAY look different
I wonder?
Thanks,
Nick
^ permalink raw reply	[flat|nested] 18+ messages in thread
- * Re: [PATCH 4/4] target/ppc: Add migration support for BHRB
  2023-09-15  1:20   ` Nicholas Piggin
@ 2023-09-19 22:01     ` Glenn Miles
  0 siblings, 0 replies; 18+ messages in thread
From: Glenn Miles @ 2023-09-19 22:01 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: qemu-devel, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, open list:PowerPC TCG CPUs
On 2023-09-14 20:20, Nicholas Piggin wrote:
> On Wed Sep 13, 2023 at 6:25 AM AEST, Glenn Miles wrote:
>> Adds migration support for Branch History Rolling
>> Buffer (BHRB) internal state.
>> 
>> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
>> ---
>>  target/ppc/machine.c | 23 +++++++++++++++++++++++
>>  1 file changed, 23 insertions(+)
>> 
>> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
>> index b195fb4dc8..89146969c8 100644
>> --- a/target/ppc/machine.c
>> +++ b/target/ppc/machine.c
>> @@ -314,6 +314,7 @@ static int cpu_post_load(void *opaque, int 
>> version_id)
>> 
>>      if (tcg_enabled()) {
>>          pmu_mmcr01a_updated(env);
>> +        hreg_bhrb_filter_update(env);
>>      }
>> 
>>      return 0;
>> @@ -670,6 +671,27 @@ static const VMStateDescription vmstate_compat = 
>> {
>>      }
>>  };
>> 
>> +#ifdef TARGET_PPC64
>> +static bool bhrb_needed(void *opaque)
>> +{
>> +    PowerPCCPU *cpu = opaque;
>> +    return (cpu->env.flags & POWERPC_FLAG_BHRB) != 0;
>> +}
>> +
>> +static const VMStateDescription vmstate_bhrb = {
>> +    .name = "cpu/bhrb",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .needed = bhrb_needed,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINTTL(env.bhrb_num_entries, PowerPCCPU),
> 
> Maybe don't need bhrb_num_entries since target machine should have the
> same?
> 
Removed.
>> +        VMSTATE_UINTTL(env.bhrb_offset, PowerPCCPU),
>> +        VMSTATE_UINT64_ARRAY(env.bhrb, PowerPCCPU, 
>> BHRB_MAX_NUM_ENTRIES),
> 
> Is it possible to migrate only bhrb_num_entries items? Wants a VARRAY
> AFAIKS but there is no VARRAY_UINT64?
> 
> Since all sizes are the same 32 now, would it be possible to turn it
> into a VARRAY sometime later if supposing a new CPU changed to a
> different size, and would the wire format for the VARRAY still be
> compatible with this fixed size array, or does a VARRAY look different
> I wonder?
> 
I looked into this some more.  It turns out that the UINT32 in 
VARRAY_UINT32
is referring to the size of the field that holds the number of entries 
in
the array, not the size of the array elements.  So, it is possible to do 
this
with the VARRAY_UINT32 type.  I would need to change the type for 
bhrb_num_entries
to a uint32_t and also, since VARRAY_UINT32 requires the array field to 
be a
pointer to an array, I would need to store the address of the array in 
another
field.
> Thanks,
> Nick
Thank you for taking the time to review my code!
Glenn
^ permalink raw reply	[flat|nested] 18+ messages in thread
 
 
- * [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions
  2023-09-12 19:21 [PATCH 0/4] Add BHRB Facility Support Glenn Miles
                   ` (3 preceding siblings ...)
  2023-09-12 20:25 ` [PATCH 4/4] target/ppc: Add migration support for BHRB Glenn Miles
@ 2023-09-12 20:57 ` Glenn Miles
  2023-09-12 21:05 ` [PATCH 4/4] target/ppc: Add migration support for BHRB Glenn Miles
  2023-09-12 21:27 ` Glenn Miles
  6 siblings, 0 replies; 18+ messages in thread
From: Glenn Miles @ 2023-09-12 20:57 UTC (permalink / raw)
  To: qemu-devel
  Cc: Glenn Miles, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, Nicholas Piggin,
	open list:PowerPC TCG CPUs
Add support for the clrbhrb and mfbhrbe instructions.
Since neither instruction is believed to be critical to
performance, both instructions were implemented using helper
functions.
Access to both instructions is controlled by bits in the
HFSCR (for privileged state) and MMCR0 (for problem state).
A new function, helper_mmcr0_facility_check, was added for
checking MMCR0[BHRBA] and raising a facility_unavailable exception
if required.
Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
---
 target/ppc/cpu.h         |  1 +
 target/ppc/helper.h      |  4 ++++
 target/ppc/misc_helper.c | 43 ++++++++++++++++++++++++++++++++++++++++
 target/ppc/translate.c   | 13 ++++++++++++
 4 files changed, 61 insertions(+)
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index bda1afb700..ee81ede4ee 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -541,6 +541,7 @@ FIELD(MSR, LE, MSR_LE, 1)
 
 /* HFSCR bits */
 #define HFSCR_MSGP     PPC_BIT(53) /* Privileged Message Send Facilities */
+#define HFSCR_BHRB     PPC_BIT(59) /* BHRB Instructions */
 #define HFSCR_IC_MSGP  0xA
 
 #define DBCR0_ICMP (1 << 27)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 1a3d9a7e57..bbc32ff114 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -816,3 +816,7 @@ DEF_HELPER_4(DSCLIQ, void, env, fprp, fprp, i32)
 
 DEF_HELPER_1(tbegin, void, env)
 DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
+
+DEF_HELPER_1(clrbhrb, void, env)
+DEF_HELPER_FLAGS_2(mfbhrbe, TCG_CALL_NO_WG, i64, env, i32)
+
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 692d058665..45abe04f66 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -139,6 +139,17 @@ void helper_fscr_facility_check(CPUPPCState *env, uint32_t bit,
 #endif
 }
 
+static void helper_mmcr0_facility_check(CPUPPCState *env, uint32_t bit,
+                                 uint32_t sprn, uint32_t cause)
+{
+#ifdef TARGET_PPC64
+    if (FIELD_EX64(env->msr, MSR, PR) &&
+        !(env->spr[SPR_POWER_MMCR0] & (1ULL << bit))) {
+        raise_fu_exception(env, bit, sprn, cause, GETPC());
+    }
+#endif
+}
+
 void helper_msr_facility_check(CPUPPCState *env, uint32_t bit,
                                uint32_t sprn, uint32_t cause)
 {
@@ -351,3 +362,35 @@ void helper_fixup_thrm(CPUPPCState *env)
         env->spr[i] = v;
     }
 }
+
+void helper_clrbhrb(CPUPPCState *env)
+{
+    helper_hfscr_facility_check(env, HFSCR_BHRB, "clrbhrb", FSCR_IC_BHRB);
+
+    helper_mmcr0_facility_check(env, MMCR0_BHRBA, 0, FSCR_IC_BHRB);
+
+    memset(env->bhrb, 0, sizeof(env->bhrb));
+}
+
+uint64_t helper_mfbhrbe(CPUPPCState *env, uint32_t bhrbe)
+{
+    unsigned int index;
+
+    helper_hfscr_facility_check(env, HFSCR_BHRB, "mfbhrbe", FSCR_IC_BHRB);
+
+    helper_mmcr0_facility_check(env, MMCR0_BHRBA, 0, FSCR_IC_BHRB);
+
+    if ((bhrbe >= env->bhrb_num_entries) ||
+       (env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
+        return 0;
+    }
+
+    /*
+     * Note: bhrb_offset is the byte offset for writing the
+     * next entry (over the oldest entry), which is why we
+     * must offset bhrbe by 1 to get to the 0th entry.
+     */
+    index = ((env->bhrb_offset / sizeof(uint64_t)) - (bhrbe + 1)) %
+            env->bhrb_num_entries;
+    return env->bhrb[index];
+}
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 7824475f54..b330871793 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6549,12 +6549,25 @@ static void gen_brh(DisasContext *ctx)
 }
 #endif
 
+static void gen_clrbhrb(DisasContext *ctx)
+{
+    gen_helper_clrbhrb(cpu_env);
+}
+
+static void gen_mfbhrbe(DisasContext *ctx)
+{
+    TCGv_i32 bhrbe = tcg_constant_i32(_SPR(ctx->opcode));
+    gen_helper_mfbhrbe(cpu_gpr[rD(ctx->opcode)], cpu_env, bhrbe);
+}
+
 static opcode_t opcodes[] = {
 #if defined(TARGET_PPC64)
 GEN_HANDLER_E(brd, 0x1F, 0x1B, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA310),
 GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA310),
 GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA310),
 #endif
+GEN_HANDLER_E(clrbhrb, 0x1F, 0x0E, 0x0D, 0x3FFF801, PPC_NONE, PPC2_ISA207S),
+GEN_HANDLER_E(mfbhrbe, 0x1F, 0x0E, 0x09, 0x0000001, PPC_NONE, PPC2_ISA207S),
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
 #if defined(TARGET_PPC64)
 GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
-- 
2.31.1
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * [PATCH 4/4] target/ppc: Add migration support for BHRB
  2023-09-12 19:21 [PATCH 0/4] Add BHRB Facility Support Glenn Miles
                   ` (4 preceding siblings ...)
  2023-09-12 20:57 ` [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions Glenn Miles
@ 2023-09-12 21:05 ` Glenn Miles
  2023-09-12 21:27 ` Glenn Miles
  6 siblings, 0 replies; 18+ messages in thread
From: Glenn Miles @ 2023-09-12 21:05 UTC (permalink / raw)
  To: qemu-devel
  Cc: Glenn Miles, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, Nicholas Piggin,
	open list:PowerPC TCG CPUs
Adds migration support for Branch History Rolling
Buffer (BHRB) internal state.
Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
---
 target/ppc/machine.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index b195fb4dc8..89146969c8 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -314,6 +314,7 @@ static int cpu_post_load(void *opaque, int version_id)
 
     if (tcg_enabled()) {
         pmu_mmcr01a_updated(env);
+        hreg_bhrb_filter_update(env);
     }
 
     return 0;
@@ -670,6 +671,27 @@ static const VMStateDescription vmstate_compat = {
     }
 };
 
+#ifdef TARGET_PPC64
+static bool bhrb_needed(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    return (cpu->env.flags & POWERPC_FLAG_BHRB) != 0;
+}
+
+static const VMStateDescription vmstate_bhrb = {
+    .name = "cpu/bhrb",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = bhrb_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINTTL(env.bhrb_num_entries, PowerPCCPU),
+        VMSTATE_UINTTL(env.bhrb_offset, PowerPCCPU),
+        VMSTATE_UINT64_ARRAY(env.bhrb, PowerPCCPU, BHRB_MAX_NUM_ENTRIES),
+        VMSTATE_END_OF_LIST()
+    }
+};
+#endif
+
 const VMStateDescription vmstate_ppc_cpu = {
     .name = "cpu",
     .version_id = 5,
@@ -716,6 +738,7 @@ const VMStateDescription vmstate_ppc_cpu = {
 #ifdef TARGET_PPC64
         &vmstate_tm,
         &vmstate_slb,
+        &vmstate_bhrb,
 #endif /* TARGET_PPC64 */
         &vmstate_tlb6xx,
         &vmstate_tlbemb,
-- 
2.31.1
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * [PATCH 4/4] target/ppc: Add migration support for BHRB
  2023-09-12 19:21 [PATCH 0/4] Add BHRB Facility Support Glenn Miles
                   ` (5 preceding siblings ...)
  2023-09-12 21:05 ` [PATCH 4/4] target/ppc: Add migration support for BHRB Glenn Miles
@ 2023-09-12 21:27 ` Glenn Miles
  6 siblings, 0 replies; 18+ messages in thread
From: Glenn Miles @ 2023-09-12 21:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Glenn Miles, Daniel Henrique Barboza, Cédric Le Goater,
	David Gibson, Greg Kurz, Nicholas Piggin,
	open list:PowerPC TCG CPUs
Adds migration support for Branch History Rolling
Buffer (BHRB) internal state.
Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
---
 target/ppc/machine.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index b195fb4dc8..89146969c8 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -314,6 +314,7 @@ static int cpu_post_load(void *opaque, int version_id)
 
     if (tcg_enabled()) {
         pmu_mmcr01a_updated(env);
+        hreg_bhrb_filter_update(env);
     }
 
     return 0;
@@ -670,6 +671,27 @@ static const VMStateDescription vmstate_compat = {
     }
 };
 
+#ifdef TARGET_PPC64
+static bool bhrb_needed(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    return (cpu->env.flags & POWERPC_FLAG_BHRB) != 0;
+}
+
+static const VMStateDescription vmstate_bhrb = {
+    .name = "cpu/bhrb",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = bhrb_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINTTL(env.bhrb_num_entries, PowerPCCPU),
+        VMSTATE_UINTTL(env.bhrb_offset, PowerPCCPU),
+        VMSTATE_UINT64_ARRAY(env.bhrb, PowerPCCPU, BHRB_MAX_NUM_ENTRIES),
+        VMSTATE_END_OF_LIST()
+    }
+};
+#endif
+
 const VMStateDescription vmstate_ppc_cpu = {
     .name = "cpu",
     .version_id = 5,
@@ -716,6 +738,7 @@ const VMStateDescription vmstate_ppc_cpu = {
 #ifdef TARGET_PPC64
         &vmstate_tm,
         &vmstate_slb,
+        &vmstate_bhrb,
 #endif /* TARGET_PPC64 */
         &vmstate_tlb6xx,
         &vmstate_tlbemb,
-- 
2.31.1
^ permalink raw reply related	[flat|nested] 18+ messages in thread